summaryrefslogtreecommitdiff
path: root/storage/innobase
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase')
-rwxr-xr-xstorage/innobase/CMakeLists.txt92
-rw-r--r--storage/innobase/Makefile.am175
-rw-r--r--storage/innobase/btr/btr0btr.c3077
-rw-r--r--storage/innobase/btr/btr0cur.c3848
-rw-r--r--storage/innobase/btr/btr0pcur.c565
-rw-r--r--storage/innobase/btr/btr0sea.c1762
-rw-r--r--storage/innobase/buf/buf0buf.c2590
-rw-r--r--storage/innobase/buf/buf0flu.c1115
-rw-r--r--storage/innobase/buf/buf0lru.c1237
-rw-r--r--storage/innobase/buf/buf0rea.c728
-rw-r--r--storage/innobase/data/data0data.c681
-rw-r--r--storage/innobase/data/data0type.c295
-rw-r--r--storage/innobase/dict/dict0boot.c425
-rw-r--r--storage/innobase/dict/dict0crea.c1450
-rw-r--r--storage/innobase/dict/dict0dict.c4253
-rw-r--r--storage/innobase/dict/dict0load.c1360
-rw-r--r--storage/innobase/dict/dict0mem.c344
-rw-r--r--storage/innobase/dyn/dyn0dyn.c48
-rw-r--r--storage/innobase/eval/eval0eval.c836
-rw-r--r--storage/innobase/eval/eval0proc.c278
-rw-r--r--storage/innobase/fil/fil0fil.c4566
-rw-r--r--storage/innobase/fsp/fsp0fsp.c3990
-rw-r--r--storage/innobase/fut/fut0fut.c14
-rw-r--r--storage/innobase/fut/fut0lst.c518
-rw-r--r--storage/innobase/ha/ha0ha.c380
-rw-r--r--storage/innobase/ha/hash0hash.c153
-rw-r--r--storage/innobase/handler/ha_innodb.cc8534
-rw-r--r--storage/innobase/handler/ha_innodb.h255
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.c3580
-rw-r--r--storage/innobase/include/btr0btr.h451
-rw-r--r--storage/innobase/include/btr0btr.ic234
-rw-r--r--storage/innobase/include/btr0cur.h706
-rw-r--r--storage/innobase/include/btr0cur.ic154
-rw-r--r--storage/innobase/include/btr0pcur.h520
-rw-r--r--storage/innobase/include/btr0pcur.ic630
-rw-r--r--storage/innobase/include/btr0sea.h255
-rw-r--r--storage/innobase/include/btr0sea.ic67
-rw-r--r--storage/innobase/include/btr0types.h21
-rw-r--r--storage/innobase/include/buf0buf.h1074
-rw-r--r--storage/innobase/include/buf0buf.ic665
-rw-r--r--storage/innobase/include/buf0flu.h120
-rw-r--r--storage/innobase/include/buf0flu.ic106
-rw-r--r--storage/innobase/include/buf0lru.h144
-rw-r--r--storage/innobase/include/buf0lru.ic8
-rw-r--r--storage/innobase/include/buf0rea.h104
-rw-r--r--storage/innobase/include/buf0types.h20
-rw-r--r--storage/innobase/include/data0data.h424
-rw-r--r--storage/innobase/include/data0data.ic436
-rw-r--r--storage/innobase/include/data0type.h450
-rw-r--r--storage/innobase/include/data0type.ic562
-rw-r--r--storage/innobase/include/data0types.h19
-rw-r--r--storage/innobase/include/db0err.h80
-rw-r--r--storage/innobase/include/dict0boot.h134
-rw-r--r--storage/innobase/include/dict0boot.ic76
-rw-r--r--storage/innobase/include/dict0crea.h179
-rw-r--r--storage/innobase/include/dict0crea.ic8
-rw-r--r--storage/innobase/include/dict0dict.h1002
-rw-r--r--storage/innobase/include/dict0dict.ic664
-rw-r--r--storage/innobase/include/dict0load.h100
-rw-r--r--storage/innobase/include/dict0load.ic9
-rw-r--r--storage/innobase/include/dict0mem.h431
-rw-r--r--storage/innobase/include/dict0mem.ic9
-rw-r--r--storage/innobase/include/dict0types.h27
-rw-r--r--storage/innobase/include/dyn0dyn.h166
-rw-r--r--storage/innobase/include/dyn0dyn.ic346
-rw-r--r--storage/innobase/include/eval0eval.h97
-rw-r--r--storage/innobase/include/eval0eval.ic234
-rw-r--r--storage/innobase/include/eval0proc.h87
-rw-r--r--storage/innobase/include/eval0proc.ic71
-rw-r--r--storage/innobase/include/fil0fil.h716
-rw-r--r--storage/innobase/include/fsp0fsp.h391
-rw-r--r--storage/innobase/include/fsp0fsp.ic24
-rw-r--r--storage/innobase/include/fut0fut.h36
-rw-r--r--storage/innobase/include/fut0fut.ic38
-rw-r--r--storage/innobase/include/fut0lst.h198
-rw-r--r--storage/innobase/include/fut0lst.ic147
-rw-r--r--storage/innobase/include/ha0ha.h140
-rw-r--r--storage/innobase/include/ha0ha.ic185
-rw-r--r--storage/innobase/include/ha_prototypes.h76
-rw-r--r--storage/innobase/include/hash0hash.h367
-rw-r--r--storage/innobase/include/hash0hash.ic131
-rw-r--r--storage/innobase/include/ibuf0ibuf.h309
-rw-r--r--storage/innobase/include/ibuf0ibuf.ic224
-rw-r--r--storage/innobase/include/ibuf0types.h15
-rw-r--r--storage/innobase/include/lock0iter.h52
-rw-r--r--storage/innobase/include/lock0lock.h709
-rw-r--r--storage/innobase/include/lock0lock.ic81
-rw-r--r--storage/innobase/include/lock0priv.h101
-rw-r--r--storage/innobase/include/lock0priv.ic32
-rw-r--r--storage/innobase/include/lock0types.h16
-rw-r--r--storage/innobase/include/log0log.h872
-rw-r--r--storage/innobase/include/log0log.ic398
-rw-r--r--storage/innobase/include/log0recv.h349
-rw-r--r--storage/innobase/include/log0recv.ic35
-rw-r--r--storage/innobase/include/mach0data.h345
-rw-r--r--storage/innobase/include/mach0data.ic734
-rw-r--r--storage/innobase/include/mem0dbg.h126
-rw-r--r--storage/innobase/include/mem0dbg.ic93
-rw-r--r--storage/innobase/include/mem0mem.h412
-rw-r--r--storage/innobase/include/mem0mem.ic619
-rw-r--r--storage/innobase/include/mem0pool.h108
-rw-r--r--storage/innobase/include/mem0pool.ic7
-rw-r--r--storage/innobase/include/mtr0log.h217
-rw-r--r--storage/innobase/include/mtr0log.ic227
-rw-r--r--storage/innobase/include/mtr0mtr.h347
-rw-r--r--storage/innobase/include/mtr0mtr.ic251
-rw-r--r--storage/innobase/include/mtr0types.h14
-rw-r--r--storage/innobase/include/os0file.h731
-rw-r--r--storage/innobase/include/os0proc.h148
-rw-r--r--storage/innobase/include/os0proc.ic10
-rw-r--r--storage/innobase/include/os0sync.h311
-rw-r--r--storage/innobase/include/os0sync.ic152
-rw-r--r--storage/innobase/include/os0thread.h145
-rw-r--r--storage/innobase/include/os0thread.ic8
-rw-r--r--storage/innobase/include/page0cur.h286
-rw-r--r--storage/innobase/include/page0cur.ic210
-rw-r--r--storage/innobase/include/page0page.h829
-rw-r--r--storage/innobase/include/page0page.ic851
-rw-r--r--storage/innobase/include/page0types.h22
-rw-r--r--storage/innobase/include/pars0grm.h234
-rw-r--r--storage/innobase/include/pars0opt.h58
-rw-r--r--storage/innobase/include/pars0opt.ic7
-rw-r--r--storage/innobase/include/pars0pars.h731
-rw-r--r--storage/innobase/include/pars0pars.ic7
-rw-r--r--storage/innobase/include/pars0sym.h223
-rw-r--r--storage/innobase/include/pars0sym.ic7
-rw-r--r--storage/innobase/include/pars0types.h33
-rw-r--r--storage/innobase/include/que0que.h510
-rw-r--r--storage/innobase/include/que0que.ic259
-rw-r--r--storage/innobase/include/que0types.h43
-rw-r--r--storage/innobase/include/read0read.h165
-rw-r--r--storage/innobase/include/read0read.ic81
-rw-r--r--storage/innobase/include/read0types.h15
-rw-r--r--storage/innobase/include/rem0cmp.h173
-rw-r--r--storage/innobase/include/rem0cmp.ic76
-rw-r--r--storage/innobase/include/rem0rec.h582
-rw-r--r--storage/innobase/include/rem0rec.ic1531
-rw-r--r--storage/innobase/include/rem0types.h20
-rw-r--r--storage/innobase/include/row0ins.h169
-rw-r--r--storage/innobase/include/row0ins.ic9
-rw-r--r--storage/innobase/include/row0mysql.h743
-rw-r--r--storage/innobase/include/row0mysql.ic7
-rw-r--r--storage/innobase/include/row0purge.h79
-rw-r--r--storage/innobase/include/row0purge.ic8
-rw-r--r--storage/innobase/include/row0row.h250
-rw-r--r--storage/innobase/include/row0row.ic182
-rw-r--r--storage/innobase/include/row0sel.h392
-rw-r--r--storage/innobase/include/row0sel.ic88
-rw-r--r--storage/innobase/include/row0types.h37
-rw-r--r--storage/innobase/include/row0uins.h36
-rw-r--r--storage/innobase/include/row0uins.ic8
-rw-r--r--storage/innobase/include/row0umod.h35
-rw-r--r--storage/innobase/include/row0umod.ic7
-rw-r--r--storage/innobase/include/row0undo.h115
-rw-r--r--storage/innobase/include/row0undo.ic7
-rw-r--r--storage/innobase/include/row0upd.h432
-rw-r--r--storage/innobase/include/row0upd.ic122
-rw-r--r--storage/innobase/include/row0vers.h126
-rw-r--r--storage/innobase/include/row0vers.ic13
-rw-r--r--storage/innobase/include/srv0que.h53
-rw-r--r--storage/innobase/include/srv0srv.h572
-rw-r--r--storage/innobase/include/srv0srv.ic7
-rw-r--r--storage/innobase/include/srv0start.h112
-rw-r--r--storage/innobase/include/sync0arr.h122
-rw-r--r--storage/innobase/include/sync0arr.ic10
-rw-r--r--storage/innobase/include/sync0rw.h517
-rw-r--r--storage/innobase/include/sync0rw.ic559
-rw-r--r--storage/innobase/include/sync0sync.h561
-rw-r--r--storage/innobase/include/sync0sync.ic248
-rw-r--r--storage/innobase/include/sync0types.h16
-rw-r--r--storage/innobase/include/thr0loc.h67
-rw-r--r--storage/innobase/include/thr0loc.ic7
-rw-r--r--storage/innobase/include/trx0purge.h169
-rw-r--r--storage/innobase/include/trx0purge.ic26
-rw-r--r--storage/innobase/include/trx0rec.h303
-rw-r--r--storage/innobase/include/trx0rec.ic86
-rw-r--r--storage/innobase/include/trx0roll.h314
-rw-r--r--storage/innobase/include/trx0roll.ic23
-rw-r--r--storage/innobase/include/trx0rseg.h193
-rw-r--r--storage/innobase/include/trx0rseg.ic126
-rw-r--r--storage/innobase/include/trx0sys.h453
-rw-r--r--storage/innobase/include/trx0sys.ic366
-rw-r--r--storage/innobase/include/trx0trx.h713
-rw-r--r--storage/innobase/include/trx0trx.ic40
-rw-r--r--storage/innobase/include/trx0types.h45
-rw-r--r--storage/innobase/include/trx0undo.h503
-rw-r--r--storage/innobase/include/trx0undo.ic330
-rw-r--r--storage/innobase/include/trx0xa.h183
-rw-r--r--storage/innobase/include/univ.i376
-rw-r--r--storage/innobase/include/usr0sess.h61
-rw-r--r--storage/innobase/include/usr0sess.ic7
-rw-r--r--storage/innobase/include/usr0types.h14
-rw-r--r--storage/innobase/include/ut0byte.h250
-rw-r--r--storage/innobase/include/ut0byte.ic397
-rw-r--r--storage/innobase/include/ut0dbg.h113
-rw-r--r--storage/innobase/include/ut0list.h148
-rw-r--r--storage/innobase/include/ut0list.ic23
-rw-r--r--storage/innobase/include/ut0lst.h227
-rw-r--r--storage/innobase/include/ut0mem.h212
-rw-r--r--storage/innobase/include/ut0mem.ic70
-rw-r--r--storage/innobase/include/ut0rnd.h121
-rw-r--r--storage/innobase/include/ut0rnd.ic221
-rw-r--r--storage/innobase/include/ut0sort.h91
-rw-r--r--storage/innobase/include/ut0ut.h323
-rw-r--r--storage/innobase/include/ut0ut.ic174
-rw-r--r--storage/innobase/include/ut0vec.h73
-rw-r--r--storage/innobase/include/ut0vec.ic26
-rw-r--r--storage/innobase/include/ut0wqueue.h60
-rw-r--r--storage/innobase/lock/lock0iter.c90
-rw-r--r--storage/innobase/lock/lock0lock.c5189
-rw-r--r--storage/innobase/log/log0log.c3354
-rw-r--r--storage/innobase/log/log0recv.c3398
-rw-r--r--storage/innobase/mach/mach0data.c119
-rw-r--r--storage/innobase/mem/mem0dbg.c984
-rw-r--r--storage/innobase/mem/mem0mem.c577
-rw-r--r--storage/innobase/mem/mem0pool.c682
-rw-r--r--storage/innobase/mtr/mtr0log.c575
-rw-r--r--storage/innobase/mtr/mtr0mtr.c336
-rw-r--r--storage/innobase/os/os0file.c4550
-rw-r--r--storage/innobase/os/os0proc.c674
-rw-r--r--storage/innobase/os/os0sync.c753
-rw-r--r--storage/innobase/os/os0thread.c358
-rw-r--r--storage/innobase/page/page0cur.c1510
-rw-r--r--storage/innobase/page/page0page.c2038
-rw-r--r--storage/innobase/pars/lexyy.c2762
-rwxr-xr-xstorage/innobase/pars/make_bison.sh10
-rwxr-xr-xstorage/innobase/pars/make_flex.sh20
-rw-r--r--storage/innobase/pars/pars0grm.c2571
-rw-r--r--storage/innobase/pars/pars0grm.h234
-rw-r--r--storage/innobase/pars/pars0grm.y620
-rw-r--r--storage/innobase/pars/pars0lex.l648
-rw-r--r--storage/innobase/pars/pars0opt.c1208
-rw-r--r--storage/innobase/pars/pars0pars.c2200
-rw-r--r--storage/innobase/pars/pars0sym.c352
-rw-r--r--storage/innobase/plug.in44
-rw-r--r--storage/innobase/que/que0que.c1443
-rw-r--r--storage/innobase/read/read0read.c527
-rw-r--r--storage/innobase/rem/rem0cmp.c1064
-rw-r--r--storage/innobase/rem/rem0rec.c1515
-rw-r--r--storage/innobase/row/row0ins.c2522
-rw-r--r--storage/innobase/row/row0mysql.c4199
-rw-r--r--storage/innobase/row/row0purge.c673
-rw-r--r--storage/innobase/row/row0row.c726
-rw-r--r--storage/innobase/row/row0sel.c4640
-rw-r--r--storage/innobase/row/row0uins.c308
-rw-r--r--storage/innobase/row/row0umod.c762
-rw-r--r--storage/innobase/row/row0undo.c352
-rw-r--r--storage/innobase/row/row0upd.c2081
-rw-r--r--storage/innobase/row/row0vers.c665
-rw-r--r--storage/innobase/srv/srv0que.c110
-rw-r--r--storage/innobase/srv/srv0srv.c2885
-rw-r--r--storage/innobase/srv/srv0start.c2027
-rw-r--r--storage/innobase/sync/sync0arr.c1021
-rw-r--r--storage/innobase/sync/sync0rw.c997
-rw-r--r--storage/innobase/sync/sync0sync.c1425
-rw-r--r--storage/innobase/thr/thr0loc.c228
-rw-r--r--storage/innobase/trx/trx0purge.c1148
-rw-r--r--storage/innobase/trx/trx0rec.c1434
-rw-r--r--storage/innobase/trx/trx0roll.c1341
-rw-r--r--storage/innobase/trx/trx0rseg.c254
-rw-r--r--storage/innobase/trx/trx0sys.c997
-rw-r--r--storage/innobase/trx/trx0trx.c2086
-rw-r--r--storage/innobase/trx/trx0undo.c1920
-rw-r--r--storage/innobase/usr/usr0sess.c81
-rw-r--r--storage/innobase/ut/ut0byte.c31
-rw-r--r--storage/innobase/ut/ut0dbg.c98
-rw-r--r--storage/innobase/ut/ut0list.c169
-rw-r--r--storage/innobase/ut/ut0mem.c548
-rw-r--r--storage/innobase/ut/ut0rnd.c78
-rw-r--r--storage/innobase/ut/ut0ut.c592
-rw-r--r--storage/innobase/ut/ut0vec.c54
-rw-r--r--storage/innobase/ut/ut0wqueue.c92
-rw-r--r--storage/innobase/win_atomics32_test.c30
-rw-r--r--storage/innobase/win_atomics64_test.c30
274 files changed, 0 insertions, 165224 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
deleted file mode 100755
index 249a600834d..00000000000
--- a/storage/innobase/CMakeLists.txt
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright (C) 2006 MySQL AB
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-ADD_DEFINITIONS(-DMYSQL_SERVER -D_WIN32 -D_LIB)
-
-# Bug 19424 - InnoDB: Possibly a memory overrun of the buffer being freed (64-bit Visual C)
-# Removing Win64 compiler optimizations for all innodb/mem/* files.
-IF(CMAKE_GENERATOR MATCHES "Visual Studio" AND CMAKE_SIZEOF_VOID_P MATCHES 8)
- SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0mem.c
- ${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0pool.c
- PROPERTIES COMPILE_FLAGS -Od)
-ENDIF(CMAKE_GENERATOR MATCHES "Visual Studio" AND CMAKE_SIZEOF_VOID_P MATCHES 8)
-
-IF (WIN32)
- IF (NOT WITHOUT_ATOMICS)
-# Check if this Windows version supports atomic instructions
- IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
-# Check for 64 bit atomics
- TRY_RUN(RUN_RES COMPILE_RES ${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/storage/innobase/win_atomics64_test.c)
- IF (COMPILE_RES AND NOT RUN_RES)
- MESSAGE("Adding support for Win64 atomics")
- ADD_DEFINITIONS(-DWIN_ATOMICS64)
- ENDIF (COMPILE_RES AND NOT RUN_RES)
- ELSE (CMAKE_SIZEOF_VOID_P MATCHES 8)
-# Check for 32 bit atomics
- TRY_RUN(RUN_RES COMPILE_RES ${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/storage/innobase/win_atomics32_test.c)
- IF (COMPILE_RES AND NOT RUN_RES)
- MESSAGE("Adding support for Win32 atomics")
- ADD_DEFINITIONS(-DWIN_ATOMICS32)
- ENDIF (COMPILE_RES AND NOT RUN_RES)
- ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
- ENDIF (NOT WITHOUT_ATOMICS)
-ENDIF (WIN32)
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
- ${CMAKE_SOURCE_DIR}/storage/innobase/include
- ${CMAKE_SOURCE_DIR}/storage/innobase/handler
- ${CMAKE_SOURCE_DIR}/sql
- ${CMAKE_SOURCE_DIR}/regex
- ${CMAKE_SOURCE_DIR}/extra/yassl/include)
-
-SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
- buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c
- data/data0data.c data/data0type.c
- dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c
- dyn/dyn0dyn.c
- eval/eval0eval.c eval/eval0proc.c
- fil/fil0fil.c
- fsp/fsp0fsp.c
- fut/fut0fut.c fut/fut0lst.c
- ha/ha0ha.c ha/hash0hash.c
- ibuf/ibuf0ibuf.c
- pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c
- lock/lock0lock.c
- log/log0log.c log/log0recv.c
- mach/mach0data.c
- mem/mem0mem.c mem/mem0pool.c
- mtr/mtr0log.c mtr/mtr0mtr.c
- os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c
- page/page0cur.c page/page0page.c
- que/que0que.c
- handler/ha_innodb.cc
- read/read0read.c
- rem/rem0cmp.c rem/rem0rec.c
- row/row0ins.c row/row0mysql.c row/row0purge.c row/row0row.c row/row0sel.c row/row0uins.c
- row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c
- srv/srv0que.c srv/srv0srv.c srv/srv0start.c
- sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c
- thr/thr0loc.c
- trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
- usr/usr0sess.c
- ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c)
-
-IF(NOT SOURCE_SUBLIBS)
- ADD_LIBRARY(innobase ${INNOBASE_SOURCES})
- ADD_DEPENDENCIES(innobase GenError)
-ENDIF(NOT SOURCE_SUBLIBS)
diff --git a/storage/innobase/Makefile.am b/storage/innobase/Makefile.am
deleted file mode 100644
index 180d2ca0b87..00000000000
--- a/storage/innobase/Makefile.am
+++ /dev/null
@@ -1,175 +0,0 @@
-# Copyright (C) 2001, 2004, 2006 MySQL AB & Innobase Oy
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-# Process this file with automake to create Makefile.in
-
-MYSQLDATAdir= $(localstatedir)
-MYSQLSHAREdir= $(pkgdatadir)
-MYSQLBASEdir= $(prefix)
-MYSQLLIBdir= $(pkglibdir)
-pkgplugindir= $(pkglibdir)/plugin
-INCLUDES= -I$(top_srcdir)/include -I$(top_builddir)/include \
- -I$(top_srcdir)/regex \
- -I$(top_srcdir)/storage/innobase/include \
- -I$(top_srcdir)/sql \
- -I$(srcdir)
-
-DEFS= @DEFS@
-
-
-noinst_HEADERS= include/btr0btr.h include/btr0btr.ic \
- include/btr0cur.h include/btr0cur.ic \
- include/btr0pcur.h include/btr0pcur.ic \
- include/btr0sea.h include/btr0sea.ic \
- include/btr0types.h include/buf0buf.h \
- include/buf0buf.ic include/buf0flu.h \
- include/buf0flu.ic include/buf0lru.h \
- include/buf0lru.ic include/buf0rea.h \
- include/buf0types.h include/data0data.h \
- include/data0data.ic include/data0type.h \
- include/data0type.ic include/data0types.h \
- include/db0err.h include/dict0boot.h \
- include/dict0boot.ic include/dict0crea.h \
- include/dict0crea.ic include/dict0dict.h \
- include/dict0dict.ic include/dict0load.h \
- include/dict0load.ic include/dict0mem.h \
- include/dict0mem.ic include/dict0types.h \
- include/dyn0dyn.h include/dyn0dyn.ic \
- include/eval0eval.h include/eval0eval.ic \
- include/eval0proc.h include/eval0proc.ic \
- include/fil0fil.h include/fsp0fsp.h \
- include/fsp0fsp.ic include/fut0fut.h \
- include/fut0fut.ic include/fut0lst.h \
- include/fut0lst.ic include/ha0ha.h \
- include/ha0ha.ic include/hash0hash.h \
- include/hash0hash.ic include/ibuf0ibuf.h \
- include/ibuf0ibuf.ic include/ibuf0types.h \
- include/lock0iter.h \
- include/lock0lock.h include/lock0lock.ic \
- include/lock0priv.h include/lock0priv.ic \
- include/lock0types.h include/log0log.h \
- include/log0log.ic include/log0recv.h \
- include/log0recv.ic include/mach0data.h \
- include/mach0data.ic include/mem0dbg.h \
- include/mem0dbg.ic mem/mem0dbg.c \
- include/mem0mem.h include/mem0mem.ic \
- include/mem0pool.h include/mem0pool.ic \
- include/mtr0log.h include/mtr0log.ic \
- include/mtr0mtr.h include/mtr0mtr.ic \
- include/mtr0types.h include/os0file.h \
- include/os0proc.h include/os0proc.ic \
- include/os0sync.h include/os0sync.ic \
- include/os0thread.h include/os0thread.ic \
- include/page0cur.h include/page0cur.ic \
- include/page0page.h include/page0page.ic \
- include/page0types.h include/pars0grm.h \
- include/pars0opt.h include/pars0opt.ic \
- include/pars0pars.h include/pars0pars.ic \
- include/pars0sym.h include/pars0sym.ic \
- include/pars0types.h include/que0que.h \
- include/que0que.ic include/que0types.h \
- include/read0read.h include/read0read.ic \
- include/read0types.h include/rem0cmp.h \
- include/rem0cmp.ic include/rem0rec.h \
- include/rem0rec.ic include/rem0types.h \
- include/row0ins.h include/row0ins.ic \
- include/row0mysql.h include/row0mysql.ic \
- include/row0purge.h include/row0purge.ic \
- include/row0row.h include/row0row.ic \
- include/row0sel.h include/row0sel.ic \
- include/row0types.h include/row0uins.h \
- include/row0uins.ic include/row0umod.h \
- include/row0umod.ic include/row0undo.h \
- include/row0undo.ic include/row0upd.h \
- include/row0upd.ic include/row0vers.h \
- include/row0vers.ic include/srv0que.h \
- include/srv0srv.h include/srv0srv.ic \
- include/srv0start.h include/sync0arr.h \
- include/sync0arr.ic include/sync0rw.h \
- include/sync0rw.ic include/sync0sync.h \
- include/sync0sync.ic include/sync0types.h \
- include/thr0loc.h include/thr0loc.ic \
- include/trx0purge.h include/trx0purge.ic \
- include/trx0rec.h include/trx0rec.ic \
- include/trx0roll.h include/trx0roll.ic \
- include/trx0rseg.h include/trx0rseg.ic \
- include/trx0sys.h include/trx0sys.ic \
- include/trx0trx.h include/trx0trx.ic \
- include/trx0types.h include/trx0undo.h \
- include/trx0undo.ic include/trx0xa.h \
- include/univ.i include/usr0sess.h \
- include/usr0sess.ic include/usr0types.h \
- include/ut0byte.h include/ut0byte.ic \
- include/ut0dbg.h include/ut0lst.h \
- include/ut0mem.h include/ut0mem.ic \
- include/ut0rnd.h include/ut0rnd.ic \
- include/ut0sort.h include/ut0ut.h \
- include/ut0ut.ic include/ut0vec.h \
- include/ut0vec.ic include/ut0list.h \
- include/ut0list.ic include/ut0wqueue.h \
- include/ha_prototypes.h handler/ha_innodb.h
-
-EXTRA_LIBRARIES= libinnobase.a
-noinst_LIBRARIES= @plugin_innobase_static_target@
-libinnobase_a_SOURCES= btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \
- btr/btr0sea.c buf/buf0buf.c buf/buf0flu.c \
- buf/buf0lru.c buf/buf0rea.c data/data0data.c \
- data/data0type.c dict/dict0boot.c \
- dict/dict0crea.c dict/dict0dict.c \
- dict/dict0load.c dict/dict0mem.c dyn/dyn0dyn.c \
- eval/eval0eval.c eval/eval0proc.c \
- fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c \
- fut/fut0lst.c ha/ha0ha.c ha/hash0hash.c \
- ibuf/ibuf0ibuf.c lock/lock0iter.c \
- lock/lock0lock.c \
- log/log0log.c log/log0recv.c mach/mach0data.c \
- mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c \
- mtr/mtr0mtr.c os/os0file.c os/os0proc.c \
- os/os0sync.c os/os0thread.c page/page0cur.c \
- page/page0page.c pars/lexyy.c pars/pars0grm.c \
- pars/pars0opt.c pars/pars0pars.c \
- pars/pars0sym.c que/que0que.c read/read0read.c \
- rem/rem0cmp.c rem/rem0rec.c row/row0ins.c \
- row/row0mysql.c row/row0purge.c row/row0row.c \
- row/row0sel.c row/row0uins.c row/row0umod.c \
- row/row0undo.c row/row0upd.c row/row0vers.c \
- srv/srv0que.c srv/srv0srv.c srv/srv0start.c \
- sync/sync0arr.c sync/sync0rw.c \
- sync/sync0sync.c thr/thr0loc.c trx/trx0purge.c \
- trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c \
- trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c \
- usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c \
- ut/ut0list.c ut/ut0mem.c ut/ut0rnd.c \
- ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c \
- handler/ha_innodb.cc
-
-libinnobase_a_CXXFLAGS= $(AM_CFLAGS)
-libinnobase_a_CFLAGS= $(AM_CFLAGS)
-
-EXTRA_LTLIBRARIES= ha_innodb.la
-pkgplugin_LTLIBRARIES= @plugin_innobase_shared_target@
-
-ha_innodb_la_LDFLAGS= -module -rpath $(pkgplugindir)
-ha_innodb_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
-ha_innodb_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
-ha_innodb_la_SOURCES= $(libinnobase_a_SOURCES)
-
-EXTRA_DIST= CMakeLists.txt plug.in \
- pars/make_bison.sh pars/make_flex.sh \
- pars/pars0grm.y pars/pars0lex.l \
- win_atomics32_test.c win_atomics64_test.c
-
-# Don't update the files from bitkeeper
-%::SCCS/s.%
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
deleted file mode 100644
index 6e8b43aeb8d..00000000000
--- a/storage/innobase/btr/btr0btr.c
+++ /dev/null
@@ -1,3077 +0,0 @@
-/******************************************************
-The B-tree
-
-(c) 1994-1996 Innobase Oy
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0btr.h"
-
-#ifdef UNIV_NONINL
-#include "btr0btr.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "page0page.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "btr0pcur.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "ibuf0ibuf.h"
-#include "trx0trx.h"
-
-/*
-Latching strategy of the InnoDB B-tree
---------------------------------------
-A tree latch protects all non-leaf nodes of the tree. Each node of a tree
-also has a latch of its own.
-
-A B-tree operation normally first acquires an S-latch on the tree. It
-searches down the tree and releases the tree latch when it has the
-leaf node latch. To save CPU time we do not acquire any latch on
-non-leaf nodes of the tree during a search, those pages are only bufferfixed.
-
-If an operation needs to restructure the tree, it acquires an X-latch on
-the tree before searching to a leaf node. If it needs, for example, to
-split a leaf,
-(1) InnoDB decides the split point in the leaf,
-(2) allocates a new page,
-(3) inserts the appropriate node pointer to the first non-leaf level,
-(4) releases the tree X-latch,
-(5) and then moves records from the leaf to the new allocated page.
-
-Node pointers
--------------
-Leaf pages of a B-tree contain the index records stored in the
-tree. On levels n > 0 we store 'node pointers' to pages on level
-n - 1. For each page there is exactly one node pointer stored:
-thus the our tree is an ordinary B-tree, not a B-link tree.
-
-A node pointer contains a prefix P of an index record. The prefix
-is long enough so that it determines an index record uniquely.
-The file page number of the child page is added as the last
-field. To the child page we can store node pointers or index records
-which are >= P in the alphabetical order, but < P1 if there is
-a next node pointer on the level, and P1 is its prefix.
-
-If a node pointer with a prefix P points to a non-leaf child,
-then the leftmost record in the child must have the same
-prefix P. If it points to a leaf node, the child is not required
-to contain any record with a prefix equal to P. The leaf case
-is decided this way to allow arbitrary deletions in a leaf node
-without touching upper levels of the tree.
-
-We have predefined a special minimum record which we
-define as the smallest record in any alphabetical order.
-A minimum record is denoted by setting a bit in the record
-header. A minimum record acts as the prefix of a node pointer
-which points to a leftmost node on any level of the tree.
-
-File page allocation
---------------------
-In the root node of a B-tree there are two file segment headers.
-The leaf pages of a tree are allocated from one file segment, to
-make them consecutive on disk if possible. From the other file segment
-we allocate pages for the non-leaf levels of the tree.
-*/
-
-/****************************************************************
-Returns the upper level node pointer to a page. It is assumed that
-mtr holds an x-latch on the tree. */
-static
-rec_t*
-btr_page_get_father_node_ptr(
-/*=========================*/
- /* out: pointer to node pointer record */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page: must contain at least one
- user record */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Empties an index page. */
-static
-void
-btr_page_empty(
-/*===========*/
- page_t* page, /* in: page to be emptied */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Returns TRUE if the insert fits on the appropriate half-page
-with the chosen split_rec. */
-static
-ibool
-btr_page_insert_fits(
-/*=================*/
- /* out: TRUE if fits */
- btr_cur_t* cursor, /* in: cursor at which insert
- should be made */
- rec_t* split_rec, /* in: suggestion for first record
- on upper half-page, or NULL if
- tuple should be first */
- const ulint* offsets, /* in: rec_get_offsets(
- split_rec, cursor->index) */
- dtuple_t* tuple, /* in: tuple to insert */
- mem_heap_t* heap); /* in: temporary memory heap */
-
-/******************************************************************
-Gets the root node of a tree and x-latches it. */
-
-page_t*
-btr_root_get(
-/*=========*/
- /* out: root page, x-latched */
- dict_index_t* index, /* in: index tree */
- mtr_t* mtr) /* in: mtr */
-{
- ulint space;
- ulint root_page_no;
- page_t* root;
-
- space = dict_index_get_space(index);
- root_page_no = dict_index_get_page(index);
-
- root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
- ut_a((ibool)!!page_is_comp(root) == dict_table_is_comp(index->table));
-
- return(root);
-}
-
-/*****************************************************************
-Gets pointer to the previous user record in the tree. It is assumed that
-the caller has appropriate latches on the page and its neighbor. */
-
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
- /* out: previous user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr) /* in: mtr holding a latch on the page, and if
- needed, also to the previous page */
-{
- page_t* page;
- page_t* prev_page;
- ulint prev_page_no;
- ulint space;
-
- if (!page_rec_is_infimum(rec)) {
-
- rec_t* prev_rec = page_rec_get_prev(rec);
-
- if (!page_rec_is_infimum(prev_rec)) {
-
- return(prev_rec);
- }
- }
-
- page = buf_frame_align(rec);
- prev_page_no = btr_page_get_prev(page, mtr);
- space = buf_frame_get_space_id(page);
-
- if (prev_page_no != FIL_NULL) {
-
- prev_page = buf_page_get_with_no_latch(space, prev_page_no,
- mtr);
- /* The caller must already have a latch to the brother */
- ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page),
- MTR_MEMO_PAGE_S_FIX))
- || (mtr_memo_contains(mtr, buf_block_align(prev_page),
- MTR_MEMO_PAGE_X_FIX)));
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
- }
-
- return(NULL);
-}
-
-/*****************************************************************
-Gets pointer to the next user record in the tree. It is assumed that the
-caller has appropriate latches on the page and its neighbor. */
-
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
- /* out: next user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr) /* in: mtr holding a latch on the page, and if
- needed, also to the next page */
-{
- page_t* page;
- page_t* next_page;
- ulint next_page_no;
- ulint space;
-
- if (!page_rec_is_supremum(rec)) {
-
- rec_t* next_rec = page_rec_get_next(rec);
-
- if (!page_rec_is_supremum(next_rec)) {
-
- return(next_rec);
- }
- }
-
- page = buf_frame_align(rec);
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
-
- if (next_page_no != FIL_NULL) {
-
- next_page = buf_page_get_with_no_latch(space, next_page_no,
- mtr);
- /* The caller must already have a latch to the brother */
- ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page),
- MTR_MEMO_PAGE_S_FIX))
- || (mtr_memo_contains(mtr, buf_block_align(next_page),
- MTR_MEMO_PAGE_X_FIX)));
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(next_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- ut_a(page_is_comp(next_page) == page_is_comp(page));
- return(page_rec_get_next(page_get_infimum_rec(next_page)));
- }
-
- return(NULL);
-}
-
-/******************************************************************
-Creates a new index page (not the root, and also not
-used in page reorganization). */
-static
-void
-btr_page_create(
-/*============*/
- page_t* page, /* in: page to be created */
- dict_index_t* index, /* in: index */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- page_create(page, mtr, dict_table_is_comp(index->table));
- buf_block_align(page)->check_index_page_at_flush = TRUE;
-
- btr_page_set_index_id(page, index->id, mtr);
-}
-
-/******************************************************************
-Allocates a new file page to be used in an ibuf tree. Takes the page from
-the free list of the tree, which must contain pages! */
-static
-page_t*
-btr_page_alloc_for_ibuf(
-/*====================*/
- /* out: new allocated page, x-latched */
- dict_index_t* index, /* in: index tree */
- mtr_t* mtr) /* in: mtr */
-{
- fil_addr_t node_addr;
- page_t* root;
- page_t* new_page;
-
- root = btr_root_get(index, mtr);
-
- node_addr = flst_get_first(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, mtr);
- ut_a(node_addr.page != FIL_NULL);
-
- new_page = buf_page_get(dict_index_get_space(index), node_addr.page,
- RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
- flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
- mtr);
- ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- mtr));
-
- return(new_page);
-}
-
-/******************************************************************
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents! */
-
-page_t*
-btr_page_alloc(
-/*===========*/
- /* out: new allocated page, x-latched;
- NULL if out of space */
- dict_index_t* index, /* in: index */
- ulint hint_page_no, /* in: hint of a good page */
- byte file_direction, /* in: direction where a possible
- page split is made */
- ulint level, /* in: level where the page is placed
- in the tree */
- mtr_t* mtr) /* in: mtr */
-{
- fseg_header_t* seg_header;
- page_t* root;
- page_t* new_page;
- ulint new_page_no;
-
- if (index->type & DICT_IBUF) {
-
- return(btr_page_alloc_for_ibuf(index, mtr));
- }
-
- root = btr_root_get(index, mtr);
-
- if (level == 0) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- } else {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- }
-
- /* Parameter TRUE below states that the caller has made the
- reservation for free extents, and thus we know that a page can
- be allocated: */
-
- new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
- file_direction, TRUE, mtr);
- if (new_page_no == FIL_NULL) {
-
- return(NULL);
- }
-
- new_page = buf_page_get(dict_index_get_space(index), new_page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(new_page);
-}
-
-/******************************************************************
-Gets the number of pages in a B-tree. */
-
-ulint
-btr_get_size(
-/*=========*/
- /* out: number of pages */
- dict_index_t* index, /* in: index */
- ulint flag) /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
-{
- fseg_header_t* seg_header;
- page_t* root;
- ulint n;
- ulint dummy;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- mtr_s_lock(dict_index_get_lock(index), &mtr);
-
- root = btr_root_get(index, &mtr);
-
- if (flag == BTR_N_LEAF_PAGES) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- fseg_n_reserved_pages(seg_header, &n, &mtr);
-
- } else if (flag == BTR_TOTAL_SIZE) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
- n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
-
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
- } else {
- ut_error;
- }
-
- mtr_commit(&mtr);
-
- return(n);
-}
-
-/******************************************************************
-Frees a page used in an ibuf tree. Puts the page to the free list of the
-ibuf tree. */
-static
-void
-btr_page_free_for_ibuf(
-/*===================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* root;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- root = btr_root_get(index, mtr);
-
- flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
-
- ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- mtr));
-}
-
-/******************************************************************
-Frees a file page used in an index tree. Can be used also to (BLOB)
-external storage pages, because the page level 0 can be given as an
-argument. */
-
-void
-btr_page_free_low(
-/*==============*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- ulint level, /* in: page level */
- mtr_t* mtr) /* in: mtr */
-{
- fseg_header_t* seg_header;
- page_t* root;
- ulint space;
- ulint page_no;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- /* The page gets invalid for optimistic searches: increment the frame
- modify clock */
-
- buf_frame_modify_clock_inc(page);
-
- if (index->type & DICT_IBUF) {
-
- btr_page_free_for_ibuf(index, page, mtr);
-
- return;
- }
-
- root = btr_root_get(index, mtr);
-
- if (level == 0) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- } else {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- }
-
- space = buf_frame_get_space_id(page);
- page_no = buf_frame_get_page_no(page);
-
- fseg_free_page(seg_header, space, page_no, mtr);
-}
-
-/******************************************************************
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-
-void
-btr_page_free(
-/*==========*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- mtr_t* mtr) /* in: mtr */
-{
- ulint level;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- level = btr_page_get_level(page, mtr);
-
- btr_page_free_low(index, page, level, mtr);
-}
-
-/******************************************************************
-Sets the child node file address in a node pointer. */
-UNIV_INLINE
-void
-btr_node_ptr_set_child_page_no(
-/*===========================*/
- rec_t* rec, /* in: node pointer record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint page_no,/* in: child node address */
- mtr_t* mtr) /* in: mtr */
-{
- byte* field;
- ulint len;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr));
- ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
-
- /* The child address is in the last field */
- field = rec_get_nth_field(rec, offsets,
- rec_offs_n_fields(offsets) - 1, &len);
-
- ut_ad(len == 4);
-
- mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
-}
-
-/****************************************************************
-Returns the child page of a node pointer and x-latches it. */
-static
-page_t*
-btr_node_ptr_get_child(
-/*===================*/
- /* out: child page, x-latched */
- rec_t* node_ptr,/* in: node pointer */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- mtr_t* mtr) /* in: mtr */
-{
- ulint page_no;
- ulint space;
- page_t* page;
-
- ut_ad(rec_offs_validate(node_ptr, NULL, offsets));
- space = buf_frame_get_space_id(node_ptr);
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
-
- page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
-
- return(page);
-}
-
-/****************************************************************
-Returns the upper level node pointer to a page. It is assumed that mtr holds
-an x-latch on the tree. */
-static
-rec_t*
-btr_page_get_father_for_rec(
-/*========================*/
- /* out: pointer to node pointer record,
- its page x-latched */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page: must contain at least one
- user record */
- rec_t* user_rec,/* in: user_record on page */
- mtr_t* mtr) /* in: mtr */
-{
- mem_heap_t* heap;
- dtuple_t* tuple;
- btr_cur_t cursor;
- rec_t* node_ptr;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_a(page_rec_is_user_rec(user_rec));
-
- ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page));
-
- heap = mem_heap_create(100);
-
- tuple = dict_index_build_node_ptr(index, user_rec, 0, heap,
- btr_page_get_level(page, mtr));
-
- btr_cur_search_to_nth_level(index,
- btr_page_get_level(page, mtr) + 1,
- tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE, &cursor, 0, mtr);
-
- node_ptr = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(node_ptr, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
- != buf_frame_get_page_no(page))) {
- rec_t* print_rec;
- fputs("InnoDB: Dump of the child page:\n", stderr);
- buf_page_print(buf_frame_align(page));
- fputs("InnoDB: Dump of the parent page:\n", stderr);
- buf_page_print(buf_frame_align(node_ptr));
-
- fputs("InnoDB: Corruption of an index tree: table ", stderr);
- ut_print_name(stderr, NULL, TRUE, index->table_name);
- fputs(", index ", stderr);
- ut_print_name(stderr, NULL, FALSE, index->name);
- fprintf(stderr, ",\n"
- "InnoDB: father ptr page no %lu, child page no %lu\n",
- (ulong)
- btr_node_ptr_get_child_page_no(node_ptr, offsets),
- (ulong) buf_frame_get_page_no(page));
- print_rec = page_rec_get_next(page_get_infimum_rec(page));
- offsets = rec_get_offsets(print_rec, index,
- offsets, ULINT_UNDEFINED, &heap);
- page_rec_print(print_rec, offsets);
- offsets = rec_get_offsets(node_ptr, index, offsets,
- ULINT_UNDEFINED, &heap);
- page_rec_print(node_ptr, offsets);
-
- fputs("InnoDB: You should dump + drop + reimport the table"
- " to fix the\n"
- "InnoDB: corruption. If the crash happens at "
- "the database startup, see\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html about\n"
- "InnoDB: forcing recovery. "
- "Then dump + drop + reimport.\n", stderr);
- }
-
- ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets)
- == buf_frame_get_page_no(page));
- mem_heap_free(heap);
-
- return(node_ptr);
-}
-
-/****************************************************************
-Returns the upper level node pointer to a page. It is assumed that
-mtr holds an x-latch on the tree. */
-static
-rec_t*
-btr_page_get_father_node_ptr(
-/*=========================*/
- /* out: pointer to node pointer record */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page: must contain at least one
- user record */
- mtr_t* mtr) /* in: mtr */
-{
- return(btr_page_get_father_for_rec(
- index, page,
- page_rec_get_next(page_get_infimum_rec(page)), mtr));
-}
-
-/****************************************************************
-Creates the root node for a new index tree. */
-
-ulint
-btr_create(
-/*=======*/
- /* out: page number of the created root, FIL_NULL if
- did not succeed */
- ulint type, /* in: type of the index */
- ulint space, /* in: space where created */
- dulint index_id,/* in: index id */
- ulint comp, /* in: nonzero=compact page format */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint page_no;
- buf_frame_t* ibuf_hdr_frame;
- buf_frame_t* frame;
- page_t* page;
-
- /* Create the two new segments (one, in the case of an ibuf tree) for
- the index tree; the segment headers are put on the allocated root page
- (for an ibuf tree, not in the root, but on a separate ibuf header
- page) */
-
- if (type & DICT_IBUF) {
- /* Allocate first the ibuf header page */
- ibuf_hdr_frame = fseg_create(
- space, 0, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(buf_frame_get_page_no(ibuf_hdr_frame)
- == IBUF_HEADER_PAGE_NO);
- /* Allocate then the next page to the segment: it will be the
- tree root page */
-
- page_no = fseg_alloc_free_page(ibuf_hdr_frame + IBUF_HEADER
- + IBUF_TREE_SEG_HEADER,
- IBUF_TREE_ROOT_PAGE_NO,
- FSP_UP, mtr);
- ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
-
- frame = buf_page_get(space, page_no, RW_X_LATCH, mtr);
- } else {
- frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP,
- mtr);
- }
-
- if (frame == NULL) {
-
- return(FIL_NULL);
- }
-
- page_no = buf_frame_get_page_no(frame);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
- if (type & DICT_IBUF) {
- /* It is an insert buffer tree: initialize the free list */
-
- ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
-
- flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
- } else {
- /* It is a non-ibuf tree: create a file segment for leaf
- pages */
- fseg_create(space, page_no, PAGE_HEADER + PAGE_BTR_SEG_LEAF,
- mtr);
- /* The fseg create acquires a second latch on the page,
- therefore we must declare it: */
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
- }
-
- /* Create a new index page on the the allocated segment page */
- page = page_create(frame, mtr, comp);
- buf_block_align(page)->check_index_page_at_flush = TRUE;
-
- /* Set the index id of the page */
- btr_page_set_index_id(page, index_id, mtr);
-
- /* Set the level of the new index page */
- btr_page_set_level(page, 0, mtr);
-
- /* Set the next node and previous node fields */
- btr_page_set_next(page, FIL_NULL, mtr);
- btr_page_set_prev(page, FIL_NULL, mtr);
-
- /* We reset the free bits for the page to allow creation of several
- trees in the same mtr, otherwise the latch on a bitmap page would
- prevent it because of the latching order */
-
- ibuf_reset_free_bits_with_type(type, page);
-
- /* In the following assertion we test that two records of maximum
- allowed size fit on the root page: this fact is needed to ensure
- correctness of split algorithms */
-
- ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
-
- return(page_no);
-}
-
-/****************************************************************
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-
-void
-btr_free_but_not_root(
-/*==================*/
- ulint space, /* in: space where created */
- ulint root_page_no) /* in: root page number */
-{
- ibool finished;
- page_t* root;
- mtr_t mtr;
-
-leaf_loop:
- mtr_start(&mtr);
-
- root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
-
- /* NOTE: page hash indexes are dropped when a page is freed inside
- fsp0fsp. */
-
- finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
- &mtr);
- mtr_commit(&mtr);
-
- if (!finished) {
-
- goto leaf_loop;
- }
-top_loop:
- mtr_start(&mtr);
-
- root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr);
-
- finished = fseg_free_step_not_header(
- root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
- mtr_commit(&mtr);
-
- if (!finished) {
-
- goto top_loop;
- }
-}
-
-/****************************************************************
-Frees the B-tree root page. Other tree MUST already have been freed. */
-
-void
-btr_free_root(
-/*==========*/
- ulint space, /* in: space where created */
- ulint root_page_no, /* in: root page number */
- mtr_t* mtr) /* in: a mini-transaction which has already
- been started */
-{
- ibool finished;
- page_t* root;
-
- root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr);
-
- btr_search_drop_page_hash_index(root);
-top_loop:
- finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
- if (!finished) {
-
- goto top_loop;
- }
-}
-
-/*****************************************************************
-Reorganizes an index page. */
-static
-void
-btr_page_reorganize_low(
-/*====================*/
- ibool recovery,/* in: TRUE if called in recovery:
- locks should not be updated, i.e.,
- there cannot exist locks on the
- page, and a hash index should not be
- dropped: it cannot exist */
- page_t* page, /* in: page to be reorganized */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* new_page;
- ulint log_mode;
- ulint data_size1;
- ulint data_size2;
- ulint max_ins_size1;
- ulint max_ins_size2;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
- data_size1 = page_get_data_size(page);
- max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
-
- /* Write the log record */
- mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
- ? MLOG_COMP_PAGE_REORGANIZE
- : MLOG_PAGE_REORGANIZE, 0);
-
- /* Turn logging off */
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
- new_page = buf_frame_alloc();
-
- /* Copy the old page to temporary space */
- buf_frame_copy(new_page, page);
-
- if (!recovery) {
- btr_search_drop_page_hash_index(page);
- }
-
- /* Recreate the page: note that global data on page (possible
- segment headers, next page-field, etc.) is preserved intact */
-
- page_create(page, mtr, page_is_comp(page));
- buf_block_align(page)->check_index_page_at_flush = TRUE;
-
- /* Copy the records from the temporary space to the recreated page;
- do not copy the lock bits yet */
-
- page_copy_rec_list_end_no_locks(page, new_page,
- page_get_infimum_rec(new_page),
- index, mtr);
- /* Copy max trx id to recreated page */
- page_set_max_trx_id(page, page_get_max_trx_id(new_page));
-
- if (!recovery) {
- /* Update the record lock bitmaps */
- lock_move_reorganize_page(page, new_page);
- }
-
- data_size2 = page_get_data_size(page);
- max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
-
- if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
- buf_page_print(page);
- buf_page_print(new_page);
- fprintf(stderr,
- "InnoDB: Error: page old data size %lu"
- " new data size %lu\n"
- "InnoDB: Error: page old max ins size %lu"
- " new max ins size %lu\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n",
- (unsigned long) data_size1, (unsigned long) data_size2,
- (unsigned long) max_ins_size1,
- (unsigned long) max_ins_size2);
- }
-
- buf_frame_free(new_page);
-
- /* Restore logging mode */
- mtr_set_log_mode(mtr, log_mode);
-}
-
-/*****************************************************************
-Reorganizes an index page. */
-
-void
-btr_page_reorganize(
-/*================*/
- page_t* page, /* in: page to be reorganized */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- btr_page_reorganize_low(FALSE, page, index, mtr);
-}
-
-/***************************************************************
-Parses a redo log record of reorganizing a page. */
-
-byte*
-btr_parse_page_reorganize(
-/*======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)),
- /* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- /* The record is empty, except for the record initial part */
-
- if (page) {
- btr_page_reorganize_low(TRUE, page, index, mtr);
- }
-
- return(ptr);
-}
-
-/*****************************************************************
-Empties an index page. */
-static
-void
-btr_page_empty(
-/*===========*/
- page_t* page, /* in: page to be emptied */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(page);
-
- /* Recreate the page: note that global data on page (possible
- segment headers, next page-field, etc.) is preserved intact */
-
- page_create(page, mtr, page_is_comp(page));
- buf_block_align(page)->check_index_page_at_flush = TRUE;
-}
-
-/*****************************************************************
-Makes tree one level higher by splitting the root, and inserts
-the tuple. It is assumed that mtr contains an x-latch on the tree.
-NOTE that the operation of this function must always succeed,
-we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called. */
-
-rec_t*
-btr_root_raise_and_insert(
-/*======================*/
- /* out: inserted record */
- btr_cur_t* cursor, /* in: cursor at which to insert: must be
- on the root page; when the function returns,
- the cursor is positioned on the predecessor
- of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr) /* in: mtr */
-{
- dict_index_t* index;
- page_t* root;
- page_t* new_page;
- ulint new_page_no;
- rec_t* rec;
- mem_heap_t* heap;
- dtuple_t* node_ptr;
- ulint level;
- rec_t* node_ptr_rec;
- page_cur_t* page_cursor;
-
- root = btr_cur_get_page(cursor);
- index = btr_cur_get_index(cursor);
-
- ut_ad(dict_index_get_page(index) == buf_frame_get_page_no(root));
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(root),
- MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(root);
-
- /* Allocate a new page to the tree. Root splitting is done by first
- moving the root records to the new page, emptying the root, putting
- a node pointer to the new page, and then splitting the new page. */
-
- new_page = btr_page_alloc(index, 0, FSP_NO_DIR,
- btr_page_get_level(root, mtr), mtr);
-
- btr_page_create(new_page, index, mtr);
-
- level = btr_page_get_level(root, mtr);
-
- /* Set the levels of the new index page and root page */
- btr_page_set_level(new_page, level, mtr);
- btr_page_set_level(root, level + 1, mtr);
-
- /* Set the next node and previous node fields of new page */
- btr_page_set_next(new_page, FIL_NULL, mtr);
- btr_page_set_prev(new_page, FIL_NULL, mtr);
-
- /* Move the records from root to the new page */
-
- page_move_rec_list_end(new_page, root, page_get_infimum_rec(root),
- index, mtr);
- /* If this is a pessimistic insert which is actually done to
- perform a pessimistic update then we have stored the lock
- information of the record to be inserted on the infimum of the
- root page: we cannot discard the lock structs on the root page */
-
- lock_update_root_raise(new_page, root);
-
- /* Create a memory heap where the node pointer is stored */
- heap = mem_heap_create(100);
-
- rec = page_rec_get_next(page_get_infimum_rec(new_page));
- new_page_no = buf_frame_get_page_no(new_page);
-
- /* Build the node pointer (= node key and page address) for the
- child */
-
- node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
- level);
- /* Reorganize the root to get free space */
- btr_page_reorganize(root, index, mtr);
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- /* Insert node pointer to the root */
-
- page_cur_set_before_first(root, page_cursor);
-
- node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
- index, mtr);
-
- ut_ad(node_ptr_rec);
-
- /* The node pointer must be marked as the predefined minimum record,
- as there is no lower alphabetical limit to records in the leftmost
- node of a level: */
-
- btr_set_min_rec_mark(node_ptr_rec, page_is_comp(root), mtr);
-
- /* Free the memory heap */
- mem_heap_free(heap);
-
- /* We play safe and reset the free bits for the new page */
-
-#if 0
- fprintf(stderr, "Root raise new page no %lu\n",
- buf_frame_get_page_no(new_page));
-#endif
-
- ibuf_reset_free_bits(index, new_page);
- /* Reposition the cursor to the child node */
- page_cur_search(new_page, index, tuple,
- PAGE_CUR_LE, page_cursor);
-
- /* Split the child and insert tuple */
- return(btr_page_split_and_insert(cursor, tuple, mtr));
-}
-
-/*****************************************************************
-Decides if the page should be split at the convergence point of inserts
-converging to the left. The heuristic fires when the record stored in the
-page header field PAGE_LAST_INSERT is the immediate successor of the cursor
-record, i.e. the new tuple would be placed right before the previously
-inserted record. */
-
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec) /* out: if split recommended,
- the first record on upper half page;
- not written when FALSE is returned.
- NOTE(review): the original comment said
- "or NULL if tuple to be inserted should
- be first", but no branch below stores
- a literal NULL here -- confirm */
-{
- page_t* page;
- rec_t* insert_point;
- rec_t* infimum;
-
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
-
- if (page_header_get_ptr(page, PAGE_LAST_INSERT)
- == page_rec_get_next(insert_point)) {
-
- infimum = page_get_infimum_rec(page);
-
- /* If the convergence is in the middle of a page, include also
- the record immediately before the new insert to the upper
- page. Otherwise, we could repeatedly move from page to page
- lots of records smaller than the convergence point.
- "In the middle" here means that the cursor record is neither
- the infimum nor the first record after the infimum. */
-
- if (infimum != insert_point
- && page_rec_get_next(infimum) != insert_point) {
-
- *split_rec = insert_point;
- } else {
- *split_rec = page_rec_get_next(insert_point);
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*****************************************************************
-Decides if the page should be split at the convergence point of inserts
-converging to the right. The heuristic fires when the cursor record is the
-record stored in the page header field PAGE_LAST_INSERT. */
-
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec) /* out: if split recommended,
- the first record on upper half page,
- or NULL if tuple to be inserted should
- be first; not written when FALSE is
- returned */
-{
- page_t* page;
- rec_t* insert_point;
-
- page = btr_cur_get_page(cursor);
- insert_point = btr_cur_get_rec(cursor);
-
- /* We use eager heuristics: if the new insert would be right after
- the previous insert on the same page, we assume that there is a
- pattern of sequential inserts here. */
-
- if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT)
- == insert_point)) {
-
- rec_t* next_rec;
-
- next_rec = page_rec_get_next(insert_point);
-
- if (page_rec_is_supremum(next_rec)) {
-split_at_new:
- /* Split at the new record to insert. This point is
- reached when the supremum directly follows the insert
- point, and also (via the goto below) when exactly one
- record separates the insert point from the supremum. */
- *split_rec = NULL;
- } else {
- rec_t* next_next_rec = page_rec_get_next(next_rec);
- if (page_rec_is_supremum(next_next_rec)) {
-
- goto split_at_new;
- }
-
- /* If there are >= 2 user records up from the insert
- point, split all but 1 off. We want to keep one because
- then sequential inserts can use the adaptive hash
- index, as they can do the necessary checks of the right
- search position just by looking at the records on this
- page. */
-
- *split_rec = next_next_rec;
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*****************************************************************
-Calculates a split record such that the tuple will certainly fit on
-its half-page when the split is performed. We assume in this function
-only that the cursor page has at least one user record.
-The function walks the page records in order, treating the tuple as if it
-were already placed right after the cursor record, and accumulates record
-sizes until roughly half of the total space has been included. */
-static
-rec_t*
-btr_page_get_sure_split_rec(
-/*========================*/
- /* out: split record, or NULL if
- tuple will be the first record on
- upper half-page */
- btr_cur_t* cursor, /* in: cursor at which insert
- should be made */
- dtuple_t* tuple) /* in: tuple to insert */
-{
- page_t* page;
- ulint insert_size;
- ulint free_space;
- ulint total_data;
- ulint total_n_recs;
- ulint total_space;
- ulint incl_data;
- rec_t* ins_rec;
- rec_t* rec;
- rec_t* next_rec;
- ulint n;
- mem_heap_t* heap;
- ulint* offsets;
-
- page = btr_cur_get_page(cursor);
-
- insert_size = rec_get_converted_size(cursor->index, tuple);
- free_space = page_get_free_space_of_empty(page_is_comp(page));
-
- /* free_space is now the free space of a created new page */
-
- total_data = page_get_data_size(page) + insert_size;
- total_n_recs = page_get_n_recs(page) + 1;
- ut_ad(total_n_recs >= 2);
- total_space = total_data + page_dir_calc_reserved_space(total_n_recs);
-
- n = 0;
- incl_data = 0;
- ins_rec = btr_cur_get_rec(cursor);
- rec = page_get_infimum_rec(page);
-
- heap = NULL;
- offsets = NULL;
-
- /* We start to include records to the left half, and when the
- space reserved by them exceeds half of total_space, then if
- the included records fit on the left page, they will be put there
- if something was left over also for the right page,
- otherwise the last included record will be the first on the right
- half page */
-
- for (;;) {
- /* Decide the next record to include. The iteration order is:
- records up to and including ins_rec, then the tuple
- (represented by rec == NULL), then the records that follow
- ins_rec on the page. */
- if (rec == ins_rec) {
- rec = NULL; /* NULL denotes that tuple is
- now included */
- } else if (rec == NULL) {
- rec = page_rec_get_next(ins_rec);
- } else {
- rec = page_rec_get_next(rec);
- }
-
- if (rec == NULL) {
- /* Include tuple */
- incl_data += insert_size;
- } else {
- offsets = rec_get_offsets(rec, cursor->index,
- offsets, ULINT_UNDEFINED,
- &heap);
- incl_data += rec_offs_size(offsets);
- }
-
- n++;
-
- if (incl_data + page_dir_calc_reserved_space(n)
- >= total_space / 2) {
-
- if (incl_data + page_dir_calc_reserved_space(n)
- <= free_space) {
- /* The next record will be the first on
- the right half page if it is not the
- supremum record of page */
-
- if (rec == ins_rec) {
- rec = NULL;
-
- goto func_exit;
- } else if (rec == NULL) {
- next_rec = page_rec_get_next(ins_rec);
- } else {
- next_rec = page_rec_get_next(rec);
- }
- ut_ad(next_rec);
- if (!page_rec_is_supremum(next_rec)) {
- rec = next_rec;
- }
- }
-
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(rec);
- }
- }
-}
-
-/*****************************************************************
-Returns TRUE if the insert fits on the appropriate half-page with the
-chosen split_rec. The check is done by arithmetic only: starting from the
-size of the whole page plus the tuple, the sizes of the records that would
-end up on the other half-page are subtracted one at a time until the
-remainder fits in an empty page or those records run out. */
-static
-ibool
-btr_page_insert_fits(
-/*=================*/
- /* out: TRUE if fits */
- btr_cur_t* cursor, /* in: cursor at which insert
- should be made */
- rec_t* split_rec, /* in: suggestion for first record
- on upper half-page, or NULL if
- tuple to be inserted should be first */
- const ulint* offsets, /* in: rec_get_offsets(
- split_rec, cursor->index); must be
- NULL exactly when split_rec is NULL
- (debug-asserted below) */
- dtuple_t* tuple, /* in: tuple to insert */
- mem_heap_t* heap) /* in: temporary memory heap; its
- address is handed to rec_get_offsets()
- in the loop below */
-{
- page_t* page;
- ulint insert_size;
- ulint free_space;
- ulint total_data;
- ulint total_n_recs;
- rec_t* rec;
- rec_t* end_rec;
- ulint* offs;
-
- page = btr_cur_get_page(cursor);
-
- ut_ad(!split_rec == !offsets);
- ut_ad(!offsets
- || !page_is_comp(page) == !rec_offs_comp(offsets));
- ut_ad(!offsets
- || rec_offs_validate(split_rec, cursor->index, offsets));
-
- insert_size = rec_get_converted_size(cursor->index, tuple);
- free_space = page_get_free_space_of_empty(page_is_comp(page));
-
- /* free_space is now the free space of a created new page */
-
- total_data = page_get_data_size(page) + insert_size;
- total_n_recs = page_get_n_recs(page) + 1;
-
- /* We determine which records (from rec to end_rec, not including
- end_rec) will end up on the other half page from tuple when it is
- inserted. */
-
- if (split_rec == NULL) {
- rec = page_rec_get_next(page_get_infimum_rec(page));
- end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
-
- } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
-
- rec = page_rec_get_next(page_get_infimum_rec(page));
- end_rec = split_rec;
- } else {
- rec = split_rec;
- end_rec = page_get_supremum_rec(page);
- }
-
- if (total_data + page_dir_calc_reserved_space(total_n_recs)
- <= free_space) {
-
- /* Ok, there will be enough available space on the
- half page where the tuple is inserted */
-
- return(TRUE);
- }
-
- offs = NULL;
-
- while (rec != end_rec) {
- /* In this loop we calculate the amount of reserved
- space after rec is removed from page. */
-
- offs = rec_get_offsets(rec, cursor->index, offs,
- ULINT_UNDEFINED, &heap);
-
- total_data -= rec_offs_size(offs);
- total_n_recs--;
-
- if (total_data + page_dir_calc_reserved_space(total_n_recs)
- <= free_space) {
-
- /* Ok, there will be enough available space on the
- half page where the tuple is inserted */
-
- return(TRUE);
- }
-
- rec = page_rec_get_next(rec);
- }
-
- return(FALSE);
-}
-
-/***********************************************************
-Inserts a data tuple to a tree on a non-leaf level. It is assumed
-that mtr holds an x-latch on the tree. The cursor is positioned with
-PAGE_CUR_LE on the given level and the tuple is inserted pessimistically
-with locking, system-column handling and undo logging switched off by the
-flags passed below. Any result other than DB_SUCCESS trips ut_a(); the
-inserted record pointer and the big_rec output are not used afterwards. */
-
-void
-btr_insert_on_non_leaf_level(
-/*=========================*/
- dict_index_t* index, /* in: index */
- ulint level, /* in: level, must be > 0 */
- dtuple_t* tuple, /* in: the record to be inserted */
- mtr_t* mtr) /* in: mtr */
-{
- big_rec_t* dummy_big_rec;
- btr_cur_t cursor;
- ulint err;
- rec_t* rec;
-
- ut_ad(level > 0);
-
- btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE,
- &cursor, 0, mtr);
-
- err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG,
- &cursor, tuple, &rec,
- &dummy_big_rec, NULL, mtr);
- ut_a(err == DB_SUCCESS);
-}
-
-/******************************************************************
-Attaches the halves of an index page on the appropriate level in an
-index tree. This inserts a node pointer for the upper half on the level
-above, and rewires the prev/next links of the two halves and of their
-neighbours on the same level. No records are moved here. */
-static
-void
-btr_attach_half_pages(
-/*==================*/
- dict_index_t* index, /* in: the index tree */
- page_t* page, /* in: page to be split */
- rec_t* split_rec, /* in: first record on upper
- half page */
- page_t* new_page, /* in: the new half page */
- ulint direction, /* in: FSP_UP or FSP_DOWN */
- mtr_t* mtr) /* in: mtr */
-{
- ulint space;
- rec_t* node_ptr;
- page_t* prev_page;
- page_t* next_page;
- ulint prev_page_no;
- ulint next_page_no;
- ulint level;
- page_t* lower_page;
- page_t* upper_page;
- ulint lower_page_no;
- ulint upper_page_no;
- dtuple_t* node_ptr_upper;
- mem_heap_t* heap;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page),
- MTR_MEMO_PAGE_X_FIX));
- ut_a(page_is_comp(page) == page_is_comp(new_page));
-
- /* Create a memory heap where the data tuple is stored */
- heap = mem_heap_create(1024);
-
- /* Based on split direction, decide upper and lower pages.
- With FSP_DOWN the new page becomes the lower half, so the existing
- father node pointer has to be redirected to it; otherwise the
- original page stays the lower half and keeps its node pointer. */
- if (direction == FSP_DOWN) {
-
- lower_page_no = buf_frame_get_page_no(new_page);
- upper_page_no = buf_frame_get_page_no(page);
- lower_page = new_page;
- upper_page = page;
-
- /* Look up the index for the node pointer to page */
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
-
- /* Replace the address of the old child node (= page) with the
- address of the new lower half */
-
- btr_node_ptr_set_child_page_no(node_ptr,
- rec_get_offsets(
- node_ptr, index,
- NULL, ULINT_UNDEFINED,
- &heap),
- lower_page_no, mtr);
- mem_heap_empty(heap);
- } else {
- lower_page_no = buf_frame_get_page_no(page);
- upper_page_no = buf_frame_get_page_no(new_page);
- lower_page = page;
- upper_page = new_page;
- }
-
- /* Get the level of the split pages */
- level = btr_page_get_level(page, mtr);
-
- /* Build the node pointer (= node key and page address) for the upper
- half */
-
- node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
- upper_page_no, heap, level);
-
- /* Insert it next to the pointer to the lower half. Note that this
- may generate recursion leading to a split on the higher level. */
-
- btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
-
- /* Free the memory heap */
- mem_heap_free(heap);
-
- /* Get the previous and next pages of page */
-
- prev_page_no = btr_page_get_prev(page, mtr);
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
-
- /* Update page links of the level */
-
- if (prev_page_no != FIL_NULL) {
-
- prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_set_next(prev_page, lower_page_no, mtr);
- }
-
- if (next_page_no != FIL_NULL) {
-
- next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(next_page) == page_is_comp(page));
-
- btr_page_set_prev(next_page, upper_page_no, mtr);
- }
-
- btr_page_set_prev(lower_page, prev_page_no, mtr);
- btr_page_set_next(lower_page, upper_page_no, mtr);
- btr_page_set_level(lower_page, level, mtr);
-
- btr_page_set_prev(upper_page, lower_page_no, mtr);
- btr_page_set_next(upper_page, next_page_no, mtr);
- btr_page_set_level(upper_page, level, mtr);
-}
-
-/*****************************************************************
-Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
-is released within this function! NOTE that the operation of this
-function must always succeed, we cannot reverse it: therefore
-enough free disk space must be guaranteed to be available before
-this function is called.
-If the tuple still does not fit after the first split and a page
-reorganization, the function jumps back to func_start and splits again,
-this time choosing the split record with btr_page_get_sure_split_rec(). */
-
-rec_t*
-btr_page_split_and_insert(
-/*======================*/
- /* out: inserted record; NOTE: the tree
- x-latch is released! NOTE: 2 free disk
- pages must be available! */
- btr_cur_t* cursor, /* in: cursor at which to insert; when the
- function returns, the cursor is positioned
- on the predecessor of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page;
- ulint page_no;
- byte direction;
- ulint hint_page_no;
- page_t* new_page;
- rec_t* split_rec;
- page_t* left_page;
- page_t* right_page;
- page_t* insert_page;
- page_cur_t* page_cursor;
- rec_t* first_rec;
- byte* buf = 0; /* remove warning */
- rec_t* move_limit;
- ibool insert_will_fit;
- ulint n_iterations = 0;
- rec_t* rec;
- mem_heap_t* heap;
- ulint n_uniq;
- ulint* offsets;
-
- heap = mem_heap_create(1024);
- n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
-func_start:
- mem_heap_empty(heap);
- offsets = NULL;
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- page = btr_cur_get_page(cursor);
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_get_n_recs(page) >= 2);
-
- page_no = buf_frame_get_page_no(page);
-
- /* 1. Decide the split record; split_rec == NULL means that the
- tuple to be inserted should be the first record on the upper
- half-page. On a retry (n_iterations > 0) the size-based choice is
- used; otherwise the right and left convergence heuristics are tried
- in that order, falling back to the middle record of the page. */
-
- if (n_iterations > 0) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
- split_rec = btr_page_get_sure_split_rec(cursor, tuple);
-
- } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
-
- } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
- direction = FSP_DOWN;
- hint_page_no = page_no - 1;
- } else {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
- split_rec = page_get_middle_rec(page);
- }
-
- /* 2. Allocate a new page to the index */
- new_page = btr_page_alloc(cursor->index, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr);
- btr_page_create(new_page, cursor->index, mtr);
-
- /* 3. Calculate the first record on the upper half-page, and the
- first record (move_limit) on original page which ends up on the
- upper half. When the tuple itself is to be the first record, it is
- converted into a temporary physical record in buf so that a node
- pointer can be built from it. */
-
- if (split_rec != NULL) {
- first_rec = split_rec;
- move_limit = split_rec;
- } else {
- buf = mem_alloc(rec_get_converted_size(cursor->index, tuple));
-
- first_rec = rec_convert_dtuple_to_rec(buf,
- cursor->index, tuple);
- move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
- }
-
- /* 4. Do first the modifications in the tree structure */
-
- btr_attach_half_pages(cursor->index, page, first_rec,
- new_page, direction, mtr);
-
- if (split_rec == NULL) {
- mem_free(buf);
- }
-
- /* If the split is made on the leaf level and the insert will fit
- on the appropriate half-page, we may release the tree x-latch.
- We can then move the records after releasing the tree latch,
- thus reducing the tree latch contention. */
-
- if (split_rec) {
- offsets = rec_get_offsets(split_rec, cursor->index, offsets,
- n_uniq, &heap);
-
- insert_will_fit = btr_page_insert_fits(cursor,
- split_rec, offsets,
- tuple, heap);
- } else {
- insert_will_fit = btr_page_insert_fits(cursor,
- NULL, NULL,
- tuple, heap);
- }
-
- if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) {
-
- mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK);
- }
-
- /* 5. Move then the records to the new page */
- if (direction == FSP_DOWN) {
- /* fputs("Split left\n", stderr); */
-
- page_move_rec_list_start(new_page, page, move_limit,
- cursor->index, mtr);
- left_page = new_page;
- right_page = page;
-
- lock_update_split_left(right_page, left_page);
- } else {
- /* fputs("Split right\n", stderr); */
-
- page_move_rec_list_end(new_page, page, move_limit,
- cursor->index, mtr);
- left_page = page;
- right_page = new_page;
-
- lock_update_split_right(right_page, left_page);
- }
-
- /* 6. The split and the tree modification is now completed. Decide the
- page where the tuple should be inserted */
-
- if (split_rec == NULL) {
- insert_page = right_page;
-
- } else {
- offsets = rec_get_offsets(first_rec, cursor->index,
- offsets, n_uniq, &heap);
-
- if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) {
-
- insert_page = right_page;
- } else {
- insert_page = left_page;
- }
- }
-
- /* 7. Reposition the cursor for insert and try insertion */
- page_cursor = btr_cur_get_page_cur(cursor);
-
- page_cur_search(insert_page, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
-
- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
-
- if (rec != NULL) {
- /* Insert fit on the page: update the free bits for the
- left and right pages in the same mtr */
-
- ibuf_update_free_bits_for_two_pages_low(cursor->index,
- left_page,
- right_page, mtr);
- /* fprintf(stderr, "Split and insert done %lu %lu\n",
- buf_frame_get_page_no(left_page),
- buf_frame_get_page_no(right_page)); */
- mem_heap_free(heap);
- return(rec);
- }
-
- /* 8. If insert did not fit, try page reorganization */
-
- btr_page_reorganize(insert_page, cursor->index, mtr);
-
- page_cur_search(insert_page, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
-
- if (rec == NULL) {
- /* The insert did not fit on the page: loop back to the
- start of the function for a new split. The cursor was
- repositioned on insert_page above, so the next round
- splits that page. */
-
- /* We play safe and reset the free bits for new_page */
- ibuf_reset_free_bits(cursor->index, new_page);
-
- /* fprintf(stderr, "Split second round %lu\n",
- buf_frame_get_page_no(page)); */
- n_iterations++;
- ut_ad(n_iterations < 2);
- ut_ad(!insert_will_fit);
-
- goto func_start;
- }
-
- /* Insert fit on the page: update the free bits for the
- left and right pages in the same mtr */
-
- ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page,
- right_page, mtr);
-#if 0
- fprintf(stderr, "Split and insert done %lu %lu\n",
- buf_frame_get_page_no(left_page),
- buf_frame_get_page_no(right_page));
-#endif
-
- ut_ad(page_validate(left_page, cursor->index));
- ut_ad(page_validate(right_page, cursor->index));
-
- mem_heap_free(heap);
- return(rec);
-}
-
-/*****************************************************************
-Removes a page from the level list of pages. Only the neighbours are
-modified: the next link of the previous page and the prev link of the
-next page are made to skip over page. The prev/next fields of page itself
-are not written here. */
-static
-void
-btr_level_list_remove(
-/*==================*/
- page_t* page, /* in: page to remove */
- mtr_t* mtr) /* in: mtr */
-{
- ulint space;
- ulint prev_page_no;
- page_t* prev_page;
- ulint next_page_no;
- page_t* next_page;
-
- ut_ad(page && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- /* Get the previous and next page numbers of page */
-
- prev_page_no = btr_page_get_prev(page, mtr);
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_frame_get_space_id(page);
-
- /* Update page links of the level; FIL_NULL means that there is no
- neighbour on that side */
-
- if (prev_page_no != FIL_NULL) {
-
- prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_set_next(prev_page, next_page_no, mtr);
- }
-
- if (next_page_no != FIL_NULL) {
-
- next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(next_page) == page_is_comp(page));
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(next_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_set_prev(next_page, prev_page_no, mtr);
- }
-}
-
-/********************************************************************
-Writes the redo log record for setting an index record as the predefined
-minimum record. The log record consists of the initial log record of type
-MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK, chosen by comp, followed by
-the offset of rec within its page in 2 bytes. */
-UNIV_INLINE
-void
-btr_set_min_rec_mark_log(
-/*=====================*/
- rec_t* rec, /* in: record */
- ulint comp, /* nonzero=compact record format */
- mtr_t* mtr) /* in: mtr */
-{
- mlog_write_initial_log_record(
- rec, comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr);
-
- /* Write rec offset as a 2-byte ulint */
- mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES);
-}
-
-/********************************************************************
-Parses the redo log record for setting an index record as the predefined
-minimum record. The body of the log record is a 2-byte record offset
-within the page. If a page is given, the mark is also applied to the
-record at that offset. */
-
-byte*
-btr_parse_set_min_rec_mark(
-/*=======================*/
- /* out: end of log record or NULL;
- NULL is returned when fewer than
- 2 bytes remain in the buffer */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- ulint comp, /* in: nonzero=compact page format */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
-{
- rec_t* rec;
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- if (page) {
- ut_a(!page_is_comp(page) == !comp);
-
- rec = page + mach_read_from_2(ptr);
-
- btr_set_min_rec_mark(rec, comp, mtr);
- }
-
- return(ptr + 2);
-}
-
-/********************************************************************
-Sets a record as the predefined minimum record. The flag
-REC_INFO_MIN_REC_FLAG is OR-ed into the info bits of the record, and the
-change is then written to the redo log. */
-
-void
-btr_set_min_rec_mark(
-/*=================*/
- rec_t* rec, /* in: record */
- ulint comp, /* in: nonzero=compact page format */
- mtr_t* mtr) /* in: mtr */
-{
- ulint info_bits;
-
- info_bits = rec_get_info_bits(rec, comp);
-
- rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG);
-
- btr_set_min_rec_mark_log(rec, comp, mtr);
-}
-
-/*****************************************************************
-Deletes on the upper level the node pointer to a page. The delete is done
-pessimistically and any result other than DB_SUCCESS trips ut_a(). If the
-delete did not already compress the father page, compression is attempted
-afterwards if useful. */
-
-void
-btr_node_ptr_delete(
-/*================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page whose node pointer is deleted */
- mtr_t* mtr) /* in: mtr */
-{
- rec_t* node_ptr;
- btr_cur_t cursor;
- ibool compressed;
- ulint err;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- /* Delete node pointer on father page */
-
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
-
- btr_cur_position(index, node_ptr, &cursor);
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE,
- mtr);
- ut_a(err == DB_SUCCESS);
-
- if (!compressed) {
- btr_cur_compress_if_useful(&cursor, mtr);
- }
-}
-
-/*****************************************************************
-If page is the only on its level, this function moves its records to the
-father page, thus reducing the tree height. The level field of the father
-and of every ancestor up to the root is decreased by one, and the lifted
-page is freed. */
-static
-void
-btr_lift_page_up(
-/*=============*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page which is the only on its level;
- must not be empty: use
- btr_discard_only_page_on_level if the last
- record from the page should be removed */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* father_page;
- page_t* iter_page;
- page_t* pages[BTR_MAX_LEVELS];
- ulint page_level;
- ulint root_page_no;
- ulint ancestors;
- ulint i;
-
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- father_page = buf_frame_align(
- btr_page_get_father_node_ptr(index, page, mtr));
-
- page_level = btr_page_get_level(page, mtr);
- root_page_no = dict_index_get_page(index);
-
- ancestors = 1;
- pages[0] = father_page;
-
- /* Store all ancestor pages so we can reset their levels later on.
- We have to do all the searches on the tree now because later on,
- after we've replaced the first level, the tree is in an inconsistent
- state and can not be searched. The walk stops at the root page;
- ut_a() guards the fixed-size pages[] array against overflow. */
- iter_page = father_page;
- for (;;) {
- if (buf_block_get_page_no(buf_block_align(iter_page))
- == root_page_no) {
-
- break;
- }
-
- ut_a(ancestors < BTR_MAX_LEVELS);
-
- iter_page = buf_frame_align(
- btr_page_get_father_node_ptr(index, iter_page, mtr));
-
- pages[ancestors++] = iter_page;
- }
-
- btr_search_drop_page_hash_index(page);
-
- /* Make the father empty */
- btr_page_empty(father_page, mtr);
-
- /* Move records to the father */
- page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page),
- index, mtr);
- lock_update_copy_and_discard(father_page, page);
-
- /* Go upward to root page, decreasing levels by one. pages[0] is the
- father and takes over the level of the lifted page; each further
- ancestor gets a level one higher than the page below it. */
- for (i = 0; i < ancestors; i++) {
- iter_page = pages[i];
-
- ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1));
-
- btr_page_set_level(iter_page, page_level, mtr);
- page_level++;
- }
-
- /* Free the file page */
- btr_page_free(index, page, mtr);
-
- /* We play safe and reset the free bits for the father */
- ibuf_reset_free_bits(index, father_page);
- ut_ad(page_validate(father_page, index));
- ut_ad(btr_check_node_ptr(index, father_page, mtr));
-}
-
-/*****************************************************************
-Tries to merge the page first to the left immediate brother if such a
-brother exists, and the node pointers to the current page and to the brother
-reside on the same page. If the left brother does not satisfy these
-conditions, looks at the right brother. If the page is the only one on that
-level lifts the records of the page to the father page, thus reducing the
-tree height. It is assumed that mtr holds an x-latch on the tree and on the
-page. If cursor is on the leaf level, mtr must also hold x-latches to the
-brothers, if they exist. NOTE: it is assumed that the caller has reserved
-enough free extents so that the compression will always succeed if done!
-NOTE(review): in the code below the choice between the brothers depends
-only on whether a left brother exists; no check that the two node pointers
-reside on the same page is visible here, and the right brother is only
-considered when there is no left brother. The function returns nothing, so
-a caller cannot tell from it whether a merge took place. */
-
-void
-btr_compress(
-/*=========*/
- btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
- the page must not be empty: in record delete
- use btr_discard_page if the page would become
- empty */
- mtr_t* mtr) /* in: mtr */
-{
- dict_index_t* index;
- ulint space;
- ulint left_page_no;
- ulint right_page_no;
- page_t* merge_page;
- page_t* father_page;
- ibool is_left;
- page_t* page;
- rec_t* orig_pred;
- rec_t* orig_succ;
- rec_t* node_ptr;
- ulint data_size;
- ulint n_recs;
- ulint max_ins_size;
- ulint max_ins_size_reorg;
- ulint level;
- ulint comp;
-
- page = btr_cur_get_page(cursor);
- index = btr_cur_get_index(cursor);
- comp = page_is_comp(page);
- ut_a((ibool)!!comp == dict_table_is_comp(index->table));
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- level = btr_page_get_level(page, mtr);
- space = dict_index_get_space(index);
-
- left_page_no = btr_page_get_prev(page, mtr);
- right_page_no = btr_page_get_next(page, mtr);
-
-#if 0
- fprintf(stderr, "Merge left page %lu right %lu \n",
- left_page_no, right_page_no);
-#endif
-
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
- ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
- father_page = buf_frame_align(node_ptr);
- ut_a(comp == page_is_comp(father_page));
-
- /* Decide the page to which we try to merge and which will inherit
- the locks */
-
- is_left = left_page_no != FIL_NULL;
-
- if (is_left) {
-
- merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
- mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(merge_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- } else if (right_page_no != FIL_NULL) {
-
- merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
- mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- } else {
- /* The page is the only one on the level, lift the records
- to the father */
- btr_lift_page_up(index, page, mtr);
-
- return;
- }
-
- n_recs = page_get_n_recs(page);
- data_size = page_get_data_size(page);
- ut_a(page_is_comp(merge_page) == comp);
-
- max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
- merge_page, n_recs);
- if (data_size > max_ins_size_reorg) {
-
- /* No space for merge */
-
- return;
- }
-
- ut_ad(page_validate(merge_page, index));
-
- max_ins_size = page_get_max_insert_size(merge_page, n_recs);
-
- if (data_size > max_ins_size) {
-
- /* We have to reorganize merge_page */
-
- btr_page_reorganize(merge_page, index, mtr);
-
- max_ins_size = page_get_max_insert_size(merge_page, n_recs);
-
- ut_ad(page_validate(merge_page, index));
- ut_ad(page_get_max_insert_size(merge_page, n_recs)
- == max_ins_size_reorg);
- }
-
- if (data_size > max_ins_size) {
-
- /* Add fault tolerance, though this should never happen.
- Note that merge_page may already have been reorganized
- above when this return is taken. */
-
- return;
- }
-
- btr_search_drop_page_hash_index(page);
-
- /* Remove the page from the level list */
- btr_level_list_remove(page, mtr);
-
- if (is_left) {
- btr_node_ptr_delete(index, page, mtr);
- } else {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
- /* Replace the address of the old child node (= page) with the
- address of the merge page to the right. After this the node
- pointer that used to point to page points to merge_page, and
- the node pointer found for merge_page is the one deleted. */
-
- btr_node_ptr_set_child_page_no(node_ptr,
- rec_get_offsets(
- node_ptr, index,
- offsets_,
- ULINT_UNDEFINED,
- &heap),
- right_page_no, mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- btr_node_ptr_delete(index, merge_page, mtr);
- }
-
- /* Move records to the merge page */
- if (is_left) {
- orig_pred = page_rec_get_prev(
- page_get_supremum_rec(merge_page));
- page_copy_rec_list_start(merge_page, page,
- page_get_supremum_rec(page),
- index, mtr);
-
- lock_update_merge_left(merge_page, orig_pred, page);
- } else {
- orig_succ = page_rec_get_next(
- page_get_infimum_rec(merge_page));
- page_copy_rec_list_end(merge_page, page,
- page_get_infimum_rec(page),
- index, mtr);
-
- lock_update_merge_right(orig_succ, page);
- }
-
- /* We have added new records to merge_page: update its free bits */
- ibuf_update_free_bits_if_full(index, merge_page,
- UNIV_PAGE_SIZE, ULINT_UNDEFINED);
-
- ut_ad(page_validate(merge_page, index));
-
- /* Free the file page */
- btr_page_free(index, page, mtr);
-
- ut_ad(btr_check_node_ptr(index, merge_page, mtr));
-}
-
-/*****************************************************************
-Discards a page that is the only page on its level. The father page takes
-over the level of the discarded page. If the father is the root, it is
-emptied; otherwise the father is itself discarded by a recursive call. */
-static
-void
-btr_discard_only_page_on_level(
-/*===========================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page which is the only on its level */
- mtr_t* mtr) /* in: mtr */
-{
- rec_t* node_ptr;
- page_t* father_page;
- ulint page_level;
-
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(page);
-
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
- father_page = buf_frame_align(node_ptr);
-
- page_level = btr_page_get_level(page, mtr);
-
- lock_update_discard(page_get_supremum_rec(father_page), page);
-
- btr_page_set_level(father_page, page_level, mtr);
-
- /* Free the file page */
- btr_page_free(index, page, mtr);
-
- if (buf_frame_get_page_no(father_page) == dict_index_get_page(index)) {
- /* The father is the root page */
-
- btr_page_empty(father_page, mtr);
-
- /* We play safe and reset the free bits for the father */
- ibuf_reset_free_bits(index, father_page);
- } else {
- ut_ad(page_get_n_recs(father_page) == 1);
-
- btr_discard_only_page_on_level(index, father_page, mtr);
- }
-}
-
-/*****************************************************************
-Discards a page from a B-tree. This is used to remove the last record from
-a B-tree page: the whole page must be removed at the same time. This cannot
-be used for the root page, which is allowed to be empty. */
-
-void
-btr_discard_page(
-/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to discard: not on
- the root page */
- mtr_t* mtr) /* in: mtr */
-{
- dict_index_t* index;
- ulint space;
- ulint left_page_no;
- ulint right_page_no;
- page_t* merge_page;
- page_t* page;
- rec_t* node_ptr;
-
- page = btr_cur_get_page(cursor);
- index = btr_cur_get_index(cursor);
-
- ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page));
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- space = dict_index_get_space(index);
-
- /* Decide the page which will inherit the locks */
-
- left_page_no = btr_page_get_prev(page, mtr);
- right_page_no = btr_page_get_next(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- merge_page = btr_page_get(space, left_page_no, RW_X_LATCH,
- mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(merge_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- } else if (right_page_no != FIL_NULL) {
- merge_page = btr_page_get(space, right_page_no, RW_X_LATCH,
- mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- } else {
- btr_discard_only_page_on_level(index, page, mtr);
-
- return;
- }
-
- ut_a(page_is_comp(merge_page) == page_is_comp(page));
- btr_search_drop_page_hash_index(page);
-
- if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) {
-
- /* We have to mark the leftmost node pointer on the right
- side page as the predefined minimum record */
-
- node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
-
- ut_ad(page_rec_is_user_rec(node_ptr));
-
- btr_set_min_rec_mark(node_ptr, page_is_comp(merge_page), mtr);
- }
-
- btr_node_ptr_delete(index, page, mtr);
-
- /* Remove the page from the level list */
- btr_level_list_remove(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- lock_update_discard(page_get_supremum_rec(merge_page), page);
- } else {
- lock_update_discard(page_rec_get_next(
- page_get_infimum_rec(merge_page)),
- page);
- }
-
- /* Free the file page */
- btr_page_free(index, page, mtr);
-
- ut_ad(btr_check_node_ptr(index, merge_page, mtr));
-}
-
-#ifdef UNIV_BTR_PRINT
-/*****************************************************************
-Prints size info of a B-tree. */
-
-void
-btr_print_size(
-/*===========*/
- dict_index_t* index) /* in: index tree */
-{
- page_t* root;
- fseg_header_t* seg;
- mtr_t mtr;
-
- if (index->type & DICT_IBUF) {
- fputs("Sorry, cannot print info of an ibuf tree:"
- " use ibuf functions\n", stderr);
-
- return;
- }
-
- mtr_start(&mtr);
-
- root = btr_root_get(index, &mtr);
-
- seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
- fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
- fseg_print(seg, &mtr);
-
- if (!(index->type & DICT_UNIVERSAL)) {
-
- seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
- fseg_print(seg, &mtr);
- }
-
- mtr_commit(&mtr);
-}
-
-/****************************************************************
-Prints recursively index tree pages. */
-static
-void
-btr_print_recursive(
-/*================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: index page */
- ulint width, /* in: print this many entries from start
- and end */
- mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */
- ulint** offsets,/* in/out: buffer for rec_get_offsets() */
- mtr_t* mtr) /* in: mtr */
-{
- page_cur_t cursor;
- ulint n_recs;
- ulint i = 0;
- mtr_t mtr2;
- rec_t* node_ptr;
- page_t* child;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
- (ulong) btr_page_get_level(page, mtr),
- (ulong) buf_frame_get_page_no(page));
-
- page_print(page, index, width, width);
-
- n_recs = page_get_n_recs(page);
-
- page_cur_set_before_first(page, &cursor);
- page_cur_move_to_next(&cursor);
-
- while (!page_cur_is_after_last(&cursor)) {
-
- if (0 == btr_page_get_level(page, mtr)) {
-
- /* If this is the leaf level, do nothing */
-
- } else if ((i <= width) || (i >= n_recs - width)) {
-
- mtr_start(&mtr2);
-
- node_ptr = page_cur_get_rec(&cursor);
-
- *offsets = rec_get_offsets(node_ptr, index, *offsets,
- ULINT_UNDEFINED, heap);
- child = btr_node_ptr_get_child(node_ptr,
- *offsets, &mtr2);
- btr_print_recursive(index, child, width,
- heap, offsets, &mtr2);
- mtr_commit(&mtr2);
- }
-
- page_cur_move_to_next(&cursor);
- i++;
- }
-}
-
-/******************************************************************
-Prints directories and other info of all nodes in the tree. */
-
-void
-btr_print_index(
-/*============*/
- dict_index_t* index, /* in: index */
- ulint width) /* in: print this many entries from start
- and end */
-{
- mtr_t mtr;
- page_t* root;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- fputs("--------------------------\n"
- "INDEX TREE PRINT\n", stderr);
-
- mtr_start(&mtr);
-
- root = btr_root_get(index, &mtr);
-
- btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- mtr_commit(&mtr);
-
- btr_validate_index(index, NULL);
-}
-#endif /* UNIV_BTR_PRINT */
-
-#ifdef UNIV_DEBUG
-/****************************************************************
-Checks that the node pointer to a page is appropriate. */
-
-ibool
-btr_check_node_ptr(
-/*===============*/
- /* out: TRUE */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: index page */
- mtr_t* mtr) /* in: mtr */
-{
- mem_heap_t* heap;
- rec_t* node_ptr;
- dtuple_t* node_ptr_tuple;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
-
- return(TRUE);
- }
-
- node_ptr = btr_page_get_father_node_ptr(index, page, mtr);
-
- if (btr_page_get_level(page, mtr) == 0) {
-
- return(TRUE);
- }
-
- heap = mem_heap_create(256);
-
- node_ptr_tuple = dict_index_build_node_ptr(
- index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
- btr_page_get_level(page, mtr));
-
- ut_a(!cmp_dtuple_rec(node_ptr_tuple, node_ptr,
- rec_get_offsets(node_ptr, index,
- NULL, ULINT_UNDEFINED, &heap)));
-
- mem_heap_free(heap);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/****************************************************************
-Display identification information for a record. */
-static
-void
-btr_index_rec_validate_report(
-/*==========================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: index record */
- dict_index_t* index) /* in: index */
-{
- fputs("InnoDB: Record in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, ", page %lu, at offset %lu\n",
- buf_frame_get_page_no(page), (ulint)(rec - page));
-}
-
-/****************************************************************
-Checks the size and number of fields in a record based on the definition of
-the index. */
-
-ibool
-btr_index_rec_validate(
-/*===================*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: index record */
- dict_index_t* index, /* in: index */
- ibool dump_on_error) /* in: TRUE if the function
- should print hex dump of record
- and page on error */
-{
- ulint len;
- ulint n;
- ulint i;
- page_t* page;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- page = buf_frame_align(rec);
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- /* The insert buffer index tree can contain records from any
- other index: we cannot check the number of fields or
- their length */
-
- return(TRUE);
- }
-
- if (UNIV_UNLIKELY((ibool)!!page_is_comp(page)
- != dict_table_is_comp(index->table))) {
- btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
- (ulong) !!page_is_comp(page),
- (ulong) dict_table_is_comp(index->table));
-
- return(FALSE);
- }
-
- n = dict_index_get_n_fields(index);
-
- if (!page_is_comp(page)
- && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) {
- btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
- (ulong) rec_get_n_fields_old(rec), (ulong) n);
-
- if (dump_on_error) {
- buf_page_print(page);
-
- fputs("InnoDB: corrupt record ", stderr);
- rec_print_old(stderr, rec);
- putc('\n', stderr);
- }
- return(FALSE);
- }
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- for (i = 0; i < n; i++) {
- ulint fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(index, i));
-
- rec_get_nth_field(rec, offsets, i, &len);
-
- /* Note that if fixed_size != 0, it equals the
- length of a fixed-size column in the clustered index.
- A prefix index of the column is of fixed, but different
- length. When fixed_size == 0, prefix_len is the maximum
- length of the prefix index column. */
-
- if ((dict_index_get_nth_field(index, i)->prefix_len == 0
- && len != UNIV_SQL_NULL && fixed_size
- && len != fixed_size)
- || (dict_index_get_nth_field(index, i)->prefix_len > 0
- && len != UNIV_SQL_NULL
- && len
- > dict_index_get_nth_field(index, i)->prefix_len)) {
-
- btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr,
- "InnoDB: field %lu len is %lu,"
- " should be %lu\n",
- (ulong) i, (ulong) len, (ulong) fixed_size);
-
- if (dump_on_error) {
- buf_page_print(page);
-
- fputs("InnoDB: corrupt record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- }
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(FALSE);
- }
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(TRUE);
-}
-
-/****************************************************************
-Checks the size and number of fields in records based on the definition of
-the index. */
-static
-ibool
-btr_index_page_validate(
-/*====================*/
- /* out: TRUE if ok */
- page_t* page, /* in: index page */
- dict_index_t* index) /* in: index */
-{
- page_cur_t cur;
- ibool ret = TRUE;
-
- page_cur_set_before_first(page, &cur);
- page_cur_move_to_next(&cur);
-
- for (;;) {
- if (page_cur_is_after_last(&cur)) {
-
- break;
- }
-
- if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
-
- return(FALSE);
- }
-
- page_cur_move_to_next(&cur);
- }
-
- return(ret);
-}
-
-/****************************************************************
-Report an error on one page of an index tree. */
-static
-void
-btr_validate_report1(
-/*=================*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- ulint level, /* in: B-tree level */
- page_t* page) /* in: index page */
-{
- fprintf(stderr, "InnoDB: Error in page %lu of ",
- buf_frame_get_page_no(page));
- dict_index_name_print(stderr, NULL, index);
- if (level) {
- fprintf(stderr, ", index tree level %lu", level);
- }
- putc('\n', stderr);
-}
-
-/****************************************************************
-Report an error on two pages of an index tree. */
-static
-void
-btr_validate_report2(
-/*=================*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- ulint level, /* in: B-tree level */
- page_t* page1, /* in: first index page */
- page_t* page2) /* in: second index page */
-{
- fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
- buf_frame_get_page_no(page1),
- buf_frame_get_page_no(page2));
- dict_index_name_print(stderr, NULL, index);
- if (level) {
- fprintf(stderr, ", index tree level %lu", level);
- }
- putc('\n', stderr);
-}
-
-/****************************************************************
-Validates index tree level. */
-static
-ibool
-btr_validate_level(
-/*===============*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index tree */
- trx_t* trx, /* in: transaction or NULL */
- ulint level) /* in: level number */
-{
- ulint space;
- page_t* page;
- page_t* right_page = 0; /* remove warning */
- page_t* father_page;
- page_t* right_father_page;
- rec_t* node_ptr;
- rec_t* right_node_ptr;
- rec_t* rec;
- ulint right_page_no;
- ulint left_page_no;
- page_cur_t cursor;
- dtuple_t* node_ptr_tuple;
- ibool ret = TRUE;
- mtr_t mtr;
- mem_heap_t* heap = mem_heap_create(256);
- ulint* offsets = NULL;
- ulint* offsets2= NULL;
-
- mtr_start(&mtr);
-
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- page = btr_root_get(index, &mtr);
-
- space = buf_frame_get_space_id(page);
-
- while (level != btr_page_get_level(page, &mtr)) {
-
- ut_a(btr_page_get_level(page, &mtr) > 0);
-
- page_cur_set_before_first(page, &cursor);
- page_cur_move_to_next(&cursor);
-
- node_ptr = page_cur_get_rec(&cursor);
- offsets = rec_get_offsets(node_ptr, index, offsets,
- ULINT_UNDEFINED, &heap);
- page = btr_node_ptr_get_child(node_ptr, offsets, &mtr);
- }
-
- /* Now we are on the desired level. Loop through the pages on that
- level. */
-loop:
- if (trx_is_interrupted(trx)) {
- mtr_commit(&mtr);
- mem_heap_free(heap);
- return(ret);
- }
- mem_heap_empty(heap);
- offsets = offsets2 = NULL;
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- /* Check ordering etc. of records */
-
- if (!page_validate(page, index)) {
- btr_validate_report1(index, level, page);
-
- ret = FALSE;
- } else if (level == 0) {
- /* We are on level 0. Check that the records have the right
- number of fields, and field lengths are right. */
-
- if (!btr_index_page_validate(page, index)) {
-
- ret = FALSE;
- }
- }
-
- ut_a(btr_page_get_level(page, &mtr) == level);
-
- right_page_no = btr_page_get_next(page, &mtr);
- left_page_no = btr_page_get_prev(page, &mtr);
-
- ut_a((page_get_n_recs(page) > 0)
- || ((level == 0)
- && (buf_frame_get_page_no(page)
- == dict_index_get_page(index))));
-
- if (right_page_no != FIL_NULL) {
- rec_t* right_rec;
- right_page = btr_page_get(space, right_page_no, RW_X_LATCH,
- &mtr);
- if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
- != buf_frame_get_page_no(page))) {
- btr_validate_report2(index, level, page, right_page);
- fputs("InnoDB: broken FIL_PAGE_NEXT"
- " or FIL_PAGE_PREV links\n", stderr);
- buf_page_print(page);
- buf_page_print(right_page);
-
- ret = FALSE;
- }
-
- if (UNIV_UNLIKELY(page_is_comp(right_page)
- != page_is_comp(page))) {
- btr_validate_report2(index, level, page, right_page);
- fputs("InnoDB: 'compact' flag mismatch\n", stderr);
- buf_page_print(page);
- buf_page_print(right_page);
-
- ret = FALSE;
-
- goto node_ptr_fails;
- }
-
- rec = page_rec_get_prev(page_get_supremum_rec(page));
- right_rec = page_rec_get_next(page_get_infimum_rec(
- right_page));
- offsets = rec_get_offsets(rec, index,
- offsets, ULINT_UNDEFINED, &heap);
- offsets2 = rec_get_offsets(right_rec, index,
- offsets2, ULINT_UNDEFINED, &heap);
- if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec,
- offsets, offsets2,
- index) >= 0)) {
-
- btr_validate_report2(index, level, page, right_page);
-
- fputs("InnoDB: records in wrong order"
- " on adjacent pages\n", stderr);
-
- buf_page_print(page);
- buf_page_print(right_page);
-
- fputs("InnoDB: record ", stderr);
- rec = page_rec_get_prev(page_get_supremum_rec(page));
- rec_print(stderr, rec, index);
- putc('\n', stderr);
- fputs("InnoDB: record ", stderr);
- rec = page_rec_get_next(
- page_get_infimum_rec(right_page));
- rec_print(stderr, rec, index);
- putc('\n', stderr);
-
- ret = FALSE;
- }
- }
-
- if (level > 0 && left_page_no == FIL_NULL) {
- ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- page_rec_get_next(page_get_infimum_rec(page)),
- page_is_comp(page)));
- }
-
- if (buf_frame_get_page_no(page) != dict_index_get_page(index)) {
-
- /* Check father node pointers */
-
- node_ptr = btr_page_get_father_node_ptr(index, page, &mtr);
- father_page = buf_frame_align(node_ptr);
- offsets = rec_get_offsets(node_ptr, index,
- offsets, ULINT_UNDEFINED, &heap);
-
- if (btr_node_ptr_get_child_page_no(node_ptr, offsets)
- != buf_frame_get_page_no(page)
- || node_ptr != btr_page_get_father_for_rec(
- index, page,
- page_rec_get_prev(page_get_supremum_rec(page)),
- &mtr)) {
- btr_validate_report1(index, level, page);
-
- fputs("InnoDB: node pointer to the page is wrong\n",
- stderr);
-
- buf_page_print(father_page);
- buf_page_print(page);
-
- fputs("InnoDB: node ptr ", stderr);
- rec_print_new(stderr, node_ptr, offsets);
-
- fprintf(stderr, "\n"
- "InnoDB: node ptr child page n:o %lu\n",
- (unsigned long) btr_node_ptr_get_child_page_no
- (node_ptr, offsets));
-
- fputs("InnoDB: record on page ", stderr);
- rec = btr_page_get_father_for_rec(
- index, page,
- page_rec_get_prev(page_get_supremum_rec(page)),
- &mtr);
- rec_print(stderr, rec, index);
- putc('\n', stderr);
- ret = FALSE;
-
- goto node_ptr_fails;
- }
-
- if (btr_page_get_level(page, &mtr) > 0) {
- offsets = rec_get_offsets(node_ptr, index,
- offsets, ULINT_UNDEFINED,
- &heap);
-
- node_ptr_tuple = dict_index_build_node_ptr(
- index,
- page_rec_get_next(page_get_infimum_rec(page)),
- 0, heap, btr_page_get_level(page, &mtr));
-
- if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
- offsets)) {
- rec_t* first_rec = page_rec_get_next(
- page_get_infimum_rec(page));
-
- btr_validate_report1(index, level, page);
-
- buf_page_print(father_page);
- buf_page_print(page);
-
- fputs("InnoDB: Error: node ptrs differ"
- " on levels > 0\n"
- "InnoDB: node ptr ", stderr);
- rec_print_new(stderr, node_ptr, offsets);
- fputs("InnoDB: first rec ", stderr);
- rec_print(stderr, first_rec, index);
- putc('\n', stderr);
- ret = FALSE;
-
- goto node_ptr_fails;
- }
- }
-
- if (left_page_no == FIL_NULL) {
- ut_a(node_ptr == page_rec_get_next(
- page_get_infimum_rec(father_page)));
- ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
- }
-
- if (right_page_no == FIL_NULL) {
- ut_a(node_ptr == page_rec_get_prev(
- page_get_supremum_rec(father_page)));
- ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
- } else {
- right_node_ptr = btr_page_get_father_node_ptr(
- index, right_page, &mtr);
- if (page_rec_get_next(node_ptr)
- != page_get_supremum_rec(father_page)) {
-
- if (right_node_ptr
- != page_rec_get_next(node_ptr)) {
- ret = FALSE;
- fputs("InnoDB: node pointer to"
- " the right page is wrong\n",
- stderr);
-
- btr_validate_report1(index, level,
- page);
-
- buf_page_print(father_page);
- buf_page_print(page);
- buf_page_print(right_page);
- }
- } else {
- right_father_page = buf_frame_align(
- right_node_ptr);
-
- if (right_node_ptr != page_rec_get_next(
- page_get_infimum_rec(
- right_father_page))) {
- ret = FALSE;
- fputs("InnoDB: node pointer 2 to"
- " the right page is wrong\n",
- stderr);
-
- btr_validate_report1(index, level,
- page);
-
- buf_page_print(father_page);
- buf_page_print(right_father_page);
- buf_page_print(page);
- buf_page_print(right_page);
- }
-
- if (buf_frame_get_page_no(right_father_page)
- != btr_page_get_next(father_page, &mtr)) {
-
- ret = FALSE;
- fputs("InnoDB: node pointer 3 to"
- " the right page is wrong\n",
- stderr);
-
- btr_validate_report1(index, level,
- page);
-
- buf_page_print(father_page);
- buf_page_print(right_father_page);
- buf_page_print(page);
- buf_page_print(right_page);
- }
- }
- }
- }
-
-node_ptr_fails:
- /* Commit the mini-transaction to release the latch on 'page'.
- Re-acquire the latch on right_page, which will become 'page'
- on the next loop. The page has already been checked. */
- mtr_commit(&mtr);
-
- if (right_page_no != FIL_NULL) {
- mtr_start(&mtr);
-
- page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr);
-
- goto loop;
- }
-
- mem_heap_free(heap);
- return(ret);
-}
-
-/******************************************************************
-Checks the consistency of an index tree. */
-
-ibool
-btr_validate_index(
-/*===============*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- trx_t* trx) /* in: transaction or NULL */
-{
- mtr_t mtr;
- page_t* root;
- ulint i;
- ulint n;
-
- mtr_start(&mtr);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- root = btr_root_get(index, &mtr);
- n = btr_page_get_level(root, &mtr);
-
- for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
- if (!btr_validate_level(index, trx, n - i)) {
-
- mtr_commit(&mtr);
-
- return(FALSE);
- }
- }
-
- mtr_commit(&mtr);
-
- return(TRUE);
-}
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
deleted file mode 100644
index a2f62255dd6..00000000000
--- a/storage/innobase/btr/btr0cur.c
+++ /dev/null
@@ -1,3848 +0,0 @@
-/******************************************************
-The index tree cursor
-
-All changes that row operations make to a B-tree or the records
-there must go through this module! Undo log records are written here
-of every modify or insert of a clustered index record.
-
- NOTE!!!
-To make sure we do not run out of disk space during a pessimistic
-insert or update, we have to reserve 2 x the height of the index tree
-many pages in the tablespace before we start the operation, because
-if leaf splitting has been started, it is difficult to undo, except
-by crashing the database and doing a roll-forward.
-
-(c) 1994-2001 Innobase Oy
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0cur.h"
-
-#ifdef UNIV_NONINL
-#include "btr0cur.ic"
-#endif
-
-#include "page0page.h"
-#include "rem0rec.h"
-#include "rem0cmp.h"
-#include "btr0btr.h"
-#include "btr0sea.h"
-#include "row0upd.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "srv0srv.h"
-#include "ibuf0ibuf.h"
-#include "lock0lock.h"
-
-#ifdef UNIV_DEBUG
-/* If the following is set to TRUE, this module prints a lot of
-trace information of individual record operations */
-ibool btr_cur_print_record_ops = FALSE;
-#endif /* UNIV_DEBUG */
-
-ulint btr_cur_n_non_sea = 0;
-ulint btr_cur_n_sea = 0;
-ulint btr_cur_n_non_sea_old = 0;
-ulint btr_cur_n_sea_old = 0;
-
-/* In the optimistic insert, if the insert does not fit, but this much space
-can be released by page reorganize, then it is reorganized */
-
-#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
-
-/* When estimating number of different key values in an index, sample
-this many index pages */
-#define BTR_KEY_VAL_ESTIMATE_N_PAGES 8
-
-/* The structure of a BLOB part header */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_PART_LEN 0 /* BLOB part len on this
- page */
-#define BTR_BLOB_HDR_NEXT_PAGE_NO 4 /* next BLOB part page no,
- FIL_NULL if none */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_SIZE 8
-
-/***********************************************************************
-Marks all extern fields in a record as owned by the record. This function
-should be called if the delete mark of a record is removed: a not delete
-marked record always owns all its extern fields. */
-static
-void
-btr_cur_unmark_extern_fields(
-/*=========================*/
- rec_t* rec, /* in: record in a clustered index */
- mtr_t* mtr, /* in: mtr */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/***********************************************************************
-Adds path information to the cursor for the current page, for which
-the binary search has been performed. */
-static
-void
-btr_cur_add_path_info(
-/*==================*/
- btr_cur_t* cursor, /* in: cursor positioned on a page */
- ulint height, /* in: height of the page in tree;
- 0 means leaf node */
- ulint root_height); /* in: root node height in tree */
-/***************************************************************
-Frees the externally stored fields for a record, if the field is mentioned
-in the update vector. */
-static
-void
-btr_rec_free_updated_extern_fields(
-/*===============================*/
- dict_index_t* index, /* in: index of rec; the index tree MUST be
- X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- upd_t* update, /* in: update vector */
- ibool do_not_free_inherited,/* in: TRUE if called in a
- rollback and we do not want to free
- inherited fields */
- mtr_t* mtr); /* in: mini-transaction handle which contains
- an X-latch to record page and to the tree */
-/***************************************************************
-Gets the externally stored size of a record, in units of a database page. */
-static
-ulint
-btr_rec_get_externally_stored_len(
-/*==============================*/
- /* out: externally stored part,
- in units of a database page */
- rec_t* rec, /* in: record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-
-/*==================== B-TREE SEARCH =========================*/
-
-/************************************************************************
-Latches the leaf page or pages requested. */
-static
-void
-btr_cur_latch_leaves(
-/*=================*/
- page_t* page, /* in: leaf page where the search
- converged */
- ulint space, /* in: space id */
- ulint page_no, /* in: page number of the leaf */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /* in: cursor */
- mtr_t* mtr) /* in: mtr */
-{
- ulint left_page_no;
- ulint right_page_no;
- page_t* get_page;
-
- ut_ad(page && mtr);
-
- if (latch_mode == BTR_SEARCH_LEAF) {
-
- get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
- } else if (latch_mode == BTR_MODIFY_LEAF) {
-
- get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
- } else if (latch_mode == BTR_MODIFY_TREE) {
-
- /* x-latch also brothers from left to right */
- left_page_no = btr_page_get_prev(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- get_page = btr_page_get(space, left_page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(get_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush
- = TRUE;
- }
-
- get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
- right_page_no = btr_page_get_next(page, mtr);
-
- if (right_page_no != FIL_NULL) {
- get_page = btr_page_get(space, right_page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(get_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- buf_block_align(get_page)->check_index_page_at_flush
- = TRUE;
- }
-
- } else if (latch_mode == BTR_SEARCH_PREV) {
-
- /* s-latch also left brother */
- left_page_no = btr_page_get_prev(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- cursor->left_page = btr_page_get(space, left_page_no,
- RW_S_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(cursor->left_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- ut_a(page_is_comp(cursor->left_page)
- == page_is_comp(page));
- buf_block_align(cursor->left_page)
- ->check_index_page_at_flush = TRUE;
- }
-
- get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
-
- } else if (latch_mode == BTR_MODIFY_PREV) {
-
- /* x-latch also left brother */
- left_page_no = btr_page_get_prev(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- cursor->left_page = btr_page_get(space, left_page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(cursor->left_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- ut_a(page_is_comp(cursor->left_page)
- == page_is_comp(page));
- buf_block_align(cursor->left_page)
- ->check_index_page_at_flush = TRUE;
- }
-
- get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
- ut_a(page_is_comp(get_page) == page_is_comp(page));
- buf_block_align(get_page)->check_index_page_at_flush = TRUE;
- } else {
- ut_error;
- }
-}
-
-/************************************************************************
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
-
-If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
-search tuple should be performed in the B-tree. InnoDB does an insert
-immediately after the cursor. Thus, the cursor may end up on a user record,
-or on a page infimum record. */
-
-void
-btr_cur_search_to_nth_level(
-/*========================*/
- dict_index_t* index, /* in: index */
- ulint level, /* in: the tree level of search */
- dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in
- tuple must be set so that it cannot get
- compared to the node ptr page number field! */
- ulint mode, /* in: PAGE_CUR_L, ...;
- Inserts should always be made using
- PAGE_CUR_LE to search the position! */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
- BTR_INSERT and BTR_ESTIMATE;
- cursor->left_page is used to store a pointer
- to the left neighbor page, in the cases
- BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /* in/out: tree cursor; the cursor page is
- s- or x-latched, but see also above! */
- ulint has_search_latch,/* in: info on the latch mode the
- caller currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- mtr_t* mtr) /* in: mtr */
-{
- page_cur_t* page_cursor;
- page_t* page;
- page_t* guess;
- rec_t* node_ptr;
- ulint page_no;
- ulint space;
- ulint up_match;
- ulint up_bytes;
- ulint low_match;
- ulint low_bytes;
- ulint height;
- ulint savepoint;
- ulint rw_latch;
- ulint page_mode;
- ulint insert_planned;
- ulint buf_mode;
- ulint estimate;
- ulint ignore_sec_unique;
- ulint root_height = 0; /* remove warning */
-#ifdef BTR_CUR_ADAPT
- btr_search_t* info;
-#endif
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
- /* Currently, PAGE_CUR_LE is the only search mode used for searches
- ending to upper levels */
-
- ut_ad(level == 0 || mode == PAGE_CUR_LE);
- ut_ad(dict_index_check_search_tuple(index, tuple));
- ut_ad(!(index->type & DICT_IBUF) || ibuf_inside());
- ut_ad(dtuple_check_typed(tuple));
-
-#ifdef UNIV_DEBUG
- cursor->up_match = ULINT_UNDEFINED;
- cursor->low_match = ULINT_UNDEFINED;
-#endif
- insert_planned = latch_mode & BTR_INSERT;
- estimate = latch_mode & BTR_ESTIMATE;
- ignore_sec_unique = latch_mode & BTR_IGNORE_SEC_UNIQUE;
- latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE
- | BTR_IGNORE_SEC_UNIQUE);
-
- ut_ad(!insert_planned || (mode == PAGE_CUR_LE));
-
- cursor->flag = BTR_CUR_BINARY;
- cursor->index = index;
-
-#ifndef BTR_CUR_ADAPT
- guess = NULL;
-#else
- info = btr_search_get_info(index);
-
- guess = info->root_guess;
-
-#ifdef BTR_CUR_HASH_ADAPT
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_searches++;
-#endif
- if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
- && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
- && !estimate
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- && mode != PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- && srv_use_adaptive_hash_indexes
- && btr_search_guess_on_hash(index, info, tuple, mode,
- latch_mode, cursor,
- has_search_latch, mtr)) {
-
- /* Search using the hash index succeeded */
-
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- btr_cur_n_sea++;
-
- return;
- }
-#endif
-#endif
- btr_cur_n_non_sea++;
-
- /* If the hash search did not succeed, do binary search down the
- tree */
-
- if (has_search_latch) {
- /* Release possible search latch to obey latching order */
- rw_lock_s_unlock(&btr_search_latch);
- }
-
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched leaf node(s) */
-
- savepoint = mtr_set_savepoint(mtr);
-
- if (latch_mode == BTR_MODIFY_TREE) {
- mtr_x_lock(dict_index_get_lock(index), mtr);
-
- } else if (latch_mode == BTR_CONT_MODIFY_TREE) {
- /* Do nothing */
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- } else {
- mtr_s_lock(dict_index_get_lock(index), mtr);
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- space = dict_index_get_space(index);
- page_no = dict_index_get_page(index);
-
- up_match = 0;
- up_bytes = 0;
- low_match = 0;
- low_bytes = 0;
-
- height = ULINT_UNDEFINED;
- rw_latch = RW_NO_LATCH;
- buf_mode = BUF_GET;
-
- /* We use these modified search modes on non-leaf levels of the
- B-tree. These let us end up in the right B-tree leaf. In that leaf
- we use the original search mode. */
-
- switch (mode) {
- case PAGE_CUR_GE:
- page_mode = PAGE_CUR_L;
- break;
- case PAGE_CUR_G:
- page_mode = PAGE_CUR_LE;
- break;
- default:
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || mode == PAGE_CUR_LE_OR_EXTENDS);
-#else /* PAGE_CUR_LE_OR_EXTENDS */
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- page_mode = mode;
- break;
- }
-
- /* Loop and search until we arrive at the desired level */
-
- for (;;) {
- if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
-
- rw_latch = latch_mode;
-
- if (insert_planned
- && ibuf_should_try(index, ignore_sec_unique)) {
-
- /* Try insert to the insert buffer if the
- page is not in the buffer pool */
-
- buf_mode = BUF_GET_IF_IN_POOL;
- }
- }
-retry_page_get:
- page = buf_page_get_gen(space, page_no, rw_latch, guess,
- buf_mode,
- __FILE__, __LINE__,
- mtr);
- if (page == NULL) {
- /* This must be a search to perform an insert;
- try insert to the insert buffer */
-
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
- ut_ad(insert_planned);
- ut_ad(cursor->thr);
-
- if (ibuf_should_try(index, ignore_sec_unique)
- && ibuf_insert(tuple, index, space, page_no,
- cursor->thr)) {
- /* Insertion to the insert buffer succeeded */
- cursor->flag = BTR_CUR_INSERT_TO_IBUF;
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- goto func_exit;
- }
-
- /* Insert to the insert buffer did not succeed:
- retry page get */
-
- buf_mode = BUF_GET;
-
- goto retry_page_get;
- }
-
- buf_block_align(page)->check_index_page_at_flush = TRUE;
-
-#ifdef UNIV_SYNC_DEBUG
- if (rw_latch != RW_NO_LATCH) {
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
- }
-#endif
- ut_ad(0 == ut_dulint_cmp(index->id,
- btr_page_get_index_id(page)));
-
- if (height == ULINT_UNDEFINED) {
- /* We are in the root node */
-
- height = btr_page_get_level(page, mtr);
- root_height = height;
- cursor->tree_height = root_height + 1;
-#ifdef BTR_CUR_ADAPT
- if (page != guess) {
- info->root_guess = page;
- }
-#endif
- }
-
- if (height == 0) {
- if (rw_latch == RW_NO_LATCH) {
-
- btr_cur_latch_leaves(page, space,
- page_no, latch_mode,
- cursor, mtr);
- }
-
- if ((latch_mode != BTR_MODIFY_TREE)
- && (latch_mode != BTR_CONT_MODIFY_TREE)) {
-
- /* Release the tree s-latch */
-
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
- }
-
- page_mode = mode;
- }
-
- page_cur_search_with_match(page, index, tuple, page_mode,
- &up_match, &up_bytes,
- &low_match, &low_bytes,
- page_cursor);
- if (estimate) {
- btr_cur_add_path_info(cursor, height, root_height);
- }
-
- /* If this is the desired level, leave the loop */
-
- ut_ad(height == btr_page_get_level(
- page_cur_get_page(page_cursor), mtr));
-
- if (level == height) {
-
- if (level > 0) {
- /* x-latch the page */
- page = btr_page_get(space,
- page_no, RW_X_LATCH, mtr);
- ut_a((ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- }
-
- break;
- }
-
- ut_ad(height > 0);
-
- height--;
- guess = NULL;
-
- node_ptr = page_cur_get_rec(page_cursor);
- offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (level == 0) {
- cursor->low_match = low_match;
- cursor->low_bytes = low_bytes;
- cursor->up_match = up_match;
- cursor->up_bytes = up_bytes;
-
-#ifdef BTR_CUR_ADAPT
- if (srv_use_adaptive_hash_indexes) {
-
- btr_search_info_update(index, cursor);
- }
-#endif
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- }
-
-func_exit:
- if (has_search_latch) {
-
- rw_lock_s_lock(&btr_search_latch);
- }
-}
-
-/*********************************************************************
-Opens a cursor at either end of an index.
-
-Starting from the root page of the index, the function walks down to
-level 0. On every non-leaf level the page cursor is moved to the record
-following the before-first position (from_left) or preceding the
-after-last position (!from_left), and the child page number stored in
-that node pointer is followed. On the leaf page the page cursor is left
-before the first record (from_left) or after the last record (!from_left).
-
-The index tree latch is taken in x-mode for BTR_MODIFY_TREE and in s-mode
-for every other latch mode. A savepoint is taken first so that the s-latch
-can be released once the leaf has been latched. If BTR_ESTIMATE is set in
-latch_mode, btr_cur_add_path_info() is called for each visited level. */
-
-void
-btr_cur_open_at_index_side(
-/*=======================*/
- ibool from_left, /* in: TRUE if open to the low end,
- FALSE if to the high end */
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: latch mode; may be ORed with
- BTR_ESTIMATE */
- btr_cur_t* cursor, /* in/out: cursor; cursor->index and the
- page cursor are set by this function */
- mtr_t* mtr) /* in: mtr */
-{
- page_cur_t* page_cursor;
- page_t* page;
- ulint page_no;
- ulint space;
- ulint height;
- ulint root_height = 0; /* remove warning */
- rec_t* node_ptr;
- ulint estimate;
- ulint savepoint;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 of
- offsets_ is set to the number of elements in the array */
-
- estimate = latch_mode & BTR_ESTIMATE; /* remember the flag ... */
- latch_mode = latch_mode & ~BTR_ESTIMATE; /* ... and strip it from
- the latch mode proper */
-
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched the leaf node */
-
- savepoint = mtr_set_savepoint(mtr);
-
- if (latch_mode == BTR_MODIFY_TREE) {
- mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
- mtr_s_lock(dict_index_get_lock(index), mtr);
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
- cursor->index = index;
-
- space = dict_index_get_space(index);
- page_no = dict_index_get_page(index); /* the descent starts from
- this page */
-
- height = ULINT_UNDEFINED; /* sentinel: the root has not been read
- yet, so the tree height is unknown */
-
- for (;;) {
- page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
- BUF_GET,
- __FILE__, __LINE__,
- mtr);
- ut_ad(0 == ut_dulint_cmp(index->id,
- btr_page_get_index_id(page)));
-
- buf_block_align(page)->check_index_page_at_flush = TRUE;
-
- if (height == ULINT_UNDEFINED) {
- /* We are in the root node */
-
- height = btr_page_get_level(page, mtr);
- root_height = height;
- }
-
- if (height == 0) {
- btr_cur_latch_leaves(page, space, page_no,
- latch_mode, cursor, mtr);
-
- /* In versions <= 3.23.52 we had forgotten to
- release the tree latch here. If in an index scan
- we had to scan far to find a record visible to the
- current transaction, that could starve others
- waiting for the tree latch.
-
- NOTE(review): the condition below also excludes
- BTR_CONT_MODIFY_TREE, although this function takes the
- tree latch in s-mode for every mode other than
- BTR_MODIFY_TREE; confirm whether that mode can be
- passed here. */
-
- if ((latch_mode != BTR_MODIFY_TREE)
- && (latch_mode != BTR_CONT_MODIFY_TREE)) {
-
- /* Release the tree s-latch */
-
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
- }
- }
-
- if (from_left) {
- page_cur_set_before_first(page, page_cursor);
- } else {
- page_cur_set_after_last(page, page_cursor);
- }
-
- if (height == 0) {
- if (estimate) {
- btr_cur_add_path_info(cursor, height,
- root_height);
- }
-
- break; /* leaf reached: the cursor stays before the
- first / after the last record */
- }
-
- ut_ad(height > 0);
-
- if (from_left) {
- page_cur_move_to_next(page_cursor);
- } else {
- page_cur_move_to_prev(page_cursor);
- }
-
- if (estimate) {
- btr_cur_add_path_info(cursor, height, root_height);
- }
-
- height--;
-
- node_ptr = page_cur_get_rec(page_cursor);
- offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap); /* heap is non-NULL only if
- rec_get_offsets() stored something through &heap */
- }
-}
-
-/**************************************************************************
-Positions a cursor at a randomly chosen position within a B-tree.
-
-The function descends from the root page of the index to level 0. On each
-visited page, page_cur_open_on_rnd_user_rec() positions the page cursor,
-and on non-leaf levels the child page number of the chosen node pointer
-is followed.
-
-The index tree latch is taken in x-mode for BTR_MODIFY_TREE and in s-mode
-otherwise. No savepoint is taken and nothing in this function releases
-the tree latch. */
-
-void
-btr_cur_open_at_rnd_pos(
-/*====================*/
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /* in/out: B-tree cursor */
- mtr_t* mtr) /* in: mtr */
-{
- page_cur_t* page_cursor;
- page_t* page;
- ulint page_no;
- ulint space;
- ulint height;
- rec_t* node_ptr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 of
- offsets_ is set to the number of elements in the array */
-
- if (latch_mode == BTR_MODIFY_TREE) {
- mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
- mtr_s_lock(dict_index_get_lock(index), mtr);
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
- cursor->index = index;
-
- space = dict_index_get_space(index);
- page_no = dict_index_get_page(index); /* the descent starts from
- this page */
-
- height = ULINT_UNDEFINED; /* sentinel: the root has not been read
- yet, so the tree height is unknown */
-
- for (;;) {
- page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
- BUF_GET,
- __FILE__, __LINE__,
- mtr);
- ut_ad(0 == ut_dulint_cmp(index->id,
- btr_page_get_index_id(page)));
-
- if (height == ULINT_UNDEFINED) {
- /* We are in the root node */
-
- height = btr_page_get_level(page, mtr);
- }
-
- if (height == 0) {
- btr_cur_latch_leaves(page, space, page_no,
- latch_mode, cursor, mtr);
- }
-
- page_cur_open_on_rnd_user_rec(page, page_cursor);
-
- if (height == 0) {
-
- break; /* leaf reached: the page cursor keeps the
- position chosen just above */
- }
-
- ut_ad(height > 0);
-
- height--;
-
- node_ptr = page_cur_get_rec(page_cursor);
- offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap); /* heap is non-NULL only if
- rec_get_offsets() stored something through &heap */
- }
-}
-
-/*==================== B-TREE INSERT =========================*/
-
-/*****************************************************************
-Attempts to insert tuple at the cursor position. When the first attempt
-does not fit, the page is reorganized once, the page cursor is
-repositioned with a PAGE_CUR_LE search and the insert is tried again.
-Unlike _optimistic_insert, no heuristic decides whether spending CPU
-time on the reorganization pays off. */
-static
-rec_t*
-btr_cur_insert_if_possible(
-/*=======================*/
-				/* out: pointer to the inserted record,
-				or NULL if the second attempt failed
-				as well */
-	btr_cur_t*	cursor,	/* in: cursor on page after which to insert;
-				cursor stays valid */
-	dtuple_t*	tuple,	/* in: tuple to insert; the size info need not
-				have been stored to tuple */
-	ibool*		reorg,	/* out: TRUE if reorganization occurred */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*		frame;
-	page_cur_t*	pos;
-	rec_t*		inserted;
-
-	ut_ad(dtuple_check_typed(tuple));
-
-	*reorg = FALSE;
-
-	frame = btr_cur_get_page(cursor);
-
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(frame),
-				MTR_MEMO_PAGE_X_FIX));
-	pos = btr_cur_get_page_cur(cursor);
-
-	/* First attempt: insert into the page as it is */
-	inserted = page_cur_tuple_insert(pos, tuple, cursor->index, mtr);
-
-	if (inserted) {
-
-		return(inserted);
-	}
-
-	/* The record did not fit: reorganize the page, reposition the
-	page cursor and try once more */
-
-	btr_page_reorganize(frame, cursor->index, mtr);
-
-	*reorg = TRUE;
-
-	page_cur_search(frame, cursor->index, tuple, PAGE_CUR_LE, pos);
-
-	return(page_cur_tuple_insert(pos, tuple, cursor->index, mtr));
-}
-
-/*****************************************************************
-Lock check and optional undo logging for an insert. The insert lock
-check is always requested first. Undo is reported only for a clustered
-index that is not an insert buffer index; unless BTR_KEEP_SYS_FLAG is
-set, the roll pointer obtained from the undo log is then stored into
-the DATA_ROLL_PTR system field of entry. */
-UNIV_INLINE
-ulint
-btr_cur_ins_lock_and_undo(
-/*======================*/
-				/* out: DB_SUCCESS, DB_WAIT_LOCK,
-				DB_FAIL, or error number */
-	ulint		flags,	/* in: undo logging and locking flags: if
-				not zero, the parameters index and thr
-				should be specified */
-	btr_cur_t*	cursor,	/* in: cursor on page after which to insert */
-	dtuple_t*	entry,	/* in: entry to insert */
-	que_thr_t*	thr,	/* in: query thread or NULL */
-	ibool*		inherit)/* out: TRUE if the inserted new record maybe
-				should inherit LOCK_GAP type locks from the
-				successor record */
-{
-	dict_index_t*	ind = cursor->index;
-	dulint		new_roll_ptr;
-	ulint		status;
-
-	/* Enqueue an explicit lock request if we have to wait for a lock */
-
-	status = lock_rec_insert_check_and_lock(flags,
-						btr_cur_get_rec(cursor),
-						ind, thr, inherit);
-	if (status != DB_SUCCESS) {
-
-		return(status);
-	}
-
-	if (!(ind->type & DICT_CLUSTERED) || (ind->type & DICT_IBUF)) {
-		/* Nothing is written to the undo log for this index */
-
-		return(DB_SUCCESS);
-	}
-
-	status = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
-					       thr, ind, entry,
-					       NULL, 0, NULL,
-					       &new_roll_ptr);
-	if (status != DB_SUCCESS) {
-
-		return(status);
-	}
-
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
-		/* The roll ptr field of entry can be filled in now */
-
-		row_upd_index_entry_sys_field(entry, ind,
-					      DATA_ROLL_PTR, new_roll_ptr);
-	}
-
-	return(DB_SUCCESS);
-}
-
-#ifdef UNIV_DEBUG
-/*****************************************************************
-Report information about a transaction: prints to stderr the two halves
-of the transaction id, the operation text and the name of the index,
-followed by a newline. Compiled only when UNIV_DEBUG is defined. */
-static
-void
-btr_cur_trx_report(
-/*===============*/
-	trx_t*			trx,	/* in: transaction */
-	const dict_index_t*	index,	/* in: index */
-	const char*		op)	/* in: operation */
-{
-	/* The values are cast to ulong so that the variadic arguments
-	always have the type that the %lu conversions expect, as is done
-	for the other %lu output in this file */
-	fprintf(stderr, "Trx with id %lu %lu going to ",
-		(ulong) ut_dulint_get_high(trx->id),
-		(ulong) ut_dulint_get_low(trx->id));
-	fputs(op, stderr);
-	dict_index_name_print(stderr, trx, index);
-	putc('\n', stderr);
-}
-#endif /* UNIV_DEBUG */
-
-/*****************************************************************
-Tries to perform an insert to a page in an index tree, next to cursor.
-It is assumed that mtr holds an x-latch on the page. The operation does
-not succeed if there is too little space on the page. If there is just
-one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record.
-
-Outline of the steps taken below:
-1) if the converted record size is at least the smaller of half the free
-space of an empty page and REC_MAX_DATA_SIZE, dtuple_convert_big_rec()
-is called and the size is computed again;
-2) DB_FAIL is returned if the space checks do not pass; a big_rec
-conversion is first undone with dtuple_convert_back_big_rec();
-3) btr_cur_ins_lock_and_undo() is called; on error the big_rec conversion
-is undone and the error is returned;
-4) the record is inserted; if it does not fit, the page is reorganized
-once and the insert is repeated, and a second failure ends in ut_error;
-5) the hash index is updated (under BTR_CUR_HASH_ADAPT), lock_update_insert()
-is called if locking is enabled and inherit is set, and for a non-clustered
-index ibuf_update_free_bits_if_full() is called. */
-
-ulint
-btr_cur_optimistic_insert(
-/*======================*/
- /* out: DB_SUCCESS, DB_WAIT_LOCK,
- DB_FAIL, or error number */
- ulint flags, /* in: undo logging and locking flags: if not
- zero, the parameters index and thr should be
- specified */
- btr_cur_t* cursor, /* in: cursor on page after which to insert;
- cursor stays valid */
- dtuple_t* entry, /* in: entry to insert */
- rec_t** rec, /* out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
- be stored externally by the caller, or
- NULL; set to NULL on entry and to the vector
- only on the DB_SUCCESS path */
- que_thr_t* thr, /* in: query thread or NULL */
- mtr_t* mtr) /* in: mtr */
-{
- big_rec_t* big_rec_vec = NULL;
- dict_index_t* index;
- page_cur_t* page_cursor;
- page_t* page;
- ulint max_size;
- rec_t* dummy_rec;
- ulint level;
- ibool reorg;
- ibool inherit;
- ulint rec_size;
- ulint type;
- ulint err;
-
- *big_rec = NULL;
-
- page = btr_cur_get_page(cursor);
- index = cursor->index;
-
- if (!dtuple_check_typed_no_assert(entry)) {
- fputs("InnoDB: Error in a tuple to insert into ", stderr);
- dict_index_name_print(stderr, thr_get_trx(thr), index); /* NOTE(review):
- thr is documented as "query thread or NULL" but is passed to
- thr_get_trx() unchecked here; confirm that this is safe */
- }
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "insert into ");
- dtuple_print(stderr, entry);
- }
-#endif /* UNIV_DEBUG */
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- max_size = page_get_max_insert_size_after_reorganize(page, 1);
- level = btr_page_get_level(page, mtr);
-
-calculate_sizes_again:
- /* Calculate the record size when entry is converted to a record */
- rec_size = rec_get_converted_size(index, entry);
-
- if (rec_size
- >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
- REC_MAX_DATA_SIZE)) {
-
- /* The record is so big that we have to store some fields
- externally on separate database pages */
-
- big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
-
- if (big_rec_vec == NULL) {
-
- return(DB_TOO_BIG_RECORD);
- }
-
- goto calculate_sizes_again; /* entry was changed by the
- conversion, so rec_size has to be recomputed */
- }
-
- /* If there have been many consecutive inserts, and we are on the leaf
- level, check if we have to split the page to reserve enough free space
- for future updates of records. */
-
- type = index->type;
-
- if ((type & DICT_CLUSTERED)
- && (dict_index_get_space_reserve() + rec_size > max_size)
- && (page_get_n_recs(page) >= 2)
- && (0 == level)
- && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
- || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
-
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(index, entry, big_rec_vec);
- }
-
- return(DB_FAIL);
- }
-
- if (!(((max_size >= rec_size)
- && (max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT))
- || (page_get_max_insert_size(page, 1) >= rec_size)
- || (page_get_n_recs(page) <= 1))) {
-
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(index, entry, big_rec_vec);
- }
- return(DB_FAIL);
- }
-
- /* Check locks and write to the undo log, if specified */
- err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &inherit);
-
- if (err != DB_SUCCESS) {
-
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(index, entry, big_rec_vec);
- }
- return(err);
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- reorg = FALSE;
-
- /* Now, try the insert */
-
- *rec = page_cur_insert_rec_low(page_cursor, entry, index,
- NULL, NULL, mtr);
- if (UNIV_UNLIKELY(!(*rec))) {
- /* If the record did not fit, reorganize */
- btr_page_reorganize(page, index, mtr);
-
- ut_ad(page_get_max_insert_size(page, 1) == max_size);
-
- reorg = TRUE;
-
- page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor);
-
- *rec = page_cur_tuple_insert(page_cursor, entry, index, mtr);
-
- if (UNIV_UNLIKELY(!*rec)) {
- fputs("InnoDB: Error: cannot insert tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs(" into ", stderr);
- dict_index_name_print(stderr, thr_get_trx(thr), index); /* NOTE(review):
- same unchecked use of thr as in the error branch at the top of the
- function */
- fprintf(stderr, "\nInnoDB: max insert size %lu\n",
- (ulong) max_size);
- ut_error;
- }
- }
-
-#ifdef BTR_CUR_HASH_ADAPT
- if (!reorg && (0 == level) && (cursor->flag == BTR_CUR_HASH)) {
- btr_search_update_hash_node_on_insert(cursor);
- } else {
- btr_search_update_hash_on_insert(cursor);
- }
-#endif
-
- if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) { /* inherit is
- read only when locking is enabled */
-
- lock_update_insert(*rec);
- }
-
-#if 0
- fprintf(stderr, "Insert into page %lu, max ins size %lu,"
- " rec %lu ind type %lu\n",
- buf_frame_get_page_no(page), max_size,
- rec_size + PAGE_DIR_SLOT_SIZE, type);
-#endif
- if (!(type & DICT_CLUSTERED)) {
- /* We have added a record to page: update its free bits */
- ibuf_update_free_bits_if_full(cursor->index, page, max_size,
- rec_size + PAGE_DIR_SLOT_SIZE);
- }
-
- *big_rec = big_rec_vec;
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************
-Performs an insert on a page of an index tree. It is assumed that mtr
-holds an x-latch on the tree and on the cursor page. If the insert is
-made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist.
-
-The function first calls btr_cur_optimistic_insert() and returns its
-result unless that is DB_FAIL. Otherwise it checks locks and writes undo
-again, reserves free extents unless BTR_NO_UNDO_LOG_FLAG is set, converts
-an oversized entry with dtuple_convert_big_rec(), and inserts through
-btr_root_raise_and_insert() when the cursor page is the root page of the
-index, or through btr_page_split_and_insert() otherwise. Reserved extents
-are released before returning. */
-
-ulint
-btr_cur_pessimistic_insert(
-/*=======================*/
- /* out: DB_SUCCESS or error number */
- ulint flags, /* in: undo logging and locking flags: if not
- zero, the parameter thr should be
- specified; if no undo logging is specified,
- then the caller must have reserved enough
- free extents in the file space so that the
- insertion will certainly succeed */
- btr_cur_t* cursor, /* in: cursor after which to insert;
- cursor stays valid */
- dtuple_t* entry, /* in: entry to insert */
- rec_t** rec, /* out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
- be stored externally by the caller, or
- NULL */
- que_thr_t* thr, /* in: query thread or NULL */
- mtr_t* mtr) /* in: mtr */
-{
- dict_index_t* index = cursor->index;
- big_rec_t* big_rec_vec = NULL;
- page_t* page;
- ulint err;
- ibool dummy_inh;
- ibool success;
- ulint n_extents = 0; /* stays 0 when no extents are reserved
- below; tested before every release */
- ulint n_reserved;
-
- ut_ad(dtuple_check_typed(entry));
-
- *big_rec = NULL;
-
- page = btr_cur_get_page(cursor);
-
- ut_ad(mtr_memo_contains(mtr,
- dict_index_get_lock(btr_cur_get_index(cursor)),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
-
- /* Try first an optimistic insert; reset the cursor flag: we do not
- assume anything of how it was positioned */
-
- cursor->flag = BTR_CUR_BINARY;
-
- err = btr_cur_optimistic_insert(flags, cursor, entry, rec, big_rec,
- thr, mtr);
- if (err != DB_FAIL) {
-
- return(err);
- }
-
- /* Retry with a pessimistic insert. Check locks and write to undo log,
- if specified */
-
- err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &dummy_inh);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
- /* First reserve enough free space for the file segments
- of the index tree, so that the insert will not fail because
- of lack of space */
-
- n_extents = cursor->tree_height / 16 + 3;
-
- success = fsp_reserve_free_extents(&n_reserved, index->space,
- n_extents, FSP_NORMAL, mtr);
- if (!success) {
- err = DB_OUT_OF_FILE_SPACE;
-
- return(err);
- }
- }
-
- if (rec_get_converted_size(index, entry)
- >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
- REC_MAX_DATA_SIZE)) {
-
- /* The record is so big that we have to store some fields
- externally on separate database pages */
-
- big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
-
- if (big_rec_vec == NULL) {
-
- if (n_extents > 0) {
- fil_space_release_free_extents(index->space,
- n_reserved);
- }
- return(DB_TOO_BIG_RECORD);
- }
- }
-
- if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
-
- /* The page is the root page */
- *rec = btr_root_raise_and_insert(cursor, entry, mtr);
- } else {
- *rec = btr_page_split_and_insert(cursor, entry, mtr);
- }
-
- btr_cur_position(index, page_rec_get_prev(*rec), cursor); /* the
- cursor is repositioned on the record preceding the inserted one */
-
-#ifdef BTR_CUR_ADAPT
- btr_search_update_hash_on_insert(cursor);
-#endif
- if (!(flags & BTR_NO_LOCKING_FLAG)) {
-
- lock_update_insert(*rec);
- }
-
- err = DB_SUCCESS;
-
- if (n_extents > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
- *big_rec = big_rec_vec;
-
- return(err);
-}
-
-/*==================== B-TREE UPDATE =========================*/
-
-/*****************************************************************
-Lock check and undo logging for an update. For a secondary index only
-the lock check is made and its result is returned as such. For a
-clustered index the lock check is skipped when BTR_NO_LOCKING_FLAG is
-set, and the update is then reported to the undo log. */
-UNIV_INLINE
-ulint
-btr_cur_upd_lock_and_undo(
-/*======================*/
-				/* out: DB_SUCCESS, DB_WAIT_LOCK, or error
-				number */
-	ulint		flags,	/* in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/* in: cursor on record to update */
-	upd_t*		update,	/* in: update vector */
-	ulint		cmpl_info,/* in: compiler info on secondary index
-				updates */
-	que_thr_t*	thr,	/* in: query thread */
-	dulint*		roll_ptr)/* out: roll pointer */
-{
-	rec_t*		rec;
-	dict_index_t*	index;
-
-	ut_ad(cursor && update && thr && roll_ptr);
-
-	rec = btr_cur_get_rec(cursor);
-	index = cursor->index;
-
-	if (!(index->type & DICT_CLUSTERED)) {
-		/* Undo logging is done only for clustered index records */
-
-		return(lock_sec_rec_modify_check_and_lock(flags, rec, index,
-							  thr));
-	}
-
-	if (!(flags & BTR_NO_LOCKING_FLAG)) {
-		/* We may have to wait for a lock: an explicit lock request
-		is enqueued in that case */
-
-		ulint		offsets_buf[REC_OFFS_NORMAL_SIZE];
-		ulint*		rec_offsets;
-		mem_heap_t*	tmp_heap = NULL;
-		ulint		lock_err;
-
-		offsets_buf[0] = sizeof(offsets_buf) / sizeof(offsets_buf[0]);
-
-		rec_offsets = rec_get_offsets(rec, index, offsets_buf,
-					      ULINT_UNDEFINED, &tmp_heap);
-
-		lock_err = lock_clust_rec_modify_check_and_lock(
-			flags, rec, index, rec_offsets, thr);
-
-		if (UNIV_LIKELY_NULL(tmp_heap)) {
-			mem_heap_free(tmp_heap);
-		}
-
-		if (lock_err != DB_SUCCESS) {
-
-			return(lock_err);
-		}
-	}
-
-	/* Record the update in the undo log */
-
-	return(trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
-					     index, NULL, update,
-					     cmpl_info, rec, roll_ptr));
-}
-
-/***************************************************************
-Writes the redo log record of an in-place record update. After the
-initial part written by mlog_open_and_write_index(), the record holds
-one byte of flags, the system column values, the two-byte page offset
-of rec, and the update vector. */
-UNIV_INLINE
-void
-btr_cur_update_in_place_log(
-/*========================*/
-	ulint		flags,		/* in: flags */
-	rec_t*		rec,		/* in: record */
-	dict_index_t*	index,		/* in: index where cursor positioned */
-	upd_t*		update,		/* in: update vector */
-	trx_t*		trx,		/* in: transaction */
-	dulint		roll_ptr,	/* in: roll ptr */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	page_t*		page;
-	byte*		log_pos;
-	dict_index_t*	clust_index;
-
-	page = page_align(rec);
-
-	ut_ad(flags < 256);
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	log_pos = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page)
-					    ? MLOG_COMP_REC_UPDATE_IN_PLACE
-					    : MLOG_REC_UPDATE_IN_PLACE,
-					    1 + DATA_ROLL_PTR_LEN + 14 + 2
-					    + MLOG_BUF_MARGIN);
-
-	if (log_pos == NULL) {
-		/* Logging in mtr is switched off during crash recovery */
-		return;
-	}
-
-	/* The system column values are written using the clustered index
-	of the table, also when a secondary index record is being updated
-	(they are not needed for a secondary index record update and could
-	as well be skipped in that case) */
-
-	clust_index = dict_table_get_first_index(index->table);
-
-	mach_write_to_1(log_pos, flags);
-	log_pos += 1;
-
-	log_pos = row_upd_write_sys_vals_to_log(clust_index, trx, roll_ptr,
-						log_pos, mtr);
-
-	mach_write_to_2(log_pos, page_offset(rec));
-	log_pos += 2;
-
-	row_upd_index_write_log(update, log_pos, mtr);
-}
-
-/***************************************************************
-Parses the redo log record of an in-place record update and, when a page
-is given, applies the update to the record at the logged page offset.
-NULL is returned when the buffer ends before the flags, the system
-column values or the record offset could be read; otherwise the result
-of row_upd_index_parse() is returned. */
-
-byte*
-btr_cur_parse_update_in_place(
-/*==========================*/
-				/* out: end of log record or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	page_t*		page,	/* in: page or NULL */
-	dict_index_t*	index)	/* in: index corresponding to page */
-{
-	mem_heap_t*	parse_heap;
-	upd_t*		upd_vec;
-	dulint		trx_id;
-	dulint		roll_ptr;
-	ulint		sys_pos;
-	ulint		log_flags;
-	ulint		offs_in_page;
-
-	if (end_ptr < ptr + 1) {
-
-		return(NULL);
-	}
-
-	log_flags = mach_read_from_1(ptr);
-	ptr += 1;
-
-	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &sys_pos, &trx_id,
-				     &roll_ptr);
-
-	if (ptr == NULL || end_ptr < ptr + 2) {
-
-		return(NULL);
-	}
-
-	offs_in_page = mach_read_from_2(ptr);
-	ptr += 2;
-
-	ut_a(offs_in_page <= UNIV_PAGE_SIZE);
-
-	parse_heap = mem_heap_create(256);
-
-	ptr = row_upd_index_parse(ptr, end_ptr, parse_heap, &upd_vec);
-
-	if (ptr != NULL && page != NULL) {
-		rec_t*	target;
-		ulint*	rec_offsets;
-
-		ut_a((ibool)!!page_is_comp(page)
-		     == dict_table_is_comp(index->table));
-
-		target = page + offs_in_page;
-
-		/* We do not need to reserve btr_search_latch, as the page
-		is only being recovered, and there cannot be a hash index
-		to it. */
-
-		rec_offsets = rec_get_offsets(target, index, NULL,
-					      ULINT_UNDEFINED, &parse_heap);
-
-		if (!(log_flags & BTR_KEEP_SYS_FLAG)) {
-			row_upd_rec_sys_fields_in_recovery(target,
-							   rec_offsets,
-							   sys_pos, trx_id,
-							   roll_ptr);
-		}
-
-		row_upd_rec_in_place(target, rec_offsets, upd_vec);
-	}
-
-	/* The heap is freed on both the applied and the skipped path */
-	mem_heap_free(parse_heap);
-
-	return(ptr);
-}
-
-/*****************************************************************
-Updates a record when the update causes no size changes in its fields.
-We assume here that the ordering fields of the record do not change.
-
-Order of the steps below: lock check and undo logging through
-btr_cur_upd_lock_and_undo(); if the block is hashed, a possible hash
-index pointer is removed and btr_search_latch is x-locked around the
-modification; the system fields are written unless BTR_KEEP_SYS_FLAG is
-set; the record is updated in place; the redo log record is written;
-and if the update removed the delete mark, btr_cur_unmark_extern_fields()
-is called. */
-
-ulint
-btr_cur_update_in_place(
-/*====================*/
- /* out: DB_SUCCESS or error number */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- upd_t* update, /* in: update vector */
- ulint cmpl_info,/* in: compiler info on secondary index
- updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- dict_index_t* index;
- buf_block_t* block;
- ulint err;
- rec_t* rec;
- dulint roll_ptr = ut_dulint_zero;
- trx_t* trx;
- ulint was_delete_marked;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 of
- offsets_ is set to the number of elements in the array */
-
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- trx = thr_get_trx(thr); /* NOTE(review): thr is used here before the
- "&& thr" test in the debug block below; presumably thr is never NULL
- for this function - confirm */
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(trx, index, "update ");
- rec_print_new(stderr, rec, offsets);
- }
-#endif /* UNIV_DEBUG */
-
- /* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
- thr, &roll_ptr);
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
- }
-
- block = buf_block_align(rec);
- ut_ad(!!page_is_comp(buf_block_get_frame(block))
- == dict_table_is_comp(index->table));
-
- if (block->is_hashed) {
- /* The function row_upd_changes_ord_field_binary works only
- if the update vector was built for a clustered index, we must
- NOT call it if index is secondary */
-
- if (!(index->type & DICT_CLUSTERED)
- || row_upd_changes_ord_field_binary(NULL, index, update)) {
-
- /* Remove possible hash index pointer to this record */
- btr_search_update_hash_on_delete(cursor);
- }
-
- rw_lock_x_lock(&btr_search_latch); /* held while the record is
- modified; released after row_upd_rec_in_place() below */
- }
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
- }
-
- was_delete_marked = rec_get_deleted_flag(
- rec, page_is_comp(buf_block_get_frame(block))); /* sampled before
- the update so that a cleared delete mark can be detected afterwards */
-
- row_upd_rec_in_place(rec, offsets, update);
-
- if (block->is_hashed) {
- rw_lock_x_unlock(&btr_search_latch);
- }
-
- btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr,
- mtr);
- if (was_delete_marked
- && !rec_get_deleted_flag(rec, page_is_comp(
- buf_block_get_frame(block)))) {
- /* The new updated record owns its possible externally
- stored fields */
-
- btr_cur_unmark_extern_fields(rec, mtr, offsets);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(DB_SUCCESS);
-}
-
-/*****************************************************************
-Tries to update a record on a page in an index tree. It is assumed that mtr
-holds an x-latch on the page. The operation does not succeed if there is too
-little space on the page or if the update would result in too empty a page,
-so that tree compression is recommended. We assume here that the ordering
-fields of the record do not change.
-
-When the update changes no field size and touches no externally stored
-field, the work is handed to btr_cur_update_in_place(). Otherwise the old
-record is deleted from the page and an entry rebuilt from it is inserted in
-its place. The local heap is freed on every return path. */
-
-ulint
-btr_cur_optimistic_update(
-/*======================*/
- /* out: DB_SUCCESS, or DB_OVERFLOW if the
- updated record does not fit, DB_UNDERFLOW
- if the page would become too empty; an
- error code from lock checking and undo
- logging is passed through unchanged */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- upd_t* update, /* in: update vector; this must also
- contain trx id and roll ptr fields */
- ulint cmpl_info,/* in: compiler info on secondary index
- updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- dict_index_t* index;
- page_cur_t* page_cursor;
- ulint err;
- page_t* page;
- rec_t* rec;
- ulint max_size;
- ulint new_rec_size;
- ulint old_rec_size;
- dtuple_t* new_entry;
- dulint roll_ptr;
- trx_t* trx;
- mem_heap_t* heap;
- ibool reorganized = FALSE;
- ulint i;
- ulint* offsets;
-
- page = btr_cur_get_page(cursor);
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
- heap = mem_heap_create(1024);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "update ");
- rec_print_new(stderr, rec, offsets);
- }
-#endif /* UNIV_DEBUG */
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
-
- /* The simplest and the most common case: the update does not
- change the size of any field and none of the updated fields is
- externally stored in rec or update */
- mem_heap_free(heap);
- return(btr_cur_update_in_place(flags, cursor, update,
- cmpl_info, thr, mtr));
- }
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
- if (upd_get_nth_field(update, i)->extern_storage) {
-
- /* Externally stored fields are treated in pessimistic
- update */
-
- mem_heap_free(heap);
- return(DB_OVERFLOW);
- }
- }
-
- if (rec_offs_any_extern(offsets)) {
- /* Externally stored fields are treated in pessimistic
- update */
-
- mem_heap_free(heap);
- return(DB_OVERFLOW);
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
-
- row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, NULL);
- old_rec_size = rec_offs_size(offsets);
- new_rec_size = rec_get_converted_size(index, new_entry);
-
- if (UNIV_UNLIKELY(new_rec_size
- >= (page_get_free_space_of_empty(page_is_comp(page))
- / 2))) { /* the new record would take at least half of the
- free space of an empty page */
-
- mem_heap_free(heap);
-
- return(DB_OVERFLOW);
- }
-
- max_size = old_rec_size
- + page_get_max_insert_size_after_reorganize(page, 1); /* the old
- record is deleted before the insert below, so its
- size is counted as available space */
-
- if (UNIV_UNLIKELY(page_get_data_size(page)
- - old_rec_size + new_rec_size
- < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
-
- /* The page would become too empty */
-
- mem_heap_free(heap);
-
- return(DB_UNDERFLOW);
- }
-
- if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
- && (max_size >= new_rec_size))
- || (page_get_n_recs(page) <= 1))) {
-
- /* There was not enough space, or it did not pay to
- reorganize: for simplicity, we decide what to do assuming a
- reorganization is needed, though it might not be necessary */
-
- mem_heap_free(heap);
-
- return(DB_OVERFLOW);
- }
-
- /* Do lock checking and undo logging; roll_ptr is filled in here and
- written into the new entry below unless BTR_KEEP_SYS_FLAG is set */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr,
- &roll_ptr);
- if (err != DB_SUCCESS) {
-
- mem_heap_free(heap);
-
- return(err);
- }
-
- /* Ok, we may do the replacement. Store on the page infimum the
- explicit locks on rec, before deleting rec (see the comment in
- .._pessimistic_update). */
-
- lock_rec_store_on_page_infimum(page, rec);
-
- btr_search_update_hash_on_delete(cursor);
-
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
-
- page_cur_move_to_prev(page_cursor); /* presumably positions the
- cursor for the re-insert below; the matching
- page_cur_move_to_next() is at the end of the function */
-
- trx = thr_get_trx(thr);
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
- roll_ptr);
- row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx->id);
- }
-
- rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
-
- ut_a(rec); /* <- We calculated above the insert would fit */
-
- if (!rec_get_deleted_flag(rec, page_is_comp(page))) {
- /* The new inserted record owns its possible externally
- stored fields */
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- btr_cur_unmark_extern_fields(rec, mtr, offsets);
- }
-
- /* Restore the old explicit lock state on the record */
-
- lock_rec_restore_from_page_infimum(rec, page);
-
- page_cur_move_to_next(page_cursor);
-
- mem_heap_free(heap);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************
-Repairs the lock state of a supremum record after a page split made
-during a pessimistic update. If the updated record ended up as the first
-user record of its page, the supremum of the preceding page must carry
-exactly the locks of the updated record; in the split it may instead have
-inherited locks from the successor of the updated record. Nothing is done
-when the updated record is not the first user record on its page. */
-static
-void
-btr_cur_pess_upd_restore_supremum(
-/*==============================*/
- rec_t* rec, /* in: updated record */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page = buf_frame_align(rec);
- page_t* prev_page;
- ulint prev_page_no;
- ulint space_id;
-
- if (page_rec_get_next(page_get_infimum_rec(page)) == rec) {
-  /* The updated record is the first user record on its page:
-  fix the supremum of the page to the left */
-
-  space_id = buf_frame_get_space_id(page);
-  prev_page_no = btr_page_get_prev(page, mtr);
-
-  ut_ad(prev_page_no != FIL_NULL);
-
-  prev_page = buf_page_get_with_no_latch(space_id, prev_page_no,
-      mtr);
-#ifdef UNIV_BTR_DEBUG
-  ut_a(btr_page_get_next(prev_page, mtr)
-       == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-  /* The mtr is expected to hold an x-latch on prev_page
-  already */
-  ut_ad(mtr_memo_contains(mtr, buf_block_align(prev_page),
-     MTR_MEMO_PAGE_X_FIX));
-
-  lock_rec_reset_and_inherit_gap_locks(
-   page_get_supremum_rec(prev_page), rec);
- }
-}
-
-/*****************************************************************
-Performs an update of a record on a page of a tree. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. If the
-update is made on the leaf level, to avoid deadlocks, mtr must also
-own x-latches to brothers of page, if those brothers exist. We assume
-here that the ordering fields of the record do not change.
-
-The optimistic update is tried first; this function continues only when
-that attempt reports DB_UNDERFLOW or DB_OVERFLOW. File space extents are
-reserved only in the DB_OVERFLOW case and are released again before the
-function returns. */
-
-ulint
-btr_cur_pessimistic_update(
-/*=======================*/
- /* out: DB_SUCCESS or error code */
- ulint flags, /* in: undo logging, locking, and rollback
- flags */
- btr_cur_t* cursor, /* in: cursor on the record to update */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
- be stored externally by the caller, or NULL */
- upd_t* update, /* in: update vector; this is allowed to also
- contain trx id and roll ptr fields, but
- the values in update vector have no effect */
- ulint cmpl_info,/* in: compiler info on secondary index
- updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- big_rec_t* big_rec_vec = NULL;
- big_rec_t* dummy_big_rec;
- dict_index_t* index;
- page_t* page;
- rec_t* rec;
- page_cur_t* page_cursor;
- dtuple_t* new_entry;
- mem_heap_t* heap;
- ulint err;
- ulint optim_err;
- ibool dummy_reorganized;
- dulint roll_ptr;
- trx_t* trx;
- ibool was_first;
- ibool success;
- ulint n_extents = 0;
- ulint n_reserved;
- ulint* ext_vect;
- ulint n_ext_vect;
- ulint reserve_flag;
- ulint* offsets = NULL;
-
- *big_rec = NULL;
-
- page = btr_cur_get_page(cursor);
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
-
- optim_err = btr_cur_optimistic_update(flags, cursor, update,
- cmpl_info, thr, mtr);
-
- if (optim_err != DB_UNDERFLOW && optim_err != DB_OVERFLOW) {
-
- return(optim_err); /* success or a genuine error: nothing more
- to do here */
- }
-
- /* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
- thr, &roll_ptr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- if (optim_err == DB_OVERFLOW) {
- /* First reserve enough free space for the file segments
- of the index tree, so that the update will not fail because
- of lack of space */
-
- n_extents = cursor->tree_height / 16 + 3;
-
- if (flags & BTR_NO_UNDO_LOG_FLAG) {
- reserve_flag = FSP_CLEANING;
- } else {
- reserve_flag = FSP_NORMAL;
- }
-
- success = fsp_reserve_free_extents(&n_reserved, index->space,
- n_extents,
- reserve_flag, mtr);
- if (!success) {
- err = DB_OUT_OF_FILE_SPACE;
-
- return(err); /* the heap has not been created yet, so
- there is nothing to free on this path */
- }
- }
-
- heap = mem_heap_create(1024);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
- trx = thr_get_trx(thr);
-
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
-
- row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, heap);
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
- roll_ptr);
- row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx->id);
- }
-
- if (flags & BTR_NO_UNDO_LOG_FLAG) {
- /* We are in a transaction rollback undoing a row
- update: we must free possible externally stored fields
- which got new values in the update, if they are not
- inherited values. They can be inherited if we have
- updated the primary key to another value, and then
- update it back again. */
-
- ut_a(big_rec_vec == NULL);
-
- btr_rec_free_updated_extern_fields(index, rec, offsets,
- update, TRUE, mtr);
- }
-
- /* We have to set appropriate extern storage bits in the new
- record to be inserted: we have to remember which fields were such */
-
- ext_vect = mem_heap_alloc(heap, sizeof(ulint)
- * dict_index_get_n_fields(index));
- ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update);
-
- if (UNIV_UNLIKELY(rec_get_converted_size(index, new_entry)
- >= ut_min(page_get_free_space_of_empty(
- page_is_comp(page)) / 2,
- REC_MAX_DATA_SIZE))) {
-
- big_rec_vec = dtuple_convert_big_rec(index, new_entry,
- ext_vect, n_ext_vect);
- if (big_rec_vec == NULL) {
-
- err = DB_TOO_BIG_RECORD;
- goto return_after_reservations;
- }
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- /* Store state of explicit locks on rec on the page infimum record,
- before deleting rec. The page infimum acts as a dummy carrier of the
- locks, taking care also of lock releases, before we can move the locks
- back on the actual record. There is a special case: if we are
- inserting on the root page and the insert causes a call of
- btr_root_raise_and_insert. Therefore we cannot in the lock system
- delete the lock structs set on the root page even if the root
- page carries just node pointers. */
-
- lock_rec_store_on_page_infimum(buf_frame_align(rec), rec);
-
- btr_search_update_hash_on_delete(cursor);
-
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
-
- page_cur_move_to_prev(page_cursor);
-
- rec = btr_cur_insert_if_possible(cursor, new_entry,
- &dummy_reorganized, mtr);
- ut_a(rec || optim_err != DB_UNDERFLOW); /* after DB_UNDERFLOW the
- insert on the same page is required to succeed */
-
- if (rec) {
- lock_rec_restore_from_page_infimum(rec, page);
- rec_set_field_extern_bits(rec, index,
- ext_vect, n_ext_vect, mtr);
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
- /* The new inserted record owns its possible externally
- stored fields */
- btr_cur_unmark_extern_fields(rec, mtr, offsets);
- }
-
- btr_cur_compress_if_useful(cursor, mtr);
-
- err = DB_SUCCESS;
- goto return_after_reservations;
- }
-
- if (page_cur_is_before_first(page_cursor)) {
- /* The record to be updated was positioned as the first user
- record on its page */
-
- was_first = TRUE;
- } else {
- was_first = FALSE;
- }
-
- /* The first parameter means that no lock checking and undo logging
- is made in the insert */
-
- err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
- | BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG,
- cursor, new_entry, &rec,
- &dummy_big_rec, NULL, mtr);
- ut_a(rec);
- ut_a(err == DB_SUCCESS);
- ut_a(dummy_big_rec == NULL);
-
- rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr);
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
- /* The new inserted record owns its possible externally
- stored fields */
-
- btr_cur_unmark_extern_fields(rec, mtr, offsets);
- }
-
- lock_rec_restore_from_page_infimum(rec, page);
-
- /* If necessary, restore also the correct lock state for a new,
- preceding supremum record created in a page split. While the old
- record was nonexistent, the supremum might have inherited its locks
- from a wrong record. */
-
- if (!was_first) {
- btr_cur_pess_upd_restore_supremum(rec, mtr);
- }
-
-return_after_reservations:
- mem_heap_free(heap);
-
- if (n_extents > 0) { /* n_reserved is only set when extents were
- reserved above */
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
- *big_rec = big_rec_vec; /* NULL unless the entry was converted to a
- big record above */
-
- return(err);
-}
-
-/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
-
-/********************************************************************
-Emits the redo log record that describes setting or clearing the delete
-mark of a clustered index record. After the initial part written by
-mlog_open_and_write_index() the record holds, in order: the flags (1 byte),
-the new mark value (1 byte), the system column values written by
-row_upd_write_sys_vals_to_log(), and the page offset of the record
-(2 bytes). */
-UNIV_INLINE
-void
-btr_cur_del_mark_set_clust_rec_log(
-/*===============================*/
- ulint flags, /* in: flags */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index of the record */
- ibool val, /* in: value to set */
- trx_t* trx, /* in: deleting transaction */
- dulint roll_ptr,/* in: roll ptr to the undo log record */
- mtr_t* mtr) /* in: mtr */
-{
- byte* dst;
- byte log_type;
-
- ut_ad(flags < 256);
- ut_ad(val <= 1);
-
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
- if (page_rec_is_comp(rec)) {
-  log_type = MLOG_COMP_REC_CLUST_DELETE_MARK;
- } else {
-  log_type = MLOG_REC_CLUST_DELETE_MARK;
- }
-
- dst = mlog_open_and_write_index(mtr, rec, index, log_type,
-     1 + 1 + DATA_ROLL_PTR_LEN
-     + 14 + 2);
-
- if (dst == NULL) {
-  /* Logging in mtr is switched off during crash recovery */
-  return;
- }
-
- /* One byte each for the flags and for the new mark value */
- mach_write_to_1(dst, flags);
- mach_write_to_1(dst + 1, val);
-
- dst = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, dst + 2,
-         mtr);
-
- /* Two bytes for the offset of the record within its page */
- mach_write_to_2(dst, page_offset(rec));
-
- mlog_close(mtr, dst + 2);
-}
-
-/********************************************************************
-Parses the redo log record for delete marking or unmarking of a clustered
-index record. The record is read in the order: flags (1 byte), mark value
-(1 byte), system column values (via row_upd_parse_sys_vals), page offset of
-the record (2 bytes). When page is not NULL the change is also applied to
-the record at that offset; when page is NULL the record is only parsed. */
-
-byte*
-btr_cur_parse_del_mark_set_clust_rec(
-/*=================================*/
- /* out: end of log record or NULL; NULL is
- returned when the buffer ends before the
- record is complete or when
- row_upd_parse_sys_vals returns NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: index corresponding to page */
- page_t* page) /* in: page or NULL */
-{
- ulint flags;
- ulint val;
- ulint pos;
- dulint trx_id;
- dulint roll_ptr;
- ulint offset;
- rec_t* rec;
-
- ut_ad(!page
- || !!page_is_comp(page) == dict_table_is_comp(index->table));
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- flags = mach_read_from_1(ptr);
- ptr++;
- val = mach_read_from_1(ptr);
- ptr++;
-
- ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(offset <= UNIV_PAGE_SIZE);
-
- if (page) {
- rec = page + offset;
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* store
- the element count of offsets_ in its first slot */
-
- row_upd_rec_sys_fields_in_recovery(
- rec, rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
- pos, trx_id, roll_ptr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- /* We do not need to reserve btr_search_latch, as the page
- is only being recovered, and there cannot be a hash index to
- it. */
-
- rec_set_deleted_flag(rec, page_is_comp(page), val);
- }
-
- return(ptr);
-}
-
-/***************************************************************
-Marks a clustered index record deleted. Writes an undo log record to
-undo log on this delete marking. Writes in the trx id field the id
-of the deleting transaction, and in the roll ptr field pointer to the
-undo log record created.
-
-The steps are, in order: lock check, undo logging, setting the flag and
-(unless BTR_KEEP_SYS_FLAG is given) the system fields, and finally writing
-the redo log record. If the lock check or the undo logging fails, the
-record is left unmodified and the error is returned. */
-
-ulint
-btr_cur_del_mark_set_clust_rec(
-/*===========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- number */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor */
- ibool val, /* in: value to set */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- dict_index_t* index;
- buf_block_t* block;
- dulint roll_ptr;
- ulint err;
- rec_t* rec;
- trx_t* trx;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* store the element
- count of offsets_ in its first slot */
-
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "del mark ");
- rec_print_new(stderr, rec, offsets);
- }
-#endif /* UNIV_DEBUG */
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
-
- err = lock_clust_rec_modify_check_and_lock(flags,
- rec, index, offsets, thr);
-
- if (err != DB_SUCCESS) {
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
- }
-
- err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
- index, NULL, NULL, 0, rec,
- &roll_ptr);
- if (err != DB_SUCCESS) {
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
- }
-
- block = buf_block_align(rec);
-
- if (block->is_hashed) { /* btr_search_latch is x-locked around the
- record change only when block->is_hashed is set */
- rw_lock_x_lock(&btr_search_latch);
- }
-
- rec_set_deleted_flag(rec, rec_offs_comp(offsets), val);
-
- trx = thr_get_trx(thr);
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
- }
-
- if (block->is_hashed) {
- rw_lock_x_unlock(&btr_search_latch);
- }
-
- btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
- roll_ptr, mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(DB_SUCCESS);
-}
-
-/********************************************************************
-Emits the redo log record that describes setting or clearing the delete
-mark of a secondary index record. After the initial log record part, the
-body holds the new mark value (1 byte) followed by the page offset of the
-record (2 bytes). */
-UNIV_INLINE
-void
-btr_cur_del_mark_set_sec_rec_log(
-/*=============================*/
- rec_t* rec, /* in: record */
- ibool val, /* in: value to set */
- mtr_t* mtr) /* in: mtr */
-{
- byte* dst;
-
- ut_ad(val <= 1);
-
- dst = mlog_open(mtr, 11 + 1 + 2);
-
- if (dst != NULL) {
-  dst = mlog_write_initial_log_record_fast(
-   rec, MLOG_REC_SEC_DELETE_MARK, dst, mtr);
-
-  mach_write_to_1(dst, val);
-  mach_write_to_2(dst + 1, page_offset(rec));
-
-  mlog_close(mtr, dst + 3);
- }
-
- /* Otherwise logging in mtr is switched off (crash recovery):
- mlog_open returned NULL and there is nothing to write */
-}
-
-/********************************************************************
-Parses the redo log record for delete marking or unmarking of a secondary
-index record: one byte for the mark value followed by two bytes for the
-page offset of the record. If a page is given, the mark is also applied to
-the record at that offset. */
-
-byte*
-btr_cur_parse_del_mark_set_sec_rec(
-/*===============================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page) /* in: page or NULL */
-{
- ulint del_flag;
- ulint offset;
-
- if (end_ptr < ptr + 3) {
-  /* The record is not completely in the buffer */
-
-  return(NULL);
- }
-
- del_flag = mach_read_from_1(ptr);
- offset = mach_read_from_2(ptr + 1);
-
- ut_a(offset <= UNIV_PAGE_SIZE);
-
- if (page != NULL) {
-  /* We do not need to reserve btr_search_latch, as the page
-  is only being recovered, and there cannot be a hash index to
-  it. */
-
-  rec_set_deleted_flag(page + offset, page_is_comp(page),
-         del_flag);
- }
-
- return(ptr + 3);
-}
-
-/***************************************************************
-Sets the delete mark of the secondary index record under the cursor to
-the given value. The lock check is made first; if it does not succeed
-the record is left untouched and the error is returned. After the flag
-has been changed a redo log record is written. */
-
-ulint
-btr_cur_del_mark_set_sec_rec(
-/*=========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- number */
- ulint flags, /* in: locking flag */
- btr_cur_t* cursor, /* in: cursor */
- ibool val, /* in: value to set */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- rec_t* rec = btr_cur_get_rec(cursor);
- buf_block_t* block;
- ulint lock_err;
-
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
-  btr_cur_trx_report(thr_get_trx(thr), cursor->index,
-       "del mark ");
-  rec_print(stderr, rec, cursor->index);
- }
-#endif /* UNIV_DEBUG */
-
- lock_err = lock_sec_rec_modify_check_and_lock(flags, rec,
-            cursor->index, thr);
- if (lock_err != DB_SUCCESS) {
-
-  return(lock_err);
- }
-
- block = buf_block_align(rec);
- ut_ad(!!page_is_comp(buf_block_get_frame(block))
-       == dict_table_is_comp(cursor->index->table));
-
- /* btr_search_latch is held in x-mode around the change only when
- block->is_hashed is set */
-
- if (block->is_hashed) {
-  rw_lock_x_lock(&btr_search_latch);
- }
-
- rec_set_deleted_flag(rec, page_is_comp(buf_block_get_frame(block)),
-        val);
-
- if (block->is_hashed) {
-  rw_lock_x_unlock(&btr_search_latch);
- }
-
- btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************
-Clears the delete mark of a secondary index record and writes the
-corresponding redo log record. This function is only used by the insert
-buffer insert merge mechanism. */
-
-void
-btr_cur_del_unmark_for_ibuf(
-/*========================*/
- rec_t* rec, /* in: record to delete unmark */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page = buf_frame_align(rec);
-
- /* btr_search_latch is not reserved here: the page has just been
- read to the buffer pool, so there cannot be a hash index to it. */
-
- rec_set_deleted_flag(rec, page_is_comp(page), FALSE);
-
- btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
-}
-
-/*==================== B-TREE RECORD REMOVE =========================*/
-
-/*****************************************************************
-Compresses a leaf-level page of the tree by calling btr_compress().
-The mtr must hold an x-latch on the tree and on the cursor page and, to
-avoid deadlocks, also x-latches on the brothers of the page, if those
-brothers exist. NOTE: the caller must have reserved enough free extents
-so that the compression will always succeed if done! */
-
-void
-btr_cur_compress(
-/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid */
- mtr_t* mtr) /* in: mtr */
-{
- /* The tree must be x-locked by this mtr */
- ut_ad(mtr_memo_contains(
-  mtr, dict_index_get_lock(btr_cur_get_index(cursor)),
-  MTR_MEMO_X_LOCK));
-
- /* The cursor page must be x-latched by this mtr */
- ut_ad(mtr_memo_contains(
-  mtr, buf_block_align(btr_cur_get_rec(cursor)),
-  MTR_MEMO_PAGE_X_FIX));
-
- /* Only leaf pages are handled here */
- ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0);
-
- btr_compress(cursor, mtr);
-}
-
-/*****************************************************************
-Compresses a page of the tree, but only when
-btr_cur_compress_recommendation() advises it. The mtr must hold an
-x-latch on the tree and on the cursor page and, to avoid deadlocks, also
-x-latches on the brothers of the page, if those brothers exist. NOTE: the
-caller must have reserved enough free extents so that the compression
-will always succeed if done! */
-
-ibool
-btr_cur_compress_if_useful(
-/*=======================*/
- /* out: TRUE if compression occurred */
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid if compression
- occurs */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mtr_memo_contains(
-  mtr, dict_index_get_lock(btr_cur_get_index(cursor)),
-  MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(
-  mtr, buf_block_align(btr_cur_get_rec(cursor)),
-  MTR_MEMO_PAGE_X_FIX));
-
- if (!btr_cur_compress_recommendation(cursor, mtr)) {
-  /* Compression is not recommended: leave the page as it is */
-
-  return(FALSE);
- }
-
- btr_compress(cursor, mtr);
-
- return(TRUE);
-}
-
-/***********************************************************
-Removes the record on which the tree cursor is positioned on a leaf page.
-It is assumed that the mtr has an x-latch on the page where the cursor is
-positioned, but no latch on the whole tree.
-
-The record is deleted only if it has no externally stored fields and
-btr_cur_can_delete_without_compress() allows it; otherwise nothing on the
-page is changed and FALSE is returned. */
-
-ibool
-btr_cur_optimistic_delete(
-/*======================*/
- /* out: TRUE if success, i.e., the page
- did not become too empty */
- btr_cur_t* cursor, /* in: cursor on leaf page, on the record to
- delete; cursor stays valid: if deletion
- succeeds, on function exit it points to the
- successor of the deleted record */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page;
- ulint max_ins_size;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ibool no_compress_needed;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* store the element
- count of offsets_ in its first slot */
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
- MTR_MEMO_PAGE_X_FIX));
- /* This is intended only for leaf page deletions */
-
- page = btr_cur_get_page(cursor);
-
- ut_ad(btr_page_get_level(page, mtr) == 0);
-
- rec = btr_cur_get_rec(cursor);
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- no_compress_needed = !rec_offs_any_extern(offsets)
- && btr_cur_can_delete_without_compress(
- cursor, rec_offs_size(offsets), mtr);
-
- if (no_compress_needed) {
-
- lock_update_delete(rec);
-
- btr_search_update_hash_on_delete(cursor);
-
- max_ins_size = page_get_max_insert_size_after_reorganize(
- page, 1); /* sampled before the record is deleted and passed
- to ibuf_update_free_bits_low() below */
- page_cur_delete_rec(btr_cur_get_page_cur(cursor),
- cursor->index, offsets, mtr);
-
- ibuf_update_free_bits_low(cursor->index, page, max_ins_size,
- mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return(no_compress_needed);
-}
-
-/*****************************************************************
-Removes the record on which the tree cursor is positioned. Tries
-to compress the page if its fillfactor drops below a threshold
-or if it is the only page on the level. It is assumed that mtr holds
-an x-latch on the tree and on the cursor page. To avoid deadlocks,
-mtr must also own x-latches to brothers of page, if those brothers
-exist.
-
-If the page holds fewer than two records and is not the root page, the
-whole page is discarded with btr_discard_page() instead of deleting the
-single record. Extents reserved by this function itself are released
-before it returns. */
-
-ibool
-btr_cur_pessimistic_delete(
-/*=======================*/
- /* out: TRUE if compression occurred */
- ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
- the latter may occur because we may have
- to update node pointers on upper levels,
- and in the case of variable length keys
- these may actually grow in size */
- ibool has_reserved_extents, /* in: TRUE if the
- caller has already reserved enough free
- extents so that he knows that the operation
- will succeed */
- btr_cur_t* cursor, /* in: cursor on the record to delete;
- if compression does not occur, the cursor
- stays valid: it points to successor of
- deleted record on function exit */
- ibool in_rollback,/* in: TRUE if called in rollback */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page;
- dict_index_t* index;
- rec_t* rec;
- dtuple_t* node_ptr;
- ulint n_extents = 0;
- ulint n_reserved;
- ibool success;
- ibool ret = FALSE;
- ulint level;
- mem_heap_t* heap;
- ulint* offsets;
-
- page = btr_cur_get_page(cursor);
- index = btr_cur_get_index(cursor);
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- if (!has_reserved_extents) {
- /* First reserve enough free space for the file segments
- of the index tree, so that the node pointer updates will
- not fail because of lack of space */
-
- n_extents = cursor->tree_height / 32 + 1;
-
- success = fsp_reserve_free_extents(&n_reserved,
- index->space,
- n_extents,
- FSP_CLEANING, mtr);
- if (!success) {
- *err = DB_OUT_OF_FILE_SPACE;
-
- return(FALSE); /* the heap has not been created yet, so
- there is nothing to free on this path */
- }
- }
-
- heap = mem_heap_create(1024);
- rec = btr_cur_get_rec(cursor);
-
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
- /* Free externally stored fields if the record is neither
- a node pointer nor in two-byte format.
- This avoids an unnecessary loop. */
- if (page_is_comp(page)
- ? !rec_get_node_ptr_flag(rec)
- : !rec_get_1byte_offs_flag(rec)) {
- btr_rec_free_externally_stored_fields(index,
- rec, offsets,
- in_rollback, mtr);
- }
-
- if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
- && UNIV_UNLIKELY(dict_index_get_page(btr_cur_get_index(cursor))
- != buf_frame_get_page_no(page))) {
-
- /* If there is only one record, drop the whole page in
- btr_discard_page, if this is not the root page */
-
- btr_discard_page(cursor, mtr);
-
- *err = DB_SUCCESS;
- ret = TRUE;
-
- goto return_after_reservations;
- }
-
- lock_update_delete(rec);
- level = btr_page_get_level(page, mtr);
-
- if (level > 0
- && UNIV_UNLIKELY(rec == page_rec_get_next(
- page_get_infimum_rec(page)))) {
-
- rec_t* next_rec = page_rec_get_next(rec);
-
- if (btr_page_get_prev(page, mtr) == FIL_NULL) {
-
- /* If we delete the leftmost node pointer on a
- non-leaf level, we must mark the new leftmost node
- pointer as the predefined minimum record */
-
- btr_set_min_rec_mark(next_rec, page_is_comp(page),
- mtr);
- } else {
- /* Otherwise, if we delete the leftmost node pointer
- on a page, we have to change the father node pointer
- so that it is equal to the new leftmost node pointer
- on the page */
-
- btr_node_ptr_delete(index, page, mtr);
-
- node_ptr = dict_index_build_node_ptr(
- index, next_rec, buf_frame_get_page_no(page),
- heap, level);
-
- btr_insert_on_non_leaf_level(index,
- level + 1, node_ptr, mtr);
- }
- }
-
- btr_search_update_hash_on_delete(cursor);
-
- page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
-
- ut_ad(btr_check_node_ptr(index, page, mtr));
-
- *err = DB_SUCCESS;
-
-return_after_reservations:
- mem_heap_free(heap);
-
- if (ret == FALSE) { /* the page was not discarded above: compress it
- if that is recommended */
- ret = btr_cur_compress_if_useful(cursor, mtr);
- }
-
- if (n_extents > 0) { /* n_reserved is only set when this function
- reserved the extents itself */
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
- return(ret);
-}
-
-/***********************************************************************
-Records in cursor->path_arr the position of the cursor on the current
-page, for which the binary search has been performed. The slot index is
-root_height - height, so the root page uses slot 0. A slot whose nth_rec
-is ULINT_UNDEFINED marks the end of the path. If the tree is too high for
-the array, an empty path is stored instead. */
-static
-void
-btr_cur_add_path_info(
-/*==================*/
- btr_cur_t* cursor, /* in: cursor positioned on a page */
- ulint height, /* in: height of the page in tree;
- 0 means leaf node */
- ulint root_height) /* in: root node height in tree */
-{
- btr_path_t* path;
- rec_t* rec;
-
- ut_a(cursor->path_arr);
-
- path = cursor->path_arr;
-
- if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
-  /* Do nothing; return empty path */
-
-  path[0].nth_rec = ULINT_UNDEFINED;
-
-  return;
- }
-
- if (height == 0) {
-  /* Leaf level: terminate the path after the last slot */
-
-  path[root_height + 1].nth_rec = ULINT_UNDEFINED;
- }
-
- rec = btr_cur_get_rec(cursor);
-
- path[root_height - height].nth_rec = page_rec_get_n_recs_before(rec);
- path[root_height - height].n_recs
-  = page_get_n_recs(buf_frame_align(rec));
-}
-
-/***********************************************************************
-Estimates the number of rows in a given index range.
-
-Each end of the range is located with its own search, run in its own
-mini-transaction, with cursor.path_arr pointing at path1 and path2
-respectively. The two paths are then compared slot by slot, starting
-from slot 0, until either path ends (nth_rec == ULINT_UNDEFINED). The
-result is capped at half of index->table->stat_n_rows, except that when
-that half is 0 the full stat_n_rows is returned. */
-
-ib_longlong
-btr_estimate_n_rows_in_range(
-/*=========================*/
- /* out: estimated number of rows */
- dict_index_t* index, /* in: index */
- dtuple_t* tuple1, /* in: range start, may also be empty tuple */
- ulint mode1, /* in: search mode for range start */
- dtuple_t* tuple2, /* in: range end, may also be empty tuple */
- ulint mode2) /* in: search mode for range end */
-{
- btr_path_t path1[BTR_PATH_ARRAY_N_SLOTS];
- btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS];
- btr_cur_t cursor;
- btr_path_t* slot1;
- btr_path_t* slot2;
- ibool diverged;
- ibool diverged_lot;
- ulint divergence_level;
- ib_longlong n_rows;
- ulint i;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- cursor.path_arr = path1;
-
- if (dtuple_get_n_fields(tuple1) > 0) {
-
- btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0, &mtr);
- } else { /* empty start tuple: open the cursor at the index side
- selected by TRUE */
- btr_cur_open_at_index_side(TRUE, index,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
- }
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
-
- cursor.path_arr = path2;
-
- if (dtuple_get_n_fields(tuple2) > 0) {
-
- btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0, &mtr);
- } else { /* empty end tuple: open the cursor at the index side
- selected by FALSE */
- btr_cur_open_at_index_side(FALSE, index,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
- }
-
- mtr_commit(&mtr);
-
- /* We have the path information for the range in path1 and path2 */
-
- n_rows = 1;
- diverged = FALSE; /* This becomes true when the path is not
- the same any more */
- diverged_lot = FALSE; /* This becomes true when the paths are
- not the same or adjacent any more */
- divergence_level = 1000000; /* This is the level where paths diverged
- a lot */
- for (i = 0; ; i++) {
- ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
-
- slot1 = path1 + i;
- slot2 = path2 + i;
-
- if (slot1->nth_rec == ULINT_UNDEFINED
- || slot2->nth_rec == ULINT_UNDEFINED) {
-
- if (i > divergence_level + 1) {
- /* In trees whose height is > 1 our algorithm
- tends to underestimate: multiply the estimate
- by 2: */
-
- n_rows = n_rows * 2;
- }
-
- /* Do not estimate the number of rows in the range
- to over 1 / 2 of the estimated rows in the whole
- table */
-
- if (n_rows > index->table->stat_n_rows / 2) {
- n_rows = index->table->stat_n_rows / 2;
-
- /* If there are just 0 or 1 rows in the table,
- then we estimate all rows are in the range */
-
- if (n_rows == 0) {
- n_rows = index->table->stat_n_rows;
- }
- }
-
- return(n_rows);
- }
-
- if (!diverged && slot1->nth_rec != slot2->nth_rec) {
-
- diverged = TRUE;
-
- if (slot1->nth_rec < slot2->nth_rec) {
- n_rows = slot2->nth_rec - slot1->nth_rec;
-
- if (n_rows > 1) {
- diverged_lot = TRUE;
- divergence_level = i;
- }
- } else {
- /* Maybe the tree has changed between
- searches; a fixed fallback estimate is
- returned */
-
- return(10);
- }
-
- } else if (diverged && !diverged_lot) {
-
- if (slot1->nth_rec < slot1->n_recs
- || slot2->nth_rec > 1) {
-
- diverged_lot = TRUE;
- divergence_level = i;
-
- n_rows = 0;
-
- if (slot1->nth_rec < slot1->n_recs) {
- n_rows += slot1->n_recs
- - slot1->nth_rec;
- }
-
- if (slot2->nth_rec > 1) {
- n_rows += slot2->nth_rec - 1;
- }
- }
- } else if (diverged_lot) {
-
- n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
- / 2; /* scale by the average record count of the
- two pages on this level */
- }
- }
-}
-
-/***********************************************************************
-Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals.
-
-How the code below works: BTR_KEY_VAL_ESTIMATE_N_PAGES times, inside its own
-mini-transaction, a cursor is opened with btr_cur_open_at_rnd_pos() and each
-pair of adjacent records between infimum and supremum on the page under the
-cursor is compared with cmp_rec_rec_with_match(). For every prefix length j
-that is longer than the number of matched fields, n_diff[j] is incremented.
-The value of btr_rec_get_externally_stored_len() for the visited records is
-summed into total_external_size. Finally each counter is scaled by
-index->stat_n_leaf_pages and an add_on term, capped at
-BTR_KEY_VAL_ESTIMATE_N_PAGES, is added.
-
-The scratch array n_diff is obtained with mem_alloc() and released with
-mem_free() before returning; heap is freed at the end if it is non-NULL. */
-
-void
-btr_estimate_number_of_different_key_vals(
-/*======================================*/
- dict_index_t* index) /* in: index */
-{
- btr_cur_t cursor;
- page_t* page;
- rec_t* rec;
- ulint n_cols;
- ulint matched_fields;
- ulint matched_bytes;
- ib_longlong* n_diff;
- ulint not_empty_flag = 0; /* set to 1 once a sampled page has a record */
- ulint total_external_size = 0; /* sum of btr_rec_get_externally_stored_len() */
- ulint i;
- ulint j;
- ulint add_on;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_rec_[REC_OFFS_NORMAL_SIZE];
- ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets_rec = offsets_rec_;
- ulint* offsets_next_rec= offsets_next_rec_;
- *offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_; /* element 0 holds the array's element count */
- *offsets_next_rec_
- = (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_;
-
- n_cols = dict_index_get_n_unique(index);
-
- n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
-
- memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));
-
- /* We sample some pages in the index to get an estimate; every
- sample is taken in a mini-transaction that is started at the top of
- the loop body and committed at its end */
-
- for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
- rec_t* supremum;
- mtr_start(&mtr);
-
- btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
-
- /* Count the number of different key values for each prefix of
- the key on this index page. If the prefix does not determine
- the index record uniquely in the B-tree, then we subtract one
- because otherwise our algorithm would give a wrong estimate
- for an index where there is just one key value. */
-
- page = btr_cur_get_page(&cursor);
-
- supremum = page_get_supremum_rec(page);
- rec = page_rec_get_next(page_get_infimum_rec(page));
-
- if (rec != supremum) {
- not_empty_flag = 1;
- offsets_rec = rec_get_offsets(rec, index, offsets_rec,
- ULINT_UNDEFINED, &heap);
- }
-
- while (rec != supremum) {
- rec_t* next_rec = page_rec_get_next(rec);
- if (next_rec == supremum) {
- break;
- }
-
- matched_fields = 0;
- matched_bytes = 0;
- offsets_next_rec = rec_get_offsets(next_rec, index,
- offsets_next_rec,
- n_cols, &heap);
-
- cmp_rec_rec_with_match(rec, next_rec,
- offsets_rec, offsets_next_rec,
- index, &matched_fields,
- &matched_bytes);
-
- for (j = matched_fields + 1; j <= n_cols; j++) {
- /* We add one if this index record has
- a different prefix from the previous */
-
- n_diff[j]++;
- }
-
- total_external_size
- += btr_rec_get_externally_stored_len(
- rec, offsets_rec);
-
- rec = next_rec;
- /* Initialize offsets_rec for the next round
- and assign the old offsets_rec buffer to
- offsets_next_rec. */
- {
- ulint* offsets_tmp = offsets_rec;
- offsets_rec = offsets_next_rec;
- offsets_next_rec = offsets_tmp;
- }
- }
-
-
- if (n_cols == dict_index_get_n_unique_in_tree(index)) {
-
- /* If there is more than one leaf page in the tree,
- we add one because we know that the first record
- on the page certainly had a different prefix than the
- last record on the previous index page in the
- alphabetical order. Before this fix, if there was
- just one big record on each clustered index page, the
- algorithm grossly underestimated the number of rows
- in the table. */
-
- if (btr_page_get_prev(page, &mtr) != FIL_NULL
- || btr_page_get_next(page, &mtr) != FIL_NULL) {
-
- n_diff[n_cols]++;
- }
- }
-
- offsets_rec = rec_get_offsets(rec, index, offsets_rec,
- ULINT_UNDEFINED, &heap); /* rec: the record the loop above stopped on; its external length was not added inside the loop */
- total_external_size += btr_rec_get_externally_stored_len(
- rec, offsets_rec);
- mtr_commit(&mtr);
- }
-
- /* If we saw k borders between different key values on
- BTR_KEY_VAL_ESTIMATE_N_PAGES leaf pages, we can estimate how many
- there will be in index->stat_n_leaf_pages */
-
- /* We must take into account that our sample actually represents
- also the pages used for external storage of fields (those pages are
- included in index->stat_n_leaf_pages) */
-
- for (j = 0; j <= n_cols; j++) {
- index->stat_n_diff_key_vals[j]
- = ((n_diff[j]
- * (ib_longlong)index->stat_n_leaf_pages
- + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
- + total_external_size
- + not_empty_flag)
- / (BTR_KEY_VAL_ESTIMATE_N_PAGES
- + total_external_size));
-
- /* If the tree is small, smaller than
- 10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then
- the above estimate is ok. For bigger trees it is common that we
- do not see any borders between key values in the few pages
- we pick. But still there may be BTR_KEY_VAL_ESTIMATE_N_PAGES
- different key values, or even more. Let us try to approximate
- that: */
-
- add_on = index->stat_n_leaf_pages
- / (10 * (BTR_KEY_VAL_ESTIMATE_N_PAGES
- + total_external_size));
-
- if (add_on > BTR_KEY_VAL_ESTIMATE_N_PAGES) {
- add_on = BTR_KEY_VAL_ESTIMATE_N_PAGES;
- }
-
- index->stat_n_diff_key_vals[j] += add_on;
- }
-
- mem_free(n_diff);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
-
-/***************************************************************
-Gets the externally stored size of a record, in units of a database page.
-For every field flagged as externally stored in offsets, the length is read
-from the field reference that occupies the last BTR_EXTERN_FIELD_REF_SIZE
-bytes of the locally stored field, rounded up to a multiple of
-UNIV_PAGE_SIZE and summed; the sum is returned divided by UNIV_PAGE_SIZE. */
-static
-ulint
-btr_rec_get_externally_stored_len(
-/*==============================*/
-				/* out: externally stored part,
-				in units of a database page */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	n_fields;
-	byte*	data;
-	ulint	local_len;
-	ulint	extern_len;
-	ulint	total_extern_len = 0;
-	ulint	i;
-
-	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-	n_fields = rec_offs_n_fields(offsets);
-
-	for (i = 0; i < n_fields; i++) {
-		if (!rec_offs_nth_extern(offsets, i)) {
-
-			continue;
-		}
-
-		data = rec_get_nth_field(rec, offsets, i, &local_len);
-
-		/* local_len is unsigned: without this check a field shorter
-		than the reference would make the subtraction wrap around
-		and the read below would go far outside the record. The
-		other functions in this file that locate the field reference
-		make the same assertion. */
-		ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-		local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-		/* Only the lower 4 bytes of the 8-byte length are read */
-		extern_len = mach_read_from_4(data + local_len
-					      + BTR_EXTERN_LEN + 4);
-
-		total_extern_len += ut_calc_align(extern_len,
-						  UNIV_PAGE_SIZE);
-	}
-
-	return(total_extern_len / UNIV_PAGE_SIZE);
-}
-
-/***********************************************************************
-Sets the ownership bit of an externally stored field in a record.
-The bit is BTR_EXTERN_OWNER_FLAG in the byte at offset BTR_EXTERN_LEN of the
-field reference, i.e. of the last BTR_EXTERN_FIELD_REF_SIZE bytes of the
-field. The flag is cleared when val is TRUE and set when val is FALSE; the
-byte is written back through mlog_write_ulint() with the given mtr. */
-static
-void
-btr_cur_set_ownership_of_extern_field(
-/*==================================*/
-	rec_t*		rec,	/* in: clustered index record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		i,	/* in: field number */
-	ibool		val,	/* in: value to set */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	byte*	flag_ptr;
-	ulint	field_len;
-	ulint	flags;
-
-	flag_ptr = rec_get_nth_field(rec, offsets, i, &field_len);
-
-	ut_a(field_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-	/* Step to the flag byte inside the trailing field reference */
-	flag_ptr += field_len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN;
-
-	flags = mach_read_from_1(flag_ptr);
-
-	flags = val
-		? (flags & (~BTR_EXTERN_OWNER_FLAG))
-		: (flags | BTR_EXTERN_OWNER_FLAG);
-
-	mlog_write_ulint(flag_ptr, flags, MLOG_1BYTE, mtr);
-}
-
-/***********************************************************************
-Marks not updated extern fields as not-owned by this record. The ownership
-is transferred to the updated record which is inserted elsewhere in the
-index tree. In purge only the owner of externally stored field is allowed
-to free the field.
-
-A field counts as updated when its number equals the field_no of some
-element of the update vector; with a NULL update vector no field is
-treated as updated. */
-
-void
-btr_cur_mark_extern_inherited_fields(
-/*=================================*/
-	rec_t*		rec,	/* in: record in a clustered index */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	upd_t*		update,	/* in: update vector */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ibool	is_updated;
-	ulint	n_upd;
-	ulint	n;
-	ulint	j;
-	ulint	i;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-	n = rec_offs_n_fields(offsets);
-
-	/* The update vector is not modified in this function, so its
-	length is read once instead of on every loop test */
-	n_upd = update ? upd_get_n_fields(update) : 0;
-
-	for (i = 0; i < n; i++) {
-		if (!rec_offs_nth_extern(offsets, i)) {
-
-			continue;
-		}
-
-		/* Check it is not in updated fields */
-		is_updated = FALSE;
-
-		for (j = 0; j < n_upd; j++) {
-			if (upd_get_nth_field(update, j)->field_no == i) {
-				is_updated = TRUE;
-
-				/* One match decides it: stop scanning */
-				break;
-			}
-		}
-
-		if (!is_updated) {
-			btr_cur_set_ownership_of_extern_field(
-				rec, offsets, i, FALSE, mtr);
-		}
-	}
-}
-
-/***********************************************************************
-The complement of the previous function: in an update entry may inherit
-some externally stored fields from a record. We must mark them as inherited
-in entry, so that they are not freed in a rollback.
-
-For every field number in ext_vec that is not the field_no of any element
-of the update vector, BTR_EXTERN_INHERITED_FLAG is set in the byte at
-offset BTR_EXTERN_LEN of the field reference stored in the last
-BTR_EXTERN_FIELD_REF_SIZE bytes of the field data. Does nothing when
-ext_vec is NULL. */
-
-void
-btr_cur_mark_dtuple_inherited_extern(
-/*=================================*/
-	dtuple_t*	entry,		/* in: updated entry to be inserted to
-					clustered index */
-	ulint*		ext_vec,	/* in: array of extern fields in the
-					original record */
-	ulint		n_ext_vec,	/* in: number of elements in ext_vec */
-	upd_t*		update)		/* in: update vector */
-{
-	dfield_t*	dfield;
-	ulint		byte_val;
-	byte*		data;
-	ulint		len;
-	ulint		n_upd;
-	ibool		is_updated;
-	ulint		j;
-	ulint		i;
-
-	if (ext_vec == NULL) {
-
-		return;
-	}
-
-	for (i = 0; i < n_ext_vec; i++) {
-
-		/* Check ext_vec[i] is in updated fields. The vector length
-		is read once per field instead of on every loop test, and
-		the scan stops at the first match. */
-		is_updated = FALSE;
-		n_upd = upd_get_n_fields(update);
-
-		for (j = 0; j < n_upd; j++) {
-			if (upd_get_nth_field(update, j)->field_no
-			    == ext_vec[i]) {
-				is_updated = TRUE;
-
-				break;
-			}
-		}
-
-		if (is_updated) {
-
-			continue;
-		}
-
-		dfield = dtuple_get_nth_field(entry, ext_vec[i]);
-
-		data = (byte*) dfield_get_data(dfield);
-		len = dfield_get_len(dfield);
-
-		/* len is unsigned: make sure the field is long enough to
-		hold the reference before subtracting, otherwise the
-		subtraction wraps and the write below lands outside the
-		field */
-		ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-		len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-		byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN);
-
-		byte_val = byte_val | BTR_EXTERN_INHERITED_FLAG;
-
-		mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
-	}
-}
-
-/***********************************************************************
-Marks all extern fields in a record as owned by the record. This function
-should be called if the delete mark of a record is removed: a not delete
-marked record always owns all its extern fields. Each field flagged as
-externally stored in offsets is passed to
-btr_cur_set_ownership_of_extern_field() with val = TRUE. */
-static
-void
-btr_cur_unmark_extern_fields(
-/*=========================*/
-	rec_t*		rec,	/* in: record in a clustered index */
-	mtr_t*		mtr,	/* in: mtr */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	n_fields;
-	ulint	field_no;
-
-	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-	n_fields = rec_offs_n_fields(offsets);
-
-	for (field_no = 0; field_no < n_fields; field_no++) {
-		if (!rec_offs_nth_extern(offsets, field_no)) {
-
-			continue;
-		}
-
-		btr_cur_set_ownership_of_extern_field(rec, offsets, field_no,
-						      TRUE, mtr);
-	}
-}
-
-/***********************************************************************
-Marks all extern fields in a dtuple as owned by the record. For every field
-number listed in ext_vec, BTR_EXTERN_OWNER_FLAG is cleared in the byte at
-offset BTR_EXTERN_LEN of the field reference stored in the last
-BTR_EXTERN_FIELD_REF_SIZE bytes of the field data. */
-
-void
-btr_cur_unmark_dtuple_extern_fields(
-/*================================*/
-	dtuple_t*	entry,		/* in: clustered index entry */
-	ulint*		ext_vec,	/* in: array of numbers of fields
-					which have been stored externally */
-	ulint		n_ext_vec)	/* in: number of elements in ext_vec */
-{
-	dfield_t*	dfield;
-	ulint		byte_val;
-	byte*		data;
-	ulint		len;
-	ulint		i;
-
-	for (i = 0; i < n_ext_vec; i++) {
-		dfield = dtuple_get_nth_field(entry, ext_vec[i]);
-
-		data = (byte*) dfield_get_data(dfield);
-		len = dfield_get_len(dfield);
-
-		/* len is unsigned: a field shorter than the reference would
-		make the subtraction wrap around and the write below would
-		land outside the field */
-		ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-		len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-		byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN);
-
-		byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
-
-		mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
-	}
-}
-
-/***********************************************************************
-Stores the positions of the fields marked as extern storage in the update
-vector, and also those fields who are marked as extern storage in rec
-and not mentioned in updated fields. We use this function to remember
-which fields we must mark as extern storage in a record inserted for an
-update.
-
-The field numbers taken from the update vector are written to ext_vect
-first, followed by the numbers of the extern fields of the record, in
-ascending order, that do not appear in the update vector. */
-
-ulint
-btr_push_update_extern_fields(
-/*==========================*/
-				/* out: number of values stored in ext_vect */
-	ulint*		ext_vect,/* in: array of ulints, must be preallocated
-				to have space for all fields in rec */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	upd_t*		update)	/* in: update vector or NULL */
-{
-	ulint		n_pushed	= 0;
-	ulint		n_upd;
-	upd_field_t*	ufield;
-	ibool		is_updated;
-	ulint		n;
-	ulint		j;
-	ulint		i;
-
-	/* The vector length is constant here: read it once. A NULL
-	update vector behaves like an empty one. */
-	n_upd = update ? upd_get_n_fields(update) : 0;
-
-	for (i = 0; i < n_upd; i++) {
-		ufield = upd_get_nth_field(update, i);
-
-		if (ufield->extern_storage) {
-
-			ext_vect[n_pushed] = ufield->field_no;
-
-			n_pushed++;
-		}
-	}
-
-	n = rec_offs_n_fields(offsets);
-
-	for (i = 0; i < n; i++) {
-		if (!rec_offs_nth_extern(offsets, i)) {
-
-			continue;
-		}
-
-		/* Check it is not in updated fields */
-		is_updated = FALSE;
-
-		for (j = 0; j < n_upd; j++) {
-			if (upd_get_nth_field(update, j)->field_no == i) {
-				is_updated = TRUE;
-
-				/* One match decides it: stop scanning */
-				break;
-			}
-		}
-
-		if (!is_updated) {
-			ext_vect[n_pushed] = i;
-			n_pushed++;
-		}
-	}
-
-	return(n_pushed);
-}
-
-/***********************************************************************
-Returns the length of a BLOB part stored on the header page: the 4-byte
-value read at offset BTR_BLOB_HDR_PART_LEN of the blob header. */
-static
-ulint
-btr_blob_get_part_len(
-/*==================*/
-				/* out: part length */
-	byte*	blob_header)	/* in: blob header */
-{
-	byte*	part_len_field = blob_header + BTR_BLOB_HDR_PART_LEN;
-
-	return(mach_read_from_4(part_len_field));
-}
-
-/***********************************************************************
-Returns the page number where the next BLOB part is stored: the 4-byte
-value read at offset BTR_BLOB_HDR_NEXT_PAGE_NO of the blob header. */
-static
-ulint
-btr_blob_get_next_page_no(
-/*======================*/
-				/* out: page number or FIL_NULL if
-				no more pages */
-	byte*	blob_header)	/* in: blob header */
-{
-	byte*	next_page_field = blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO;
-
-	return(mach_read_from_4(next_page_field));
-}
-
-/***********************************************************************
-Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The fields are stored on pages allocated from leaf node
-file segment of the index tree.
-
-As coded below, every field is written as a chain of pages whose type is
-set to FIL_PAGE_TYPE_BLOB; each page is handled in a mini-transaction of
-its own that is started at the top of the inner loop and committed at its
-end. A page receives at most
-UNIV_PAGE_SIZE - FIL_PAGE_DATA - BTR_BLOB_HDR_SIZE - FIL_PAGE_DATA_END
-bytes of the field. After each page the length stored in the field
-reference of rec is set to the number of bytes stored so far; when the
-first page of a field is written, the space id, the page number and the
-offset FIL_PAGE_DATA are also written to the reference and the extern bit
-of the field is set in rec.
-
-If btr_page_alloc() returns NULL, the current mini-transaction is committed
-and DB_OUT_OF_FILE_SPACE is returned at once; this function does not itself
-free pages written in earlier iterations. */
-
-ulint
-btr_store_big_rec_extern_fields(
-/*============================*/
- /* out: DB_SUCCESS or error */
- dict_index_t* index, /* in: index of rec; the index tree
- MUST be X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets, /* in: rec_get_offsets(rec, index);
- the "external storage" flags in offsets
- will not correspond to rec when
- this function returns */
- big_rec_t* big_rec_vec, /* in: vector containing fields
- to be stored externally */
- mtr_t* local_mtr __attribute__((unused))) /* in: mtr
- containing the latch to rec and to the
- tree */
-{
- byte* data;
- ulint local_len;
- ulint extern_len;
- ulint store_len;
- ulint page_no;
- page_t* page;
- ulint space_id;
- page_t* prev_page;
- page_t* rec_page;
- ulint prev_page_no;
- ulint hint_page_no;
- ulint i;
- mtr_t mtr;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_X_FIX));
- ut_a(index->type & DICT_CLUSTERED);
-
- space_id = buf_frame_get_space_id(rec);
-
- /* We have to create a file segment to the tablespace
- for each field and put the pointer to the field in rec */
-
- for (i = 0; i < big_rec_vec->n_fields; i++) {
-
- data = rec_get_nth_field(rec, offsets,
- big_rec_vec->fields[i].field_no,
- &local_len);
- ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
- extern_len = big_rec_vec->fields[i].len; /* bytes of this field still to be stored */
-
- ut_a(extern_len > 0);
-
- prev_page_no = FIL_NULL; /* FIL_NULL: no page of this field written yet */
-
- while (extern_len > 0) {
- mtr_start(&mtr);
-
- if (prev_page_no == FIL_NULL) {
- hint_page_no = buf_frame_get_page_no(rec) + 1;
- } else {
- hint_page_no = prev_page_no + 1;
- }
-
- page = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, &mtr);
- if (page == NULL) {
-
- mtr_commit(&mtr);
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- mlog_write_ulint(page + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_BLOB,
- MLOG_2BYTES, &mtr);
-
- page_no = buf_frame_get_page_no(page);
-
- if (prev_page_no != FIL_NULL) {
- prev_page = buf_page_get(space_id,
- prev_page_no,
- RW_X_LATCH, &mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(prev_page,
- SYNC_EXTERN_STORAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- mlog_write_ulint(prev_page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO,
- page_no, MLOG_4BYTES, &mtr); /* link the previous page of the chain to the new one */
- }
-
- if (extern_len > (UNIV_PAGE_SIZE - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END)) {
- store_len = UNIV_PAGE_SIZE - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END;
- } else {
- store_len = extern_len;
- }
-
- mlog_write_string(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_SIZE,
- big_rec_vec->fields[i].data
- + big_rec_vec->fields[i].len
- - extern_len, /* = start of the part not stored yet */
- store_len, &mtr);
- mlog_write_ulint(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_PART_LEN,
- store_len, MLOG_4BYTES, &mtr);
- mlog_write_ulint(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO,
- FIL_NULL, MLOG_4BYTES, &mtr);
-
- extern_len -= store_len;
-
- rec_page = buf_page_get(space_id,
- buf_frame_get_page_no(data),
- RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, &mtr); /* first 4 bytes of the length field are zeroed */
- mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
- big_rec_vec->fields[i].len
- - extern_len,
- MLOG_4BYTES, &mtr); /* number of bytes stored so far */
-
- if (prev_page_no == FIL_NULL) {
- mlog_write_ulint(data + local_len
- + BTR_EXTERN_SPACE_ID,
- space_id,
- MLOG_4BYTES, &mtr);
-
- mlog_write_ulint(data + local_len
- + BTR_EXTERN_PAGE_NO,
- page_no,
- MLOG_4BYTES, &mtr);
-
- mlog_write_ulint(data + local_len
- + BTR_EXTERN_OFFSET,
- FIL_PAGE_DATA,
- MLOG_4BYTES, &mtr);
-
- /* Set the bit denoting that this field
- in rec is stored externally */
-
- rec_set_nth_field_extern_bit(
- rec, index,
- big_rec_vec->fields[i].field_no,
- TRUE, &mtr);
- }
-
- prev_page_no = page_no;
-
- mtr_commit(&mtr);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************************
-Frees the space in an externally stored field to the file space
-management if the field in data owns the externally stored field;
-in a rollback we may have the additional condition that the field must
-not be inherited.
-
-The pages of the field are freed one at a time, each in a mini-transaction
-of its own: the page named by the field reference is freed with
-btr_page_free_low(), and the reference is then updated to name the next
-page and the remaining length. The loop ends when the length stored in the
-reference is 0. Nothing is freed if BTR_EXTERN_OWNER_FLAG is set in the
-reference, or if do_not_free_inherited is TRUE and
-BTR_EXTERN_INHERITED_FLAG is set. */
-
-void
-btr_free_externally_stored_field(
-/*=============================*/
-	dict_index_t*	index,		/* in: index of the data, the index
-					tree MUST be X-latched; if the tree
-					height is 1, then also the root page
-					must be X-latched! (this is relevant
-					in the case this function is called
-					from purge where 'data' is located on
-					an undo log page, not an index
-					page) */
-	byte*		data,		/* in: internally stored data
-					+ reference to the externally
-					stored part */
-	ulint		local_len,	/* in: length of data */
-	ibool		do_not_free_inherited,/* in: TRUE if called in a
-					rollback and we do not want to free
-					inherited fields */
-	mtr_t*		local_mtr __attribute__((unused))) /* in: mtr
-					containing the latch to data and an
-					X-latch to the index tree */
-{
-	page_t*	page;
-	page_t*	rec_page;
-	ulint	space_id;
-	ulint	page_no;
-	ulint	extern_len;
-	ulint	next_page_no;
-	ulint	part_len;
-	mtr_t	mtr;
-
-	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data),
-				MTR_MEMO_PAGE_X_FIX));
-
-	/* From here on local_len is the offset of the field reference
-	within data */
-	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-	for (;;) {
-		mtr_start(&mtr);
-
-		rec_page = buf_page_get(buf_frame_get_space_id(data),
-					buf_frame_get_page_no(data),
-					RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-		space_id = mach_read_from_4(data + local_len
-					    + BTR_EXTERN_SPACE_ID);
-
-		page_no = mach_read_from_4(data + local_len
-					   + BTR_EXTERN_PAGE_NO);
-
-		extern_len = mach_read_from_4(data + local_len
-					      + BTR_EXTERN_LEN + 4);
-
-		/* If extern len is 0, then there is no external storage data
-		at all */
-
-		if (extern_len == 0) {
-
-			mtr_commit(&mtr);
-
-			return;
-		}
-
-		if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
-		    & BTR_EXTERN_OWNER_FLAG) {
-			/* This field does not own the externally
-			stored field: do not free! */
-
-			mtr_commit(&mtr);
-
-			return;
-		}
-
-		if (do_not_free_inherited
-		    && (mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
-			& BTR_EXTERN_INHERITED_FLAG)) {
-			/* Rollback and inherited field: do not free! */
-
-			mtr_commit(&mtr);
-
-			return;
-		}
-
-		page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
-#endif /* UNIV_SYNC_DEBUG */
-		next_page_no = mach_read_from_4(page + FIL_PAGE_DATA
-						+ BTR_BLOB_HDR_NEXT_PAGE_NO);
-
-		part_len = btr_blob_get_part_len(page + FIL_PAGE_DATA);
-
-		ut_a(extern_len >= part_len);
-
-		/* We must supply the page level (= 0) as an argument
-		because we did not store it on the page (we save the space
-		overhead from an index page header). */
-
-		btr_page_free_low(index, page, 0, &mtr);
-
-		mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO,
-				 next_page_no,
-				 MLOG_4BYTES, &mtr);
-		mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
-				 extern_len - part_len,
-				 MLOG_4BYTES, &mtr);
-
-		/* The chain must end exactly where the remaining length
-		reaches 0, and nowhere else */
-		if (next_page_no == FIL_NULL) {
-			ut_a(extern_len - part_len == 0);
-		}
-
-		if (extern_len - part_len == 0) {
-			ut_a(next_page_no == FIL_NULL);
-		}
-
-		mtr_commit(&mtr);
-	}
-}
-
-/***************************************************************
-Frees the externally stored fields for a record. Every field flagged as
-externally stored in offsets is handed to
-btr_free_externally_stored_field() together with do_not_free_inherited. */
-
-void
-btr_rec_free_externally_stored_fields(
-/*==================================*/
-	dict_index_t*	index,	/* in: index of the data, the index
-				tree MUST be X-latched */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	ibool		do_not_free_inherited,/* in: TRUE if called in a
-				rollback and we do not want to free
-				inherited fields */
-	mtr_t*		mtr)	/* in: mini-transaction handle which contains
-				an X-latch to record page and to the index
-				tree */
-{
-	ulint	field_count;
-	ulint	field_no;
-	ulint	field_len;
-	byte*	field_data;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
-
-	field_count = rec_offs_n_fields(offsets);
-
-	/* Walk the record and release whatever is stored off-page */
-	for (field_no = 0; field_no < field_count; field_no++) {
-		if (!rec_offs_nth_extern(offsets, field_no)) {
-
-			continue;
-		}
-
-		field_data = rec_get_nth_field(rec, offsets, field_no,
-					       &field_len);
-		btr_free_externally_stored_field(index, field_data, field_len,
-						 do_not_free_inherited, mtr);
-	}
-}
-
-/***************************************************************
-Frees the externally stored fields for a record, if the field is mentioned
-in the update vector. The update vector is walked element by element; when
-the field an element refers to is flagged as externally stored in offsets,
-that field of rec is handed to btr_free_externally_stored_field(). */
-static
-void
-btr_rec_free_updated_extern_fields(
-/*===============================*/
-	dict_index_t*	index,	/* in: index of rec; the index tree MUST be
-				X-latched */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	upd_t*		update,	/* in: update vector */
-	ibool		do_not_free_inherited,/* in: TRUE if called in a
-				rollback and we do not want to free
-				inherited fields */
-	mtr_t*		mtr)	/* in: mini-transaction handle which contains
-				an X-latch to record page and to the tree */
-{
-	upd_field_t*	upd_field;
-	ulint		upd_count;
-	ulint		upd_no;
-	ulint		field_len;
-	byte*		field_data;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
-				MTR_MEMO_PAGE_X_FIX));
-
-	upd_count = upd_get_n_fields(update);
-
-	/* Release the off-page part of each updated field that has one */
-	for (upd_no = 0; upd_no < upd_count; upd_no++) {
-		upd_field = upd_get_nth_field(update, upd_no);
-
-		if (!rec_offs_nth_extern(offsets, upd_field->field_no)) {
-
-			continue;
-		}
-
-		field_data = rec_get_nth_field(rec, offsets,
-					       upd_field->field_no,
-					       &field_len);
-		btr_free_externally_stored_field(index, field_data, field_len,
-						 do_not_free_inherited, mtr);
-	}
-}
-
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. Parameter
-data contains a pointer to 'internally' stored part of the field:
-possibly some data, and the reference to the externally stored part in
-the last 20 bytes of data.
-
-The result buffer is allocated from heap with room for the locally stored
-prefix plus the external length read from the reference. The prefix is
-copied first; then the chain of BLOB pages is followed, each page being
-S-latched in a mini-transaction of its own, until the next-page field is
-FIL_NULL. */
-
-byte*
-btr_copy_externally_stored_field(
-/*=============================*/
-				/* out: the whole field copied to heap */
-	ulint*		len,	/* out: length of the whole field */
-	byte*		data,	/* in: 'internally' stored part of the
-				field containing also the reference to
-				the external part */
-	ulint		local_len,/* in: length of data */
-	mem_heap_t*	heap)	/* in: mem heap */
-{
-	page_t*	page;
-	ulint	space_id;
-	ulint	page_no;
-	ulint	offset;
-	ulint	extern_len;
-	ulint	total_len;
-	byte*	blob_header;
-	ulint	part_len;
-	byte*	buf;
-	ulint	copied_len;
-	mtr_t	mtr;
-
-	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
-	space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);
-
-	page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);
-
-	offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
-
-	/* Currently a BLOB cannot be bigger than 4 GB; we
-	leave the 4 upper bytes in the length field unused */
-
-	extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
-
-	total_len = local_len + extern_len;
-
-	buf = mem_heap_alloc(heap, total_len);
-
-	ut_memcpy(buf, data, local_len);
-	copied_len = local_len;
-
-	if (extern_len == 0) {
-		*len = copied_len;
-
-		return(buf);
-	}
-
-	for (;;) {
-		mtr_start(&mtr);
-
-		page = buf_page_get(space_id, page_no, RW_S_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
-#endif /* UNIV_SYNC_DEBUG */
-		blob_header = page + offset;
-
-		part_len = btr_blob_get_part_len(blob_header);
-
-		/* Check that the part fits in what is left of buf BEFORE
-		copying: part_len comes from the page, and a wrong value
-		must stop us here rather than after the copy has already
-		written past the end of the buffer. copied_len never
-		exceeds total_len, so the subtraction cannot wrap. */
-		ut_a(part_len <= total_len - copied_len);
-
-		ut_memcpy(buf + copied_len, blob_header + BTR_BLOB_HDR_SIZE,
-			  part_len);
-		copied_len += part_len;
-
-		page_no = btr_blob_get_next_page_no(blob_header);
-
-		mtr_commit(&mtr);
-
-		if (page_no == FIL_NULL) {
-			ut_a(copied_len == total_len);
-
-			*len = copied_len;
-
-			return(buf);
-		}
-
-		/* On other BLOB pages except the first the BLOB header
-		always is at the page data start: */
-
-		offset = FIL_PAGE_DATA;
-
-		ut_a(copied_len < total_len);
-	}
-}
-
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. The field is
-located in rec with rec_get_nth_field() and the copying itself is done by
-btr_copy_externally_stored_field(). */
-
-byte*
-btr_rec_copy_externally_stored_field(
-/*=================================*/
-				/* out: the field copied to heap */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		no,	/* in: field number */
-	ulint*		len,	/* out: length of the field */
-	mem_heap_t*	heap)	/* in: mem heap */
-{
-	byte*	field_start;
-	ulint	stored_len;
-	byte*	copy;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_a(rec_offs_nth_extern(offsets, no));
-
-	/* The locally stored part of an externally stored field may hold
-	the beginning of the field data. Its last 20 bytes hold the space
-	id, the page number and the offset where the rest of the field is
-	stored, and the length of that rest. Some data may have to stay in
-	the record to keep the local record length above the 128 byte
-	limit, so that field offsets take two bytes and the extern bit is
-	available in them. */
-
-	field_start = rec_get_nth_field(rec, offsets, no, &stored_len);
-
-	copy = btr_copy_externally_stored_field(len, field_start, stored_len,
-						heap);
-
-	return(copy);
-}
diff --git a/storage/innobase/btr/btr0pcur.c b/storage/innobase/btr/btr0pcur.c
deleted file mode 100644
index 65b3c90c809..00000000000
--- a/storage/innobase/btr/btr0pcur.c
+++ /dev/null
@@ -1,565 +0,0 @@
-/******************************************************
-The index tree persistent cursor
-
-(c) 1996 Innobase Oy
-
-Created 2/23/1996 Heikki Tuuri
-*******************************************************/
-
-#include "btr0pcur.h"
-
-#ifdef UNIV_NONINL
-#include "btr0pcur.ic"
-#endif
-
-#include "ut0byte.h"
-#include "rem0cmp.h"
-#include "trx0trx.h"
-
-/******************************************************************
-Creates a persistent cursor object in dynamically allocated memory and
-initializes it. The index pointer of the embedded tree cursor is set to
-NULL before btr_pcur_init() is called. */
-
-btr_pcur_t*
-btr_pcur_create_for_mysql(void)
-/*============================*/
-				/* out, own: persistent cursor */
-{
-	btr_pcur_t*	new_cursor = mem_alloc(sizeof(*new_cursor));
-
-	new_cursor->btr_cur.index = NULL;
-
-	btr_pcur_init(new_cursor);
-
-	return(new_cursor);
-}
-
-/******************************************************************
-Releases a persistent cursor object: frees the buffer holding the stored
-record prefix, if there is one, resets the position fields of the cursor,
-and finally frees the cursor object itself. */
-
-void
-btr_pcur_free_for_mysql(
-/*====================*/
-	btr_pcur_t*	cursor)	/* in, own: persistent cursor */
-{
-	if (cursor->old_rec_buf) {
-		mem_free(cursor->old_rec_buf);
-		cursor->old_rec_buf = NULL;
-	}
-
-	/* Leave no stale position information behind in the object */
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-	cursor->old_n_fields = 0;
-	cursor->old_rec = NULL;
-	cursor->btr_cur.page_cur.rec = NULL;
-
-	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
-	cursor->latch_mode = BTR_NO_LATCHES;
-
-	mem_free(cursor);
-}
-
-/******************************************************************
-Stores the position of a persistent cursor so that it can be restored later
-with btr_pcur_restore_position(). The position is stored by taking an initial
-segment of the record the cursor is positioned on, before, or after, and
-copying it to the cursor data structure, or just setting a flag if the cursor
-is before the first in an EMPTY tree, or after the last in an EMPTY tree.
-When a record prefix is stored, the buffer block of the page and the current
-modify clock of that block are also saved in the cursor. NOTE that the
-page where the cursor is positioned must not be empty if the index tree is
-not totally empty! */
-
-void
-btr_pcur_store_position(
-/*====================*/
- btr_pcur_t* cursor, /* in/out: persistent cursor; must be positioned
- and hold a latch */
- mtr_t* mtr) /* in: mtr */
-{
- page_cur_t* page_cursor;
- rec_t* rec;
- dict_index_t* index;
- page_t* page;
- ulint offs;
-
- ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
-
- page_cursor = btr_pcur_get_page_cur(cursor);
-
- rec = page_cur_get_rec(page_cursor);
- page = page_align(rec);
- offs = page_offset(rec);
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
- ut_a(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
- /* It must be an empty index tree; NOTE that in this case
- we do not store the modify_clock, but always do a search
- if we restore the cursor position */
-
- ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
-
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- if (page_rec_is_supremum_low(offs)) {
-
- cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE;
- } else {
- cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE;
- }
-
- return;
- }
-
- if (page_rec_is_supremum_low(offs)) {
-
- rec = page_rec_get_prev(rec); /* store the predecessor of the supremum */
-
- cursor->rel_pos = BTR_PCUR_AFTER;
-
- } else if (page_rec_is_infimum_low(offs)) {
-
- rec = page_rec_get_next(rec); /* store the successor of the infimum */
-
- cursor->rel_pos = BTR_PCUR_BEFORE;
- } else {
- cursor->rel_pos = BTR_PCUR_ON;
- }
-
- cursor->old_stored = BTR_PCUR_OLD_STORED;
- cursor->old_rec = dict_index_copy_rec_order_prefix(
- index, rec, &cursor->old_n_fields,
- &cursor->old_rec_buf, &cursor->buf_size);
-
- cursor->block_when_stored = buf_block_align(page);
- cursor->modify_clock = buf_block_get_modify_clock(
- cursor->block_when_stored);
-}
-
-/******************************************************************
-Copies the stored position of one persistent cursor to another. The whole
-cursor struct is copied from the donor; if the donor owns a buffer for its
-stored record, the receiver gets a private copy of that buffer and its
-old_rec is made to point to the same offset within the copy. Any buffer
-the receiver owned beforehand is freed first. */
-
-void
-btr_pcur_copy_stored_position(
-/*==========================*/
-	btr_pcur_t*	pcur_receive,	/* in: pcur which will receive the
-					position info */
-	btr_pcur_t*	pcur_donate)	/* in: pcur from which the info is
-					copied */
-{
-	if (pcur_receive->old_rec_buf != NULL) {
-		mem_free(pcur_receive->old_rec_buf);
-	}
-
-	ut_memcpy(pcur_receive, pcur_donate, sizeof(*pcur_donate));
-
-	if (pcur_donate->old_rec_buf != NULL) {
-
-		/* The struct copy above made the receiver share the donor's
-		buffer: replace it with a buffer of the receiver's own */
-
-		pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);
-
-		pcur_receive->old_rec
-			= pcur_receive->old_rec_buf
-			+ (pcur_donate->old_rec - pcur_donate->old_rec_buf);
-
-		ut_memcpy(pcur_receive->old_rec_buf,
-			  pcur_donate->old_rec_buf,
-			  pcur_donate->buf_size);
-	}
-
-	pcur_receive->old_n_fields = pcur_donate->old_n_fields;
-}
-
-/******************************************************************
-Restores the stored position of a persistent cursor bufferfixing the page and
-obtaining the specified latches. If the cursor position was saved when the
-(1) cursor was positioned on a user record: this function restores the position
-to the last record LESS OR EQUAL to the stored record;
-(2) cursor was positioned on a page infimum record: restores the position to
-the last record LESS than the user record which was the successor of the page
-infimum;
-(3) cursor was positioned on the page supremum: restores to the first record
-GREATER than the user record which was the predecessor of the supremum.
-(4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree.
-
-The function calls ut_error if no position has been stored in the cursor or
-if the cursor state is neither BTR_PCUR_WAS_POSITIONED nor
-BTR_PCUR_IS_POSITIONED. In case (4) the cursor is opened at the index side
-and FALSE is returned. Otherwise, for BTR_SEARCH_LEAF and BTR_MODIFY_LEAF an
-optimistic restoration with buf_page_optimistic_get() is tried first, using
-the stored block and modify clock; if that is not possible or does not
-succeed, a tuple is built from the stored record prefix and the cursor is
-opened anew with a search mode chosen by the stored relative position. */
-
-ibool
-btr_pcur_restore_position(
-/*======================*/
- /* out: TRUE if the cursor position
- was stored when it was on a user record
- and it can be restored on a user record
- whose ordering fields are identical to
- the ones of the original user record */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in: detached persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- dict_index_t* index;
- page_t* page;
- dtuple_t* tuple;
- ulint mode;
- ulint old_mode;
- mem_heap_t* heap;
-
- index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
-
- if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
- || UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
- && cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
- ut_print_buf(stderr, cursor, sizeof(btr_pcur_t)); /* dump the cursor struct for diagnosis */
- if (cursor->trx_if_known) {
- trx_print(stderr, cursor->trx_if_known, 0);
- }
-
- ut_error;
- }
-
- if (UNIV_UNLIKELY(
- cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
- || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
-
- /* In these cases we do not try an optimistic restoration,
- but always do a search */
-
- btr_cur_open_at_index_side(
- cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
- index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
-
- cursor->block_when_stored
- = buf_block_align(btr_pcur_get_page(cursor));
-
- return(FALSE);
- }
-
- ut_a(cursor->old_rec);
- ut_a(cursor->old_n_fields);
-
- page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
-
- if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
- || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
- /* Try optimistic restoration */
-
- if (UNIV_LIKELY(buf_page_optimistic_get(
- latch_mode,
- cursor->block_when_stored, page,
- cursor->modify_clock, mtr))) {
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
- if (cursor->rel_pos == BTR_PCUR_ON) {
-#ifdef UNIV_DEBUG
- rec_t* rec;
- ulint* offsets1;
- ulint* offsets2;
-#endif /* UNIV_DEBUG */
- cursor->latch_mode = latch_mode;
-#ifdef UNIV_DEBUG
- rec = btr_pcur_get_rec(cursor);
-
- heap = mem_heap_create(256);
- offsets1 = rec_get_offsets(
- cursor->old_rec, index, NULL,
- cursor->old_n_fields, &heap);
- offsets2 = rec_get_offsets(
- rec, index, NULL,
- cursor->old_n_fields, &heap);
-
- ut_ad(!cmp_rec_rec(cursor->old_rec,
- rec, offsets1, offsets2,
- index)); /* debug check: the record under the cursor
- compares equal to the stored prefix */
- mem_heap_free(heap);
-#endif /* UNIV_DEBUG */
- return(TRUE);
- }
-
- return(FALSE); /* NOTE(review): cursor->latch_mode is not assigned
- on this path -- confirm that callers do not
- rely on it */
- }
- }
-
- /* If optimistic restoration did not succeed, open the cursor anew */
-
- heap = mem_heap_create(256);
-
- tuple = dict_index_build_data_tuple(index, cursor->old_rec,
- cursor->old_n_fields, heap);
-
- /* Save the old search mode of the cursor */
- old_mode = cursor->search_mode;
-
- if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
- mode = PAGE_CUR_LE; /* case (1) of the function comment */
- } else if (cursor->rel_pos == BTR_PCUR_AFTER) {
- mode = PAGE_CUR_G; /* case (3) */
- } else {
- ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
- mode = PAGE_CUR_L; /* case (2) */
- }
-
- btr_pcur_open_with_no_init(index, tuple, mode, latch_mode,
- cursor, 0, mtr);
-
- /* Restore the old search mode */
- cursor->search_mode = old_mode;
-
- if (cursor->rel_pos == BTR_PCUR_ON
- && btr_pcur_is_on_user_rec(cursor, mtr)
- && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
- rec_get_offsets(
- btr_pcur_get_rec(cursor), index,
- NULL, ULINT_UNDEFINED, &heap))) {
-
- /* We have to store the NEW value for the modify clock, since
- the cursor can now be on a different page! But we can retain
- the value of old_rec */
-
- cursor->block_when_stored = buf_block_align(
- btr_pcur_get_page(cursor));
- cursor->modify_clock = buf_block_get_modify_clock(
- cursor->block_when_stored);
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
-
- mem_heap_free(heap);
-
- /* We have to store new position information, modify_clock etc.,
- to the cursor because it can now be on a different page, the record
- under it may have been removed, etc. */
-
- btr_pcur_store_position(cursor, mtr);
-
- return(FALSE);
-}
-
-/******************************************************************
-Releases the page latch and bufferfix which the cursor holds on its leaf
-page in latch mode BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. Afterwards the
-cursor holds no latches and its state is BTR_PCUR_WAS_POSITIONED.
-NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
-made by the current mini-transaction to the data protected by the
-cursor latch, as then the latch must not be released until mtr_commit. */
-
-void
-btr_pcur_release_leaf(
-/*==================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*	leaf_page;
-
-	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
-	leaf_page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
-	btr_leaf_page_release(leaf_page, cursor->latch_mode, mtr);
-
-	/* The cursor is detached from the page from now on */
-	cursor->latch_mode = BTR_NO_LATCHES;
-	cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/*************************************************************
-Moves the persistent cursor to before the first record on the next page.
-The next page is latched in the latch mode of the cursor before the latch
-on the current page is released and the page bufferunfixed. Note that there
-must not be modifications on the current page, as then the x-latch can be
-released only in mtr_commit. */
-
-void
-btr_pcur_move_to_next_page(
-/*=======================*/
-	btr_pcur_t*	cursor,	/* in: persistent cursor; must be on the
-				last record of the current page */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*	cur_page;
-	page_t*	succ_page;
-	ulint	succ_page_no;
-	ulint	space_id;
-
-	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-	ut_ad(btr_pcur_is_after_last_on_page(cursor, mtr));
-
-	/* Any stored position no longer describes where the cursor is */
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
-	cur_page = btr_pcur_get_page(cursor);
-
-	succ_page_no = btr_page_get_next(cur_page, mtr);
-	space_id = buf_frame_get_space_id(cur_page);
-
-	ut_ad(succ_page_no != FIL_NULL);
-
-	succ_page = btr_page_get(space_id, succ_page_no,
-				 cursor->latch_mode, mtr);
-#ifdef UNIV_BTR_DEBUG
-	ut_a(btr_page_get_prev(succ_page, mtr)
-	     == buf_frame_get_page_no(cur_page));
-#endif /* UNIV_BTR_DEBUG */
-	ut_a(page_is_comp(succ_page) == page_is_comp(cur_page));
-	buf_block_align(succ_page)->check_index_page_at_flush = TRUE;
-
-	/* Only now, with the next page latched, let go of the current one */
-	btr_leaf_page_release(cur_page, cursor->latch_mode, mtr);
-
-	page_cur_set_before_first(succ_page, btr_pcur_get_page_cur(cursor));
-
-	page_check_dir(succ_page);
-}
-
-/*************************************************************
-Moves the persistent cursor backward if it is on the first record of the page.
-Commits mtr. Note that to prevent a possible deadlock, the operation
-first stores the position of the cursor, commits mtr, acquires the necessary
-latches and restores the cursor position again before returning. The
-alphabetical position of the cursor is guaranteed to be sensible on
-return, but it may happen that the cursor is not positioned on the last
-record of any page, because the structure of the tree may have changed
-during the time when the cursor had no latches. The latch mode of the cursor
-must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF; with any other mode the function
-calls ut_error. On return the cursor has its original latch mode and no
-stored position. */
-
-void
-btr_pcur_move_backward_from_page(
-/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor, must be on the first
- record of the current page */
- mtr_t* mtr) /* in: mtr; committed and restarted by this function */
-{
- ulint prev_page_no;
- ulint space;
- page_t* page;
- page_t* prev_page;
- ulint latch_mode;
- ulint latch_mode2;
-
- ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- ut_ad(btr_pcur_is_before_first_on_page(cursor, mtr));
- ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
-
- latch_mode = cursor->latch_mode;
-
- if (latch_mode == BTR_SEARCH_LEAF) {
-
- latch_mode2 = BTR_SEARCH_PREV;
-
- } else if (latch_mode == BTR_MODIFY_LEAF) {
-
- latch_mode2 = BTR_MODIFY_PREV;
- } else {
- latch_mode2 = 0; /* To eliminate compiler warning */
- ut_error;
- }
-
- btr_pcur_store_position(cursor, mtr);
-
- mtr_commit(mtr);
-
- mtr_start(mtr);
-
- btr_pcur_restore_position(latch_mode2, cursor, mtr); /* restored with the ..._PREV latch mode chosen above */
-
- page = btr_pcur_get_page(cursor);
-
- prev_page_no = btr_page_get_prev(page, mtr);
- space = buf_frame_get_space_id(page); /* NOTE(review): space is assigned here but not read afterwards */
-
- if (btr_pcur_is_before_first_on_page(cursor, mtr)
- && (prev_page_no != FIL_NULL)) {
-
- prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
-
- btr_leaf_page_release(page, latch_mode, mtr);
-
- page_cur_set_after_last(prev_page,
- btr_pcur_get_page_cur(cursor));
- } else if (prev_page_no != FIL_NULL) {
-
- /* The repositioned cursor did not end on an infimum record on
- a page. Cursor repositioning acquired a latch also on the
- previous page, but we do not need the latch: release it. */
-
- prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
-
- btr_leaf_page_release(prev_page, latch_mode, mtr);
- }
-
- cursor->latch_mode = latch_mode;
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*************************************************************
-Steps the persistent cursor one record backward in the tree. Within a page
-this is a plain move on the page; at the start of a page the cursor is moved
-backward from the page with btr_pcur_move_backward_from_page(). If the
-cursor already is before the first record in the tree, it is left there.
-Any stored position of the cursor is invalidated. */
-
-ibool
-btr_pcur_move_to_prev(
-/*==================*/
-				/* out: TRUE if the cursor was not before first
-				in tree */
-	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
-				function may release the page latch */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
-	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
-	if (!btr_pcur_is_before_first_on_page(cursor, mtr)) {
-		/* The common case: the previous record is on this page */
-
-		btr_pcur_move_to_prev_on_page(cursor, mtr);
-
-		return(TRUE);
-	}
-
-	if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
-		/* Nowhere to go */
-
-		return(FALSE);
-	}
-
-	btr_pcur_move_backward_from_page(cursor, mtr);
-
-	return(TRUE);
-}
-
-/******************************************************************
-Opens a persistent cursor with the given search mode and, if mode is
-PAGE_CUR_G or PAGE_CUR_GE and the cursor ends up after the last record on a
-page, moves it on to the next user record. The intended contract is: for
-PAGE_CUR_G or PAGE_CUR_GE the cursor is placed on the first user record
-satisfying the search condition, or after last in tree if there is none;
-for PAGE_CUR_L or PAGE_CUR_LE on the last such user record, or before first
-in tree. NOTE that the PAGE_CUR_L and PAGE_CUR_LE cases are not implemented:
-any mode other than PAGE_CUR_G or PAGE_CUR_GE ends in ut_error. The latching
-mode must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. */
-
-void
-btr_pcur_open_on_user_rec(
-/*======================*/
-	dict_index_t*	index,		/* in: index */
-	dtuple_t*	tuple,		/* in: tuple on which search done */
-	ulint		mode,		/* in: PAGE_CUR_L, ... */
-	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
-					BTR_MODIFY_LEAF */
-	btr_pcur_t*	cursor,		/* in: memory buffer for persistent
-					cursor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
-
-	if (mode != PAGE_CUR_GE && mode != PAGE_CUR_G) {
-		ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));
-
-		/* Not implemented yet */
-
-		ut_error;
-
-	} else if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
-
-		btr_pcur_move_to_next_user_rec(cursor, mtr);
-	}
-}
diff --git a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
deleted file mode 100644
index 8d296fdd061..00000000000
--- a/storage/innobase/btr/btr0sea.c
+++ /dev/null
@@ -1,1762 +0,0 @@
-/************************************************************************
-The index tree adaptive search
-
-(c) 1996 Innobase Oy
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "btr0sea.h"
-#ifdef UNIV_NONINL
-#include "btr0sea.ic"
-#endif
-
-#include "buf0buf.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "btr0cur.h"
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "ha0ha.h"
-
-ulint btr_search_this_is_zero = 0; /* A dummy variable to fool the
- compiler: it is added to a pointer in
- btr_search_info_update_slow() so that
- the values read through that pointer
- cannot be deduced at compile time */
-
-#ifdef UNIV_SEARCH_PERF_STAT
-ulint btr_search_n_succ = 0;
-ulint btr_search_n_hash_fail = 0; /* incremented in btr_search_info_update_slow()
- when cursor->flag is BTR_CUR_HASH_FAIL */
-#endif /* UNIV_SEARCH_PERF_STAT */
-
-byte btr_sea_pad1[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line as btr_search_latch */
-
-/* The latch protecting the adaptive search system: this latch protects the
-(1) positions of records on those pages where a hash index has been built.
-NOTE: It does not protect values of non-ordering fields within a record from
-being updated in-place! We can use fact (1) to perform unique searches on
-indexes. */
-
-rw_lock_t* btr_search_latch_temp; /* We will allocate the latch from
- dynamic memory to get it to the
- same DRAM page as other hotspot
- semaphores; the allocation is done
- in btr_search_sys_create() */
-
-byte btr_sea_pad2[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line */
-
-btr_search_sys_t* btr_search_sys;
-
-/* A hash index is recommended for a page when the n_hash_helps counter of
-its block exceeds the number of records on the page divided by this
-parameter, provided that the global limit below has also been reached;
-see btr_search_update_block_hash_info() */
-
-#define BTR_SEARCH_PAGE_BUILD_LIMIT 16
-
-/* The global limit for consecutive potentially successful hash searches,
-before hash index building is started: info->n_hash_potential must be at
-least this big */
-
-#define BTR_SEARCH_BUILD_LIMIT 100
-
-/************************************************************************
-Forward declaration; the function is defined later in this file.
-Builds a hash index on a page with the given parameters. If the page already
-has a hash index with different parameters, the old hash index is removed.
-If index is non-NULL, this function checks if n_fields and n_bytes are
-sensible values, and does not build a hash index if not. */
-static
-void
-btr_search_build_page_hash_index(
-/*=============================*/
- dict_index_t* index, /* in: index for which to build, or NULL if
- not known */
- page_t* page, /* in: index page, s- or x-latched */
- ulint n_fields,/* in: hash this many full fields */
- ulint n_bytes,/* in: hash this many bytes from the next
- field */
- ibool left_side);/* in: hash for searches from left side? */
-
-/*********************************************************************
-This function should be called before reserving any btr search mutex, if
-the intended operation might add nodes to the search system hash table.
-Because of the latching order, once we have reserved the btr search system
-latch, we cannot allocate a free frame from the buffer pool. Checks that
-there is a free buffer frame allocated for the hash table heap in the btr
-search system. If not, allocates a free frame for the heap. This check makes
-it probable that, when we have reserved the btr search system latch and we
-need to allocate a new node to the hash table, it will succeed. However, the
-check will not guarantee success. */
-static
-void
-btr_search_check_free_space_in_heap(void)
-/*=====================================*/
-{
-	mem_heap_t*	hash_heap;
-	buf_frame_t*	spare_frame;
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	hash_heap = btr_search_sys->hash_index->heap;
-
-	/* The value of free_block is peeked here without reserving the
-	latch: this is ok, because no guarantee is given that there will
-	be enough free space in the hash table. */
-
-	if (hash_heap->free_block != NULL) {
-
-		return;
-	}
-
-	/* The frame has to be allocated before the latch is taken */
-	spare_frame = buf_frame_alloc();
-
-	rw_lock_x_lock(&btr_search_latch);
-
-	if (hash_heap->free_block != NULL) {
-		/* The heap got a free block in the meantime: give the
-		frame back */
-		buf_frame_free(spare_frame);
-	} else {
-		hash_heap->free_block = spare_frame;
-	}
-
-	rw_lock_x_unlock(&btr_search_latch);
-}
-
-/*********************************************************************
-Creates and initializes the adaptive search system at a database start:
-allocates and creates the search latch, allocates the search system struct
-and creates its hash table. */
-
-void
-btr_search_sys_create(
-/*==================*/
-	ulint	hash_size)	/* in: hash index hash table size */
-{
-	/* The search latch lives in dynamic memory: the reason is given
-	at the definition of the global variable btr_search_latch_temp */
-
-	btr_search_latch_temp = mem_alloc(sizeof(*btr_search_latch_temp));
-	rw_lock_create(&btr_search_latch, SYNC_SEARCH_SYS);
-
-	btr_search_sys = mem_alloc(sizeof(*btr_search_sys));
-	btr_search_sys->hash_index = ha_create(TRUE, hash_size, 0, 0);
-}
-
-/*********************************************************************
-Allocates a search info struct from the given heap and gives every field
-an initial value: no references, no root guess, zeroed counters, and a
-recommendation of one full field hashed from the left side. */
-
-btr_search_t*
-btr_search_info_create(
-/*===================*/
-				/* out, own: search info struct */
-	mem_heap_t*	heap)	/* in: heap where created */
-{
-	btr_search_t*	search_info;
-
-	search_info = mem_heap_alloc(heap, sizeof(*search_info));
-
-#ifdef UNIV_DEBUG
-	search_info->magic_n = BTR_SEARCH_MAGIC_N;
-#endif /* UNIV_DEBUG */
-
-	search_info->root_guess = NULL;
-	search_info->ref_count = 0;
-
-	search_info->last_hash_succ = FALSE;
-	search_info->n_hash_potential = 0;
-	search_info->hash_analysis = 0;
-
-#ifdef UNIV_SEARCH_PERF_STAT
-	search_info->n_searches = 0;
-	search_info->n_patt_succ = 0;
-	search_info->n_hash_fail = 0;
-	search_info->n_hash_succ = 0;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
-	/* Start out with some sensible recommendation */
-	search_info->left_side = TRUE;
-	search_info->n_bytes = 0;
-	search_info->n_fields = 1;
-
-	return(search_info);
-}
-
-/*********************************************************************
-Reads the ref_count of a search info struct while holding
-btr_search_latch in shared mode. The caller must not own the latch. */
-ulint
-btr_search_info_get_ref_count(
-/*==========================*/
-				/* out: ref_count value. */
-	btr_search_t*	info)	/* in: search info. */
-{
-	ulint	n_refs;
-
-	ut_ad(info);
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	rw_lock_s_lock(&btr_search_latch);
-
-	n_refs = info->ref_count;
-
-	rw_lock_s_unlock(&btr_search_latch);
-
-	return(n_refs);
-}
-
-/*************************************************************************
-Updates the search info of an index about hash successes. NOTE that info
-is NOT protected by any semaphore, to save CPU time! Do not assume its fields
-are consistent.
-
-If the current recommendation (info->n_fields, info->n_bytes,
-info->left_side) would have worked for the search that positioned the
-cursor, info->n_hash_potential is incremented. Otherwise a new
-recommendation is computed from the low_match/low_bytes and
-up_match/up_bytes values of the cursor, and info->hash_analysis is reset
-to 0. Nothing is done for an index of type DICT_IBUF. */
-static
-void
-btr_search_info_update_hash(
-/*========================*/
- btr_search_t* info, /* in/out: search info */
- btr_cur_t* cursor) /* in: cursor which was just positioned */
-{
- dict_index_t* index;
- ulint n_unique;
- int cmp;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- index = cursor->index;
-
- if (index->type & DICT_IBUF) {
- /* So many deletes are performed on an insert buffer tree
- that we do not consider a hash index useful on it: */
-
- return;
- }
-
- n_unique = dict_index_get_n_unique_in_tree(index);
-
- if (info->n_hash_potential == 0) {
-
- goto set_new_recomm; /* there is no recommendation to test yet */
- }
-
- /* Test if the search would have succeeded using the recommended
- hash prefix */
-
- if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
-increment_potential:
- info->n_hash_potential++;
-
- return;
- }
-
- cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
- cursor->low_match, cursor->low_bytes);
-
- if (info->left_side ? cmp <= 0 : cmp > 0) {
-
- goto set_new_recomm;
- }
-
- cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
- cursor->up_match, cursor->up_bytes);
-
- if (info->left_side ? cmp <= 0 : cmp > 0) {
-
- goto increment_potential;
- }
-
-set_new_recomm:
- /* We have to set a new recommendation; skip the hash analysis
- for a while to avoid unnecessary CPU time usage when there is no
- chance for success */
-
- info->hash_analysis = 0;
-
- cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
- cursor->low_match, cursor->low_bytes);
- if (cmp == 0) {
- info->n_hash_potential = 0; /* no recommendation can be made */
-
- /* For extra safety, we set some sensible values here */
-
- info->n_fields = 1;
- info->n_bytes = 0;
-
- info->left_side = TRUE;
-
- } else if (cmp > 0) {
- info->n_hash_potential = 1;
-
- if (cursor->up_match >= n_unique) {
-
- info->n_fields = n_unique;
- info->n_bytes = 0;
-
- } else if (cursor->low_match < cursor->up_match) {
-
- info->n_fields = cursor->low_match + 1;
- info->n_bytes = 0;
- } else {
- info->n_fields = cursor->low_match;
- info->n_bytes = cursor->low_bytes + 1;
- }
-
- info->left_side = TRUE;
- } else {
- info->n_hash_potential = 1;
-
- if (cursor->low_match >= n_unique) {
-
- info->n_fields = n_unique;
- info->n_bytes = 0;
-
- } else if (cursor->low_match > cursor->up_match) {
-
- info->n_fields = cursor->up_match + 1;
- info->n_bytes = 0;
- } else {
- info->n_fields = cursor->up_match;
- info->n_bytes = cursor->up_bytes + 1;
- }
-
- info->left_side = FALSE;
- }
-}
-
-/*************************************************************************
-Updates the block search info on hash successes. NOTE that info and
-block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
-semaphore, to save CPU time! Do not assume the fields are consistent.
-
-If the parameters stored in the block agree with the recommendation in
-info, block->n_hash_helps is incremented, and info->last_hash_succ is set
-to TRUE when the block already is hashed with those same parameters.
-Otherwise the counter of the block is restarted from 1 with the parameters
-taken from info. Building is recommended when the counter exceeds the
-number of records on the page divided by BTR_SEARCH_PAGE_BUILD_LIMIT,
-info->n_hash_potential is at least BTR_SEARCH_BUILD_LIMIT, and the block
-either is not hashed, has a counter greater than twice the number of
-records, or is hashed with parameters other than the ones now stored. */
-static
-ibool
-btr_search_update_block_hash_info(
-/*==============================*/
- /* out: TRUE if building a (new) hash index on
- the block is recommended */
- btr_search_t* info, /* in: search info */
- buf_block_t* block, /* in: buffer block */
- btr_cur_t* cursor __attribute__((unused)))
- /* in: cursor; apart from the ut_ad() check it
- is read only in the UNIV_DEBUG section */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_SHARED)
- || rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(cursor);
-
- info->last_hash_succ = FALSE;
-
- ut_a(block->magic_n == BUF_BLOCK_MAGIC_N);
- ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);
-
- if ((block->n_hash_helps > 0)
- && (info->n_hash_potential > 0)
- && (block->n_fields == info->n_fields)
- && (block->n_bytes == info->n_bytes)
- && (block->left_side == info->left_side)) {
-
- if ((block->is_hashed)
- && (block->curr_n_fields == info->n_fields)
- && (block->curr_n_bytes == info->n_bytes)
- && (block->curr_left_side == info->left_side)) {
-
- /* The search would presumably have succeeded using
- the hash index */
-
- info->last_hash_succ = TRUE;
- }
-
- block->n_hash_helps++;
- } else {
- block->n_hash_helps = 1; /* restart the count with the new parameters */
- block->n_fields = info->n_fields;
- block->n_bytes = info->n_bytes;
- block->left_side = info->left_side;
- }
-
-#ifdef UNIV_DEBUG
- if (cursor->index->table->does_not_fit_in_memory) {
- block->n_hash_helps = 0; /* with a zero counter the first condition
- below fails, so FALSE is returned */
- }
-#endif /* UNIV_DEBUG */
-
- if ((block->n_hash_helps > page_get_n_recs(block->frame)
- / BTR_SEARCH_PAGE_BUILD_LIMIT)
- && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {
-
- if ((!block->is_hashed)
- || (block->n_hash_helps
- > 2 * page_get_n_recs(block->frame))
- || (block->n_fields != block->curr_n_fields)
- || (block->n_bytes != block->curr_n_bytes)
- || (block->left_side != block->curr_left_side)) {
-
- /* Build a new hash index on the page */
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Updates a hash node reference when it has been unsuccessfully used in a
-search which could have succeeded with the used hash parameters. This can
-happen because when building a hash index for a page, we do not check
-what happens at page boundaries, and therefore there can be misleading
-hash nodes. Also, collisions in the fold value can lead to misleading
-references. This function lazily fixes these imperfections in the hash
-index.
-
-The record under the cursor is inserted to the hash index with
-ha_insert_for_fold(), but only if the block is hashed with exactly the
-parameters currently recommended in info, info->n_hash_potential is
-positive, and the record is a user record. The caller must hold
-btr_search_latch in exclusive mode. */
-static
-void
-btr_search_update_hash_ref(
-/*=======================*/
- btr_search_t* info, /* in: search info */
- buf_block_t* block, /* in: buffer block where cursor positioned */
- btr_cur_t* cursor) /* in: cursor */
-{
- ulint fold;
- rec_t* rec;
- dulint index_id;
-
- ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(buf_block_align(btr_cur_get_rec(cursor)) == block);
- ut_a(!block->is_hashed || block->index == cursor->index);
-
- if (block->is_hashed
- && (info->n_hash_potential > 0)
- && (block->curr_n_fields == info->n_fields)
- && (block->curr_n_bytes == info->n_bytes)
- && (block->curr_left_side == info->left_side)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 holds the number of elements of the array */
-
- rec = btr_cur_get_rec(cursor);
-
- if (!page_rec_is_user_rec(rec)) {
-
- return; /* heap is still NULL here: nothing to free */
- }
-
- index_id = cursor->index->id;
- fold = rec_fold(rec,
- rec_get_offsets(rec, cursor->index, offsets_,
- ULINT_UNDEFINED, &heap),
- block->curr_n_fields,
- block->curr_n_bytes, index_id);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ha_insert_for_fold(btr_search_sys->hash_index, fold, rec);
- }
-}
-
-/*************************************************************************
-Updates the search info. First the recommendation in info and the hash
-info of the block under the cursor are updated without any latching. If a
-hash index build is recommended or cursor->flag is BTR_CUR_HASH_FAIL, the
-free space of the hash table heap is checked before btr_search_latch is
-reserved. In the BTR_CUR_HASH_FAIL case the hash node reference is then
-updated under an x-latch on btr_search_latch. Finally, if a build was
-recommended, btr_search_build_page_hash_index() is called with a snapshot of
-the parameters stored in the block. The caller must not own
-btr_search_latch. */
-
-void
-btr_search_info_update_slow(
-/*========================*/
- btr_search_t* info, /* in/out: search info */
- btr_cur_t* cursor) /* in: cursor which was just positioned */
-{
- buf_block_t* block;
- ibool build_index;
- ulint* params;
- ulint* params2;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- block = buf_block_align(btr_cur_get_rec(cursor));
-
- /* NOTE that the following two function calls do NOT protect
- info or block->n_fields etc. with any semaphore, to save CPU time!
- We cannot assume the fields are consistent when we return from
- those functions! */
-
- btr_search_info_update_hash(info, cursor);
-
- build_index = btr_search_update_block_hash_info(info, block, cursor);
-
- if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
-
- btr_search_check_free_space_in_heap(); /* must be called before the latch is reserved below */
- }
-
- if (cursor->flag == BTR_CUR_HASH_FAIL) {
- /* Update the hash node reference, if appropriate */
-
-#ifdef UNIV_SEARCH_PERF_STAT
- btr_search_n_hash_fail++;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
- rw_lock_x_lock(&btr_search_latch);
-
- btr_search_update_hash_ref(info, block, cursor);
-
- rw_lock_x_unlock(&btr_search_latch);
- }
-
- if (build_index) {
- /* Note that since we did not protect block->n_fields etc.
- with any semaphore, the values can be inconsistent. We have
- to check inside the function call that they make sense. We
- also malloc an array and store the values there to make sure
- the compiler does not let the function call parameters change
- inside the called function. It might be that the compiler
- would optimize the call just to pass pointers to block. */
-
- params = mem_alloc(3 * sizeof(ulint));
- params[0] = block->n_fields;
- params[1] = block->n_bytes;
- params[2] = block->left_side;
-
- /* Make sure the compiler cannot deduce the values and do
- optimizations */
-
- params2 = params + btr_search_this_is_zero; /* same address as params: the global is initialized to 0 */
-
- btr_search_build_page_hash_index(cursor->index,
- block->frame,
- params2[0],
- params2[1],
- params2[2]);
- mem_free(params);
- }
-}
-
-/**********************************************************************
-Checks if a guessed position for a tree cursor is right. Note that if
-mode is PAGE_CUR_LE, which is used in inserts, and the function returns
-TRUE, then cursor->up_match and cursor->low_match both have sensible values.
-
-The check is done in two steps. First the tuple is compared to the record
-under the cursor; depending on mode this can already reject the guess, or
-(for PAGE_CUR_GE with at least n_unique matched fields) accept it. If the
-answer is still open and can_only_compare_to_cursor_rec is set, the function
-gives up and returns FALSE. Otherwise the tuple is also compared to the
-previous record (PAGE_CUR_G, PAGE_CUR_GE) or the next record (PAGE_CUR_L,
-PAGE_CUR_LE). If that neighbour is the page infimum/supremum, the guess is
-accepted only when the page has no previous/next page (FIL_NULL).
-
-Any heap allocated by rec_get_offsets() is freed before returning. */
-static
-ibool
-btr_search_check_guess(
-/*===================*/
- /* out: TRUE if success */
- btr_cur_t* cursor, /* in: guessed cursor position */
- ibool can_only_compare_to_cursor_rec,
- /* in: if we do not have a latch on the page
- of cursor, but only a latch on
- btr_search_latch, then ONLY the columns
- of the record UNDER the cursor are
- protected, not the next or previous record
- in the chain: we cannot look at the next or
- previous record to check our guess! */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- mtr_t* mtr) /* in: mtr */
-{
- rec_t* rec;
- ulint n_unique;
- ulint match;
- ulint bytes;
- int cmp;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ibool success = FALSE;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* store the element count of offsets_ in its first slot */
-
- n_unique = dict_index_get_n_unique_in_tree(cursor->index);
-
- rec = btr_cur_get_rec(cursor);
-
- ut_ad(page_rec_is_user_rec(rec));
-
- match = 0;
- bytes = 0;
-
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, rec,
- offsets, &match, &bytes); /* presumably 1 / 0 / -1 for tuple greater / equal / less than rec; confirm at the callee */
-
- if (mode == PAGE_CUR_GE) {
- if (cmp == 1) {
- goto exit_func;
- }
-
- cursor->up_match = match;
-
- if (match >= n_unique) {
- success = TRUE; /* accepted without looking at a neighbour record */
- goto exit_func;
- }
- } else if (mode == PAGE_CUR_LE) {
- if (cmp == -1) {
- goto exit_func;
- }
-
- cursor->low_match = match;
-
- } else if (mode == PAGE_CUR_G) {
- if (cmp != -1) {
- goto exit_func;
- }
- } else if (mode == PAGE_CUR_L) {
- if (cmp != 1) {
- goto exit_func;
- }
- }
-
- if (can_only_compare_to_cursor_rec) {
- /* Since we could not determine if our guess is right just by
- looking at the record under the cursor, return FALSE */
- goto exit_func;
- }
-
- match = 0;
- bytes = 0;
-
- if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
- rec_t* prev_rec;
-
- ut_ad(!page_rec_is_infimum(rec));
-
- prev_rec = page_rec_get_prev(rec);
-
- if (page_rec_is_infimum(prev_rec)) {
- success = btr_page_get_prev(
- buf_frame_align(prev_rec), mtr) == FIL_NULL; /* first record on the page: right only if there is no previous page */
-
- goto exit_func;
- }
-
- offsets = rec_get_offsets(prev_rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
- offsets, &match, &bytes);
- if (mode == PAGE_CUR_GE) {
- success = cmp == 1;
- } else {
- success = cmp != -1;
- }
-
- goto exit_func;
- } else {
- rec_t* next_rec;
-
- ut_ad(!page_rec_is_supremum(rec));
-
- next_rec = page_rec_get_next(rec);
-
- if (page_rec_is_supremum(next_rec)) {
- if (btr_page_get_next(
- buf_frame_align(next_rec), mtr)
- == FIL_NULL) { /* last record on the page: right only if there is no next page */
-
- cursor->up_match = 0;
- success = TRUE;
- }
-
- goto exit_func;
- }
-
- offsets = rec_get_offsets(next_rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec,
- offsets, &match, &bytes);
- if (mode == PAGE_CUR_LE) {
- success = cmp == -1;
- cursor->up_match = match; /* written even when success is FALSE */
- } else {
- success = cmp != 1;
- }
- }
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(success);
-}
-
-/**********************************************************************
-Tries to guess the right search position based on the hash search info
-of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
-and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values.
-
-The three early checks (info->n_hash_potential == 0, or the tuple having too
-few fields for info->n_fields / info->n_bytes) return FALSE before
-cursor->flag is touched. After the fold has been computed, cursor->flag is
-set to BTR_CUR_HASH; every later failure goes through the failure labels,
-which set cursor->flag to BTR_CUR_HASH_FAIL and info->last_hash_succ to
-FALSE.
-
-If has_search_latch is 0, the function s-latches btr_search_latch itself,
-tries to latch the page with buf_page_get_known_nowait() and then releases
-btr_search_latch; on failure after that point the page latch is released
-with btr_leaf_page_release(). If has_search_latch is nonzero, no page latch
-is taken and btr_search_latch is left as the caller holds it. */
-
-ibool
-btr_search_guess_on_hash(
-/*=====================*/
- /* out: TRUE if succeeded */
- dict_index_t* index, /* in: index */
- btr_search_t* info, /* in: index search info */
- dtuple_t* tuple, /* in: logical record */
- ulint mode, /* in: PAGE_CUR_L, ... */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ...;
- NOTE that only if has_search_latch
- is 0, we will have a latch set on
- the cursor page, otherwise we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /* out: tree cursor */
- ulint has_search_latch,/* in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr) /* in: mtr */
-{
- buf_block_t* block;
- rec_t* rec;
- page_t* page;
- ulint fold;
- ulint tuple_n_fields;
- dulint index_id;
- ibool can_only_compare_to_cursor_rec = TRUE;
-#ifdef notdefined
- btr_cur_t cursor2;
- btr_pcur_t pcur;
-#endif
- ut_ad(index && info && tuple && cursor && mtr);
- ut_ad((latch_mode == BTR_SEARCH_LEAF)
- || (latch_mode == BTR_MODIFY_LEAF));
-
- /* Note that, for efficiency, the struct info may not be protected by
- any latch here! */
-
- if (UNIV_UNLIKELY(info->n_hash_potential == 0)) {
-
- return(FALSE);
- }
-
- cursor->n_fields = info->n_fields;
- cursor->n_bytes = info->n_bytes;
-
- tuple_n_fields = dtuple_get_n_fields(tuple);
-
- if (UNIV_UNLIKELY(tuple_n_fields < cursor->n_fields)) {
-
- return(FALSE);
- }
-
- if (UNIV_UNLIKELY(tuple_n_fields == cursor->n_fields)
- && (cursor->n_bytes > 0)) {
-
- return(FALSE);
- }
-
- index_id = index->id;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_succ++; /* counted optimistically; decremented again at the failure label */
-#endif
- fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, index_id);
-
- cursor->fold = fold;
- cursor->flag = BTR_CUR_HASH;
-
- if (UNIV_LIKELY(!has_search_latch)) {
- rw_lock_s_lock(&btr_search_latch);
- }
-
- ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
- ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
-
- rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
-
- if (UNIV_UNLIKELY(!rec)) {
- goto failure_unlock;
- }
-
- page = buf_frame_align(rec);
-
- if (UNIV_LIKELY(!has_search_latch)) {
-
- if (UNIV_UNLIKELY(
- !buf_page_get_known_nowait(latch_mode, page,
- BUF_MAKE_YOUNG,
- __FILE__, __LINE__,
- mtr))) {
- goto failure_unlock;
- }
-
- rw_lock_s_unlock(&btr_search_latch);
- can_only_compare_to_cursor_rec = FALSE; /* the page is latched now, so neighbour records may be inspected */
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
-#endif /* UNIV_SYNC_DEBUG */
- }
-
- block = buf_block_align(page);
-
- if (UNIV_UNLIKELY(block->state == BUF_BLOCK_REMOVE_HASH)) {
- if (UNIV_LIKELY(!has_search_latch)) {
-
- btr_leaf_page_release(page, latch_mode, mtr);
- }
-
- goto failure;
- }
-
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(page_rec_is_user_rec(rec));
-
- btr_cur_position(index, rec, cursor);
-
- /* Check the validity of the guess within the page */
-
- /* If we only have the latch on btr_search_latch, not on the
- page, it only protects the columns of the record the cursor
- is positioned on. We cannot look at the next or the previous
- record to determine if our guess for the cursor position is
- right. */
- if (UNIV_EXPECT(
- ut_dulint_cmp(index_id, btr_page_get_index_id(page)), 0)
- || !btr_search_check_guess(cursor,
- can_only_compare_to_cursor_rec,
- tuple, mode, mtr)) {
- if (UNIV_LIKELY(!has_search_latch)) {
- btr_leaf_page_release(page, latch_mode, mtr);
- }
-
- goto failure;
- }
-
- if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) {
-
- info->n_hash_potential++; /* capped at BTR_SEARCH_BUILD_LIMIT + 5 */
- }
-
-#ifdef notdefined
- /* These lines of code can be used in a debug version to check
- the correctness of the searched cursor position: */
-
- info->last_hash_succ = FALSE;
-
- /* Currently, does not work if the following fails: */
- ut_ad(!has_search_latch);
-
- btr_leaf_page_release(page, latch_mode, mtr);
-
- btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- &cursor2, 0, mtr);
- if (mode == PAGE_CUR_GE
- && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {
-
- /* If mode is PAGE_CUR_GE, then the binary search
- in the index tree may actually take us to the supremum
- of the previous page */
-
- info->last_hash_succ = FALSE;
-
- btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,
- &pcur, mtr);
- ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
- } else {
- ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
- }
-
- /* NOTE that it is theoretically possible that the above assertions
- fail if the page of the cursor gets removed from the buffer pool
- meanwhile! Thus it might not be a bug. */
-#endif
- info->last_hash_succ = TRUE;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- btr_search_n_succ++;
-#endif
- if (UNIV_LIKELY(!has_search_latch)
- && buf_block_peek_if_too_old(block)) {
-
- buf_page_make_young(page);
- }
-
- /* Increment the page get statistics though we did not really
- fix the page: for user info only */
-
- buf_pool->n_page_gets++;
-
- return(TRUE);
-
- /*-------------------------------------------*/
-failure_unlock: /* failure while this function may still hold its own s-latch */
- if (UNIV_LIKELY(!has_search_latch)) {
- rw_lock_s_unlock(&btr_search_latch);
- }
-failure: /* failure after the s-latch taken here (if any) was already released */
- cursor->flag = BTR_CUR_HASH_FAIL;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_fail++;
-
- if (info->n_hash_succ > 0) {
- info->n_hash_succ--;
- }
-#endif
- info->last_hash_succ = FALSE;
-
- return(FALSE);
-}
-
-/************************************************************************
-Drops a page hash index. Returns at once if the block of the page is not
-hashed. Otherwise the function works in three phases:
-1) under an s-latch on btr_search_latch it copies curr_n_fields,
-   curr_n_bytes and index from the block;
-2) without btr_search_latch it walks the user records of the page and
-   caches their fold values (a fold equal to the previous record's nonzero
-   fold is not cached again);
-3) under an x-latch on btr_search_latch it re-checks the block: if the index
-   was dropped meanwhile it only runs the final consistency check, if the
-   hash parameters changed it frees the fold array and starts over, and
-   otherwise it removes the hash nodes for every cached fold, decrements
-   index->search_info->ref_count and marks the block as not hashed.
-If block->n_pointers is still nonzero at the end, a corruption message is
-printed to stderr and btr_search_validate() is called.
-The caller must not own btr_search_latch (asserted under UNIV_SYNC_DEBUG). */
-
-void
-btr_search_drop_page_hash_index(
-/*============================*/
- page_t* page) /* in: index page, s- or x-latched, or an index page
- for which we know that block->buf_fix_count == 0 */
-{
- hash_table_t* table;
- buf_block_t* block;
- ulint n_fields;
- ulint n_bytes;
- rec_t* rec;
- ulint fold;
- ulint prev_fold;
- dulint index_id;
- ulint n_cached;
- ulint n_recs;
- ulint* folds;
- ulint i;
- mem_heap_t* heap;
- dict_index_t* index;
- ulint* offsets;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-retry:
- rw_lock_s_lock(&btr_search_latch);
-
- block = buf_block_align(page);
-
- if (UNIV_LIKELY(!block->is_hashed)) {
-
- rw_lock_s_unlock(&btr_search_latch);
-
- return;
- }
-
- table = btr_search_sys->hash_index;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX)
- || (block->buf_fix_count == 0));
-#endif /* UNIV_SYNC_DEBUG */
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
- index = block->index;
-
- /* NOTE: The fields of block must not be accessed after
- releasing btr_search_latch, as the index page might only
- be s-latched! */
-
- rw_lock_s_unlock(&btr_search_latch);
-
- ut_a(n_fields + n_bytes > 0);
-
- n_recs = page_get_n_recs(page);
-
- /* Calculate and cache fold values into an array for fast deletion
- from the hash index */
-
- folds = mem_alloc(n_recs * sizeof(ulint)); /* one slot per record: the loop below caches at most one fold per record */
-
- n_cached = 0;
-
- rec = page_get_infimum_rec(page);
- rec = page_rec_get_next(rec);
-
- index_id = btr_page_get_index_id(page);
-
- ut_a(0 == ut_dulint_cmp(index_id, index->id));
-
- prev_fold = 0;
-
- heap = NULL;
- offsets = NULL;
-
- while (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
- ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
- fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
-
- if (fold == prev_fold && prev_fold != 0) { /* same nonzero fold as the previous record: already cached */
-
- goto next_rec;
- }
-
- /* Remove all hash nodes pointing to this page from the
- hash chain */
-
- folds[n_cached] = fold;
- n_cached++;
-next_rec:
- rec = page_rec_get_next(rec);
- prev_fold = fold;
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- rw_lock_x_lock(&btr_search_latch);
-
- if (UNIV_UNLIKELY(!block->is_hashed)) {
- /* Someone else has meanwhile dropped the hash index */
-
- goto cleanup;
- }
-
- ut_a(block->index == index);
-
- if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
- || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {
-
- /* Someone else has meanwhile built a new hash index on the
- page, with different parameters */
-
- rw_lock_x_unlock(&btr_search_latch);
-
- mem_free(folds); /* a fresh array is allocated after the retry */
- goto retry;
- }
-
- for (i = 0; i < n_cached; i++) {
-
- ha_remove_all_nodes_to_page(table, folds[i], page);
- }
-
- ut_a(index->search_info->ref_count > 0);
- index->search_info->ref_count--;
-
- block->is_hashed = FALSE;
- block->index = NULL;
-
-cleanup: /* reached with the x-latch held, both after a drop and when nothing was left to drop */
- if (UNIV_UNLIKELY(block->n_pointers)) {
- /* Corruption */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Corruption of adaptive hash index."
- " After dropping\n"
- "InnoDB: the hash index to a page of %s,"
- " still %lu hash nodes remain.\n",
- index->name, (ulong) block->n_pointers);
- rw_lock_x_unlock(&btr_search_latch); /* released first: btr_search_validate() takes the x-latch itself */
-
- btr_search_validate();
- } else {
- rw_lock_x_unlock(&btr_search_latch);
- }
-
- mem_free(folds);
-}
-
-/************************************************************************
-Drops a page hash index when a page is freed from a fseg to the file system.
-Drops possible hash index if the page happens to be in the buffer pool.
-
-The function first peeks with buf_page_peek_if_search_hashed() and returns
-immediately if that reports the page as not hashed. Otherwise it starts a
-local mini-transaction, requests the page with RW_S_LATCH and
-BUF_GET_IF_IN_POOL, drops the page hash index if a page was returned, and
-commits the mini-transaction. */
-
-void
-btr_search_drop_page_hash_when_freed(
-/*=================================*/
- ulint space, /* in: space id */
- ulint page_no) /* in: page number */
-{
- ibool is_hashed;
- page_t* page;
- mtr_t mtr;
-
- is_hashed = buf_page_peek_if_search_hashed(space, page_no);
-
- if (!is_hashed) {
-
- return;
- }
-
- mtr_start(&mtr);
-
- /* We assume that if the caller has a latch on the page, then the
- caller has already dropped the hash index for the page, and we never
- get here. Therefore we can acquire the s-latch to the page without
- having to fear a deadlock. */
-
- page = buf_page_get_gen(space, page_no, RW_S_LATCH, NULL,
- BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
- &mtr);
- /* Because the buffer pool mutex was released by
- buf_page_peek_if_search_hashed(), it is possible that the
- block was removed from the buffer pool by another thread
- before buf_page_get_gen() got a chance to acquire the buffer
- pool mutex again. Thus, we must check for a NULL return. */
-
- if (UNIV_LIKELY(page != NULL)) {
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
-#endif /* UNIV_SYNC_DEBUG */
-
- btr_search_drop_page_hash_index(page);
- }
-
- mtr_commit(&mtr); /* committed on both paths, whether or not a page was found */
-}
-
-/************************************************************************
-Builds a hash index on a page with the given parameters. If the page already
-has a hash index with different parameters, the old hash index is removed.
-If index is non-NULL, this function checks if n_fields and n_bytes are
-sensible values, and does not build a hash index if not.
-
-Nothing is built (and nothing has been allocated yet) when the page has no
-records, when n_fields + n_bytes is 0, or when n_fields / n_bytes exceed
-what dict_index_get_n_unique_in_tree(index) allows.
-
-The fold values are computed without holding btr_search_latch. For each run
-of consecutive records with the same fold, one record is cached: the first
-record of the run if left_side is set, the last record of the run otherwise.
-The cached entries are inserted under an x-latch on btr_search_latch, unless
-the block is at that point hashed with parameters different from the ones
-requested, in which case nothing is inserted.
-
-The caller must not hold btr_search_latch in exclusive mode and must hold an
-s- or x-latch on the block (asserted under UNIV_SYNC_DEBUG). */
-static
-void
-btr_search_build_page_hash_index(
-/*=============================*/
- dict_index_t* index, /* in: index for which to build */
- page_t* page, /* in: index page, s- or x-latched */
- ulint n_fields,/* in: hash this many full fields */
- ulint n_bytes,/* in: hash this many bytes from the next
- field */
- ibool left_side)/* in: hash for searches from left side? */
-{
- hash_table_t* table;
- buf_block_t* block;
- rec_t* rec;
- rec_t* next_rec;
- ulint fold;
- ulint next_fold;
- dulint index_id;
- ulint n_cached;
- ulint n_recs;
- ulint* folds;
- rec_t** recs;
- ulint i;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* store the element count of offsets_ in its first slot */
-
- ut_ad(index);
-
- block = buf_block_align(page);
- table = btr_search_sys->hash_index;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_lock(&btr_search_latch);
-
- if (block->is_hashed && ((block->curr_n_fields != n_fields)
- || (block->curr_n_bytes != n_bytes)
- || (block->curr_left_side != left_side))) {
-
- rw_lock_s_unlock(&btr_search_latch); /* released before the drop, which asserts that the latch is not owned */
-
- btr_search_drop_page_hash_index(page);
- } else {
- rw_lock_s_unlock(&btr_search_latch);
- }
-
- n_recs = page_get_n_recs(page);
-
- if (n_recs == 0) {
-
- return;
- }
-
- /* Check that the values for hash index build are sensible */
-
- if (n_fields + n_bytes == 0) {
-
- return;
- }
-
- if (dict_index_get_n_unique_in_tree(index) < n_fields
- || (dict_index_get_n_unique_in_tree(index) == n_fields
- && n_bytes > 0)) {
- return;
- }
-
- /* Calculate and cache fold values and corresponding records into
- an array for fast insertion to the hash index */
-
- folds = mem_alloc(n_recs * sizeof(ulint));
- recs = mem_alloc(n_recs * sizeof(rec_t*));
-
- n_cached = 0;
-
- index_id = btr_page_get_index_id(page);
-
- rec = page_get_infimum_rec(page);
- rec = page_rec_get_next(rec); /* first record after the infimum; n_recs != 0 was checked above */
-
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
-
- if (!page_rec_is_supremum(rec)) {
- ut_a(n_fields <= rec_offs_n_fields(offsets));
-
- if (n_bytes > 0) {
- ut_a(n_fields < rec_offs_n_fields(offsets));
- }
- }
-
- fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
-
- if (left_side) {
-
- folds[n_cached] = fold; /* left side: the first record of the first run is cached up front */
- recs[n_cached] = rec;
- n_cached++;
- }
-
- for (;;) {
- next_rec = page_rec_get_next(rec);
-
- if (page_rec_is_supremum(next_rec)) {
-
- if (!left_side) {
-
- folds[n_cached] = fold; /* right side: the last record of the last run */
- recs[n_cached] = rec;
- n_cached++;
- }
-
- break;
- }
-
- offsets = rec_get_offsets(next_rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
- next_fold = rec_fold(next_rec, offsets, n_fields,
- n_bytes, index_id);
-
- if (fold != next_fold) {
- /* Insert an entry into the hash index */
-
- if (left_side) {
-
- folds[n_cached] = next_fold; /* first record of the new run */
- recs[n_cached] = next_rec;
- n_cached++;
- } else {
- folds[n_cached] = fold; /* last record of the run that just ended */
- recs[n_cached] = rec;
- n_cached++;
- }
- }
-
- rec = next_rec;
- fold = next_fold;
- }
-
- btr_search_check_free_space_in_heap();
-
- rw_lock_x_lock(&btr_search_latch);
-
- if (block->is_hashed && ((block->curr_n_fields != n_fields)
- || (block->curr_n_bytes != n_bytes)
- || (block->curr_left_side != left_side))) {
- goto exit_func; /* the block is hashed with other parameters at this point: insert nothing */
- }
-
- /* This counter is decremented every time we drop page
- hash index entries and is incremented here. Since we can
- rebuild hash index for a page that is already hashed, we
- have to take care not to increment the counter in that
- case. */
- if (!block->is_hashed) {
- index->search_info->ref_count++;
- }
-
- block->is_hashed = TRUE;
- block->n_hash_helps = 0;
-
- block->curr_n_fields = n_fields;
- block->curr_n_bytes = n_bytes;
- block->curr_left_side = left_side;
- block->index = index;
-
- for (i = 0; i < n_cached; i++) {
-
- ha_insert_for_fold(table, folds[i], recs[i]);
- }
-
-exit_func:
- rw_lock_x_unlock(&btr_search_latch);
-
- mem_free(folds);
- mem_free(recs);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/************************************************************************
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split).
-
-Both pages must have the same compact/non-compact format (ut_a), and a
-hashed block must belong to the given index (ut_a). Under UNIV_SYNC_DEBUG it
-is asserted that the caller holds an x-latch on both blocks. The s-latch on
-btr_search_latch taken here is released on every path before another
-btr_search function is called and before returning. */
-
-void
-btr_search_move_or_delete_hash_entries(
-/*===================================*/
- page_t* new_page, /* in: records are copied
- to this page */
- page_t* page, /* in: index page from which
- records were copied, and the
- copied records will be deleted
- from this page */
- dict_index_t* index) /* in: record descriptor */
-{
- buf_block_t* block;
- buf_block_t* new_block;
- ulint n_fields;
- ulint n_bytes;
- ibool left_side;
-
- block = buf_block_align(page);
- new_block = buf_block_align(new_page);
- ut_a(page_is_comp(page) == page_is_comp(new_page));
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
- ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_a(!new_block->is_hashed || new_block->index == index);
- ut_a(!block->is_hashed || block->index == index);
-
- rw_lock_s_lock(&btr_search_latch);
-
- if (new_block->is_hashed) {
-
- rw_lock_s_unlock(&btr_search_latch);
-
- btr_search_drop_page_hash_index(page); /* note: drops the index of the source page, not of new_page */
-
- return;
- }
-
- if (block->is_hashed) {
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
- left_side = block->curr_left_side;
-
- new_block->n_fields = block->curr_n_fields; /* copy the source block's current parameters into new_block's n_fields/n_bytes/left_side */
- new_block->n_bytes = block->curr_n_bytes;
- new_block->left_side = left_side;
-
- rw_lock_s_unlock(&btr_search_latch);
-
- ut_a(n_fields + n_bytes > 0);
-
- btr_search_build_page_hash_index(index, new_page, n_fields,
- n_bytes, left_side);
-#if 1 /* TODO: safe to remove? */
- ut_a(n_fields == block->curr_n_fields); /* the source block's parameters must not have changed during the build */
- ut_a(n_bytes == block->curr_n_bytes);
- ut_a(left_side == block->curr_left_side);
-#endif
- return;
- }
-
- rw_lock_s_unlock(&btr_search_latch);
-}
-
-/************************************************************************
-Updates the page hash index when a single record is deleted from a page.
-Does nothing if the block of the record is not hashed. Otherwise the fold
-value of the record is computed with the block's current hash parameters
-(curr_n_fields, curr_n_bytes) and the matching hash node, if any, is deleted
-while holding an x-latch on btr_search_latch. Whether a node was actually
-found is not reported to the caller.
-Under UNIV_SYNC_DEBUG it is asserted that the caller holds an x-latch on
-the block. */
-
-void
-btr_search_update_hash_on_delete(
-/*=============================*/
- btr_cur_t* cursor) /* in: cursor which was positioned on the
- record to delete using btr_cur_search_...,
- the record is not yet deleted */
-{
- hash_table_t* table;
- buf_block_t* block;
- rec_t* rec;
- ulint fold;
- dulint index_id;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- mem_heap_t* heap = NULL;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* store the element count of offsets_ in its first slot */
-
- rec = btr_cur_get_rec(cursor);
-
- block = buf_block_align(rec);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!block->is_hashed) {
-
- return;
- }
-
- ut_a(block->index == cursor->index);
- ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
-
- table = btr_search_sys->hash_index;
-
- index_id = cursor->index->id;
- fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_,
- ULINT_UNDEFINED, &heap),
- block->curr_n_fields, block->curr_n_bytes, index_id);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- rw_lock_x_lock(&btr_search_latch);
-
- /* The return value (whether a node was found) is intentionally
- not stored: nothing in this function depends on it. */
- ha_search_and_delete_if_found(table, fold, rec);
-
- rw_lock_x_unlock(&btr_search_latch);
-}
-
-/************************************************************************
-Updates the page hash index when a single record is inserted on a page.
-Does nothing if the block of the cursor record is not hashed. If the cursor
-was positioned through the hash index (cursor->flag == BTR_CUR_HASH) with
-the same n_fields and n_bytes as the block currently uses, and the block is
-not hashed from the left side, ha_search_and_update_if_found() is called
-under an x-latch on btr_search_latch with the cursor record and the record
-following it. In every other case the x-latch is released and the work is
-delegated to btr_search_update_hash_on_insert().
-Under UNIV_SYNC_DEBUG it is asserted that the caller holds an x-latch on
-the block. */
-
-void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
- btr_cur_t* cursor) /* in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-{
- hash_table_t* table;
- buf_block_t* block;
- rec_t* rec;
-
- rec = btr_cur_get_rec(cursor);
-
- block = buf_block_align(rec);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!block->is_hashed) {
-
- return;
- }
-
- ut_a(block->index == cursor->index);
-
- rw_lock_x_lock(&btr_search_latch);
-
- if ((cursor->flag == BTR_CUR_HASH)
- && (cursor->n_fields == block->curr_n_fields)
- && (cursor->n_bytes == block->curr_n_bytes)
- && !block->curr_left_side) {
-
- table = btr_search_sys->hash_index;
-
- ha_search_and_update_if_found(table, cursor->fold, rec,
- page_rec_get_next(rec)); /* presumably repoints the node for cursor->fold from rec to the record after it; confirm at the callee */
-
- rw_lock_x_unlock(&btr_search_latch);
- } else {
- rw_lock_x_unlock(&btr_search_latch); /* released first: the callee takes the x-latch itself when needed */
-
- btr_search_update_hash_on_insert(cursor);
- }
-}
-
-/************************************************************************
-Updates the page hash index when a single record is inserted on a page.
-Does nothing if the block of the cursor record is not hashed. Otherwise the
-fold values of the cursor record (rec), the inserted record (ins_rec, the
-record following rec) and the record after it (next_rec) are computed with
-the block's current hash parameters, and hash entries are inserted wherever
-the fold changes between neighbours:
-- rec is the infimum: ins_rec is inserted if the block is hashed from the
-  left side;
-- fold of rec differs from fold of ins_rec: ins_rec is inserted for a
-  left-side hash, rec otherwise;
-- next_rec is the supremum: ins_rec is inserted for a right-side hash;
-- fold of ins_rec differs from fold of next_rec: next_rec is inserted for a
-  left-side hash, ins_rec otherwise.
-The x-latch on btr_search_latch is acquired lazily, before the first
-insertion, and released at function_exit only if it was taken.
-Under UNIV_SYNC_DEBUG it is asserted that the caller holds an x-latch on
-the block. */
-
-void
-btr_search_update_hash_on_insert(
-/*=============================*/
- btr_cur_t* cursor) /* in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-{
- hash_table_t* table;
- buf_block_t* block;
- rec_t* rec;
- rec_t* ins_rec;
- rec_t* next_rec;
- dulint index_id;
- ulint fold;
- ulint ins_fold;
- ulint next_fold = 0; /* initialized only to silence a
- compiler warning: the value is read
- only when next_rec is not the
- supremum, and in that case it is
- assigned below */
- ulint n_fields;
- ulint n_bytes;
- ibool left_side;
- ibool locked = FALSE;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* store the element count of offsets_ in its first slot */
-
- table = btr_search_sys->hash_index;
-
- btr_search_check_free_space_in_heap();
-
- rec = btr_cur_get_rec(cursor);
-
- block = buf_block_align(rec);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!block->is_hashed) {
-
- return;
- }
-
- ut_a(block->index == cursor->index);
-
- index_id = cursor->index->id;
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
- left_side = block->curr_left_side;
-
- ins_rec = page_rec_get_next(rec);
- next_rec = page_rec_get_next(ins_rec);
-
- offsets = rec_get_offsets(ins_rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
- ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index_id);
-
- if (!page_rec_is_supremum(next_rec)) {
- offsets = rec_get_offsets(next_rec, cursor->index, offsets,
- n_fields + (n_bytes > 0), &heap);
- next_fold = rec_fold(next_rec, offsets, n_fields,
- n_bytes, index_id);
- }
-
- if (!page_rec_is_infimum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- n_fields + (n_bytes > 0), &heap);
- fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
- } else {
- if (left_side) {
-
- rw_lock_x_lock(&btr_search_latch);
-
- locked = TRUE;
-
- ha_insert_for_fold(table, ins_fold, ins_rec);
- }
-
- goto check_next_rec; /* fold is left unset on this path and is not read after the label */
- }
-
- if (fold != ins_fold) {
-
- if (!locked) {
-
- rw_lock_x_lock(&btr_search_latch);
-
- locked = TRUE;
- }
-
- if (!left_side) {
- ha_insert_for_fold(table, fold, rec);
- } else {
- ha_insert_for_fold(table, ins_fold, ins_rec);
- }
- }
-
-check_next_rec:
- if (page_rec_is_supremum(next_rec)) {
-
- if (!left_side) {
-
- if (!locked) {
- rw_lock_x_lock(&btr_search_latch);
-
- locked = TRUE;
- }
-
- ha_insert_for_fold(table, ins_fold, ins_rec);
- }
-
- goto function_exit;
- }
-
- if (ins_fold != next_fold) {
-
- if (!locked) {
-
- rw_lock_x_lock(&btr_search_latch);
-
- locked = TRUE;
- }
-
- if (!left_side) {
-
- ha_insert_for_fold(table, ins_fold, ins_rec);
- /*
- fputs("Hash insert for ", stderr);
- dict_index_name_print(stderr, cursor->index);
- fprintf(stderr, " fold %lu\n", ins_fold);
- */
- } else {
- ha_insert_for_fold(table, next_fold, next_rec);
- }
- }
-
-function_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- if (locked) {
- rw_lock_x_unlock(&btr_search_latch);
- }
-}
-
-/************************************************************************
-Validates the search system. The check has two passes, both run under an
-x-latch on btr_search_latch that is released and re-acquired (with
-os_thread_yield() in between) every chunk_size cells:
-1) every node of every hash cell is visited; a node is reported as an error
-   if its block is not hashed or if node->fold differs from the fold that
-   rec_fold() computes for the record with the block's current parameters.
-   Each error is printed to stderr, and for the first 20 errors the page is
-   also dumped with buf_page_print();
-2) ha_validate() is called on the hash table one chunk of cells at a time.
-Returns TRUE only if neither pass found a problem.
-NOTE(review): pass 1 calls rec_get_offsets() with block->index before
-block->is_hashed is tested; whether block->index can be NULL there depends
-on code outside this function - confirm. */
-
-ibool
-btr_search_validate(void)
-/*=====================*/
- /* out: TRUE if ok */
-{
- buf_block_t* block;
- page_t* page;
- ha_node_t* node;
- ulint n_page_dumps = 0;
- ibool ok = TRUE;
- ulint i;
- ulint cell_count;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- /* How many cells to check before temporarily releasing
- btr_search_latch. */
- ulint chunk_size = 10000;
-
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* store the element count of offsets_ in its first slot */
-
- rw_lock_x_lock(&btr_search_latch);
-
- cell_count = hash_get_n_cells(btr_search_sys->hash_index); /* read once; not re-read after the latch is released below */
-
- for (i = 0; i < cell_count; i++) {
- /* We release btr_search_latch every once in a while to
- give other queries a chance to run. */
- if ((i != 0) && ((i % chunk_size) == 0)) {
- rw_lock_x_unlock(&btr_search_latch);
- os_thread_yield();
- rw_lock_x_lock(&btr_search_latch);
- }
-
- node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
-
- while (node != NULL) {
- block = buf_block_align(node->data);
- page = buf_frame_align(node->data);
- offsets = rec_get_offsets((rec_t*) node->data,
- block->index, offsets,
- block->curr_n_fields
- + (block->curr_n_bytes > 0),
- &heap);
-
- if (!block->is_hashed || node->fold
- != rec_fold((rec_t*)(node->data),
- offsets,
- block->curr_n_fields,
- block->curr_n_bytes,
- btr_page_get_index_id(page))) {
- ok = FALSE;
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error in an adaptive hash"
- " index pointer to page %lu\n"
- "InnoDB: ptr mem address %p"
- " index id %lu %lu,"
- " node fold %lu, rec fold %lu\n",
- (ulong) buf_frame_get_page_no(page),
- node->data,
- (ulong) ut_dulint_get_high(
- btr_page_get_index_id(page)),
- (ulong) ut_dulint_get_low(
- btr_page_get_index_id(page)),
- (ulong) node->fold,
- (ulong) rec_fold((rec_t*)(node->data),
- offsets,
- block->curr_n_fields,
- block->curr_n_bytes,
- btr_page_get_index_id(
- page)));
-
- fputs("InnoDB: Record ", stderr);
- rec_print_new(stderr, (rec_t*)node->data,
- offsets);
- fprintf(stderr, "\nInnoDB: on that page."
- " Page mem address %p, is hashed %lu,"
- " n fields %lu, n bytes %lu\n"
- "InnoDB: side %lu\n",
- (void*) page, (ulong) block->is_hashed,
- (ulong) block->curr_n_fields,
- (ulong) block->curr_n_bytes,
- (ulong) block->curr_left_side);
-
- if (n_page_dumps < 20) { /* limit full page dumps to the first 20 errors */
- buf_page_print(page);
- n_page_dumps++;
- }
- }
-
- node = node->next;
- }
- }
-
- for (i = 0; i < cell_count; i += chunk_size) {
- ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1); /* last cell of this chunk, clamped to the last cell of the table */
-
- /* We release btr_search_latch every once in a while to
- give other queries a chance to run. */
- if (i != 0) {
- rw_lock_x_unlock(&btr_search_latch);
- os_thread_yield();
- rw_lock_x_lock(&btr_search_latch);
- }
-
- if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
- ok = FALSE;
- }
- }
-
- rw_lock_x_unlock(&btr_search_latch);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return(ok);
-}
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
deleted file mode 100644
index 5b4f0ee6ecb..00000000000
--- a/storage/innobase/buf/buf0buf.c
+++ /dev/null
@@ -1,2590 +0,0 @@
-/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License 2
- as published by the Free Software Foundation in June 1991.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License 2
- along with this program (in file COPYING); if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-/******************************************************
-The database buffer buf_pool
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0buf.h"
-
-#ifdef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
-#include "mem0mem.h"
-#include "btr0btr.h"
-#include "fil0fil.h"
-#include "lock0lock.h"
-#include "btr0sea.h"
-#include "ibuf0ibuf.h"
-#include "dict0dict.h"
-#include "log0recv.h"
-#include "log0log.h"
-#include "trx0undo.h"
-#include "srv0srv.h"
-
-/*
- IMPLEMENTATION OF THE BUFFER POOL
- =================================
-
-Performance improvement:
-------------------------
-Thread scheduling in NT may be so slow that the OS wait mechanism should
-not be used even in waiting for disk reads to complete.
-Rather, we should put waiting query threads to the queue of
-waiting jobs, and let the OS thread do something useful while the i/o
-is processed. In this way we could remove most OS thread switches in
-an i/o-intensive benchmark like TPC-C.
-
-A possibility is to put a user space thread library between the database
-and NT. User space thread libraries might be very fast.
-
-SQL Server 7.0 can be configured to use 'fibers' which are lightweight
-threads in NT. These should be studied.
-
- Buffer frames and blocks
- ------------------------
-Following the terminology of Gray and Reuter, we call the memory
-blocks where file pages are loaded buffer frames. For each buffer
-frame there is a control block, or shortly, a block, in the buffer
-control array. The control info which does not need to be stored
-in the file along with the file page, resides in the control block.
-
- Buffer pool struct
- ------------------
-The buffer buf_pool contains a single mutex which protects all the
-control data structures of the buf_pool. The content of a buffer frame is
-protected by a separate read-write lock in its control block, though.
-These locks can be locked and unlocked without owning the buf_pool mutex.
-The OS events in the buf_pool struct can be waited for without owning the
-buf_pool mutex.
-
-The buf_pool mutex is a hot-spot in main memory, causing a lot of
-memory bus traffic on multiprocessor systems when processors
-alternately access the mutex. On our Pentium, the mutex is accessed
-maybe every 10 microseconds. We gave up the solution to have mutexes
-for each control block, for instance, because it seemed to be
-complicated.
-
-A solution to reduce mutex contention of the buf_pool mutex is to
-create a separate mutex for the page hash table. On Pentium,
-accessing the hash table takes 2 microseconds, about half
-of the total buf_pool mutex hold time.
-
- Control blocks
- --------------
-
-The control block contains, for instance, the bufferfix count
-which is incremented when a thread wants a file page to be fixed
-in a buffer frame. The bufferfix operation does not lock the
-contents of the frame, however. For this purpose, the control
-block contains a read-write lock.
-
-The buffer frames have to be aligned so that the start memory
-address of a frame is divisible by the universal page size, which
-is a power of two.
-
-We intend to make the buffer buf_pool size on-line reconfigurable,
-that is, the buf_pool size can be changed without closing the database.
-Then the database administrator may adjust it to be bigger
-at night, for example. The control block array must
-contain enough control blocks for the maximum buffer buf_pool size
-which is used in the particular database.
-If the buf_pool size is cut, we exploit the virtual memory mechanism of
-the OS, and just refrain from using frames at high addresses. Then the OS
-can swap them to disk.
-
-The control blocks containing file pages are put to a hash table
-according to the file address of the page.
-We could speed up the access to an individual page by using
-"pointer swizzling": we could replace the page references on
-non-leaf index pages by direct pointers to the page, if it exists
-in the buf_pool. We could make a separate hash table where we could
-chain all the page references in non-leaf pages residing in the buf_pool,
-using the page reference as the hash key,
-and at the time of reading of a page update the pointers accordingly.
-Drawbacks of this solution are added complexity and,
-possibly, extra space required on non-leaf pages for memory pointers.
-A simpler solution is just to speed up the hash table mechanism
-in the database, using tables whose size is a power of 2.
-
- Lists of blocks
- ---------------
-
-There are several lists of control blocks. The free list contains
-blocks which are currently not used.
-
-The LRU-list contains all the blocks holding a file page
-except those for which the bufferfix count is non-zero.
-The pages are in the LRU list roughly in the order of the last
-access to the page, so that the oldest pages are at the end of the
-list. We also keep a pointer to near the end of the LRU list,
-which we can use when we want to artificially age a page in the
-buf_pool. This is used if we know that some page is not needed
-again for some time: we insert the block right after the pointer,
-causing it to be replaced sooner than would normally be the case.
-Currently this aging mechanism is used for read-ahead mechanism
-of pages, and it can also be used when there is a scan of a full
-table which cannot fit in the memory. Putting the pages near the
-end of the LRU list, we make sure that most of the buf_pool stays in the
-main memory, undisturbed.
-
-The chain of modified blocks contains the blocks
-holding file pages that have been modified in the memory
-but not written to disk yet. The block with the oldest modification
-which has not yet been written to disk is at the end of the chain.
-
- Loading a file page
- -------------------
-
-First, a victim block for replacement has to be found in the
-buf_pool. It is taken from the free list or searched for from the
-end of the LRU-list. An exclusive lock is reserved for the frame,
-the io_fix field is set in the block fixing the block in buf_pool,
-and the io-operation for loading the page is queued. The io-handler thread
-releases the X-lock on the frame and resets the io_fix field
-when the io operation completes.
-
-A thread may request the above operation using the function
-buf_page_get(). It may then continue to request a lock on the frame.
-The lock is granted when the io-handler releases the x-lock.
-
- Read-ahead
- ----------
-
-The read-ahead mechanism is intended to be intelligent and
-isolated from the semantically higher levels of the database
-index management. From the higher level we only need the
-information if a file page has a natural successor or
-predecessor page. On the leaf level of a B-tree index,
-these are the next and previous pages in the natural
-order of the pages.
-
-Let us first explain the read-ahead mechanism when the leafs
-of a B-tree are scanned in an ascending or descending order.
-When a page that has been read is referenced in the buf_pool for the first time,
-the buffer manager checks if it is at the border of a so-called
-linear read-ahead area. The tablespace is divided into these
-areas of size 64 blocks, for example. So if the page is at the
-border of such an area, the read-ahead mechanism checks if
-all the other blocks in the area have been accessed in an
-ascending or descending order. If this is the case, the system
-looks at the natural successor or predecessor of the page,
-checks if that is at the border of another area, and in this case
-issues read-requests for all the pages in that area. Maybe
-we could relax the condition that all the pages in the area
-have to be accessed: if data is deleted from a table, there may
-appear holes of unused pages in the area.
-
-A different read-ahead mechanism is used when there appears
-to be a random access pattern to a file.
-If a new page is referenced in the buf_pool, and several pages
-of its random access area (for instance, 32 consecutive pages
-in a tablespace) have recently been referenced, we may predict
-that the whole area may be needed in the near future, and issue
-the read requests for the whole area.
-
- AWE implementation
- ------------------
-
-By a 'block' we mean the buffer header of type buf_block_t. By a 'page'
-we mean the physical 16 kB memory area allocated from RAM for that block.
-By a 'frame' we mean a 16 kB area in the virtual address space of the
-process, in the frame_mem of buf_pool.
-
-We can map pages to the frames of the buffer pool.
-
-1) A buffer block allocated to use as a non-data page, e.g., to the lock
-table, is always mapped to a frame.
-2) A bufferfixed or io-fixed data page is always mapped to a frame.
-3) When we need to map a block to frame, we look from the list
-awe_LRU_free_mapped and try to unmap its last block, but note that
-bufferfixed or io-fixed pages cannot be unmapped.
-4) For every frame in the buffer pool there is always a block whose page is
-mapped to it. When we create the buffer pool, we map the first elements
-in the free list to the frames.
-5) When we have AWE enabled, we disable adaptive hash indexes.
-*/
-
-/* Value in microseconds (20000 us = 20 ms).
-NOTE(review): the use site is outside this excerpt; presumably this is how
-long a thread sleeps while waiting for a pending page read -- confirm. */
-static const int WAIT_FOR_READ = 20000;
-
-buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database;
- NULL until buf_pool_init() assigns it */
-
-#ifdef UNIV_DEBUG
-ulint buf_dbg_counter = 0; /* This is used to insert validation
- operations in execution in the
- debug version */
-ibool buf_debug_prints = FALSE; /* If this is set TRUE,
- the program prints info whenever
- read-ahead or flush occurs */
-#endif /* UNIV_DEBUG */
-/************************************************************************
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-The result is the sum of ut_fold_binary() over two byte ranges of the page,
-reduced to its low 32 bits. */
-
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
- /* out: checksum */
- byte* page) /* in: buffer page */
-{
- ulint checksum;
-
- /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
- ..._ARCH_LOG_NO, are written outside the buffer pool to the first
- pages of data files, we have to skip them in the page checksum
- calculation.
- We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
- checksum is stored, and also the last 8 bytes of page because
- there we store the old formula checksum. */
-
- /* Range 1: from FIL_PAGE_OFFSET up to, but not including,
- FIL_PAGE_FILE_FLUSH_LSN.
- Range 2: from FIL_PAGE_DATA up to the trailer of
- FIL_PAGE_END_LSN_OLD_CHKSUM bytes at the end of the page. */
- checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
- FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
- + ut_fold_binary(page + FIL_PAGE_DATA,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA
- - FIL_PAGE_END_LSN_OLD_CHKSUM);
- /* Keep only the low 32 bits; buf_page_is_corrupted() compares this
- against a 4-byte field read with mach_read_from_4() */
- checksum = checksum & 0xFFFFFFFFUL;
-
- return(checksum);
-}
-
-/************************************************************************
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
-NOTE: we must first store the new formula checksum to
-FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input! */
-
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
- /* out: checksum */
- byte* page) /* in: buffer page */
-{
- ulint checksum;
-
- /* Fold only the first FIL_PAGE_FILE_FLUSH_LSN bytes of the page,
- starting at offset 0 */
- checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
-
- /* Keep only the low 32 bits, as in the new formula */
- checksum = checksum & 0xFFFFFFFFUL;
-
- return(checksum);
-}
-
-/************************************************************************
-Checks if a page is corrupt. The page is reported as corrupt when
-1) the 4 bytes at FIL_PAGE_LSN + 4 differ from the 4 bytes stored at
-UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, or
-2) srv_use_checksums is set and either stored checksum field holds none
-of the values accepted below.
-A page LSN that is ahead of the current system LSN is only reported to
-stderr; it does not by itself make this function return TRUE. */
-
-ibool
-buf_page_is_corrupted(
-/*==================*/
- /* out: TRUE if corrupted */
- byte* read_buf) /* in: a database page */
-{
- ulint checksum;
- ulint old_checksum;
- ulint checksum_field;
- ulint old_checksum_field;
-#ifndef UNIV_HOTBACKUP
- dulint current_lsn;
-#endif
- if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
- != mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
-
- /* Stored log sequence numbers at the start and the end
- of page do not match */
-
- return(TRUE);
- }
-
-#ifndef UNIV_HOTBACKUP
- /* Diagnostic only: warn if the page LSN is greater than the current
- system LSN. Skipped when recv_lsn_checks_on is not set or when
- log_peek_lsn() returns FALSE. */
- if (recv_lsn_checks_on && log_peek_lsn(&current_lsn)) {
- if (ut_dulint_cmp(current_lsn,
- mach_read_from_8(read_buf + FIL_PAGE_LSN))
- < 0) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: page %lu log sequence number"
- " %lu %lu\n"
- "InnoDB: is in the future! Current system "
- "log sequence number %lu %lu.\n"
- "InnoDB: Your database may be corrupt or "
- "you may have copied the InnoDB\n"
- "InnoDB: tablespace but not the InnoDB "
- "log files. See\n"
- "InnoDB: http://dev.mysql.com/doc/refman/"
- "5.1/en/forcing-recovery.html\n"
- "InnoDB: for more information.\n",
- (ulong) mach_read_from_4(read_buf
- + FIL_PAGE_OFFSET),
- (ulong) ut_dulint_get_high
- (mach_read_from_8(read_buf + FIL_PAGE_LSN)),
- (ulong) ut_dulint_get_low
- (mach_read_from_8(read_buf + FIL_PAGE_LSN)),
- (ulong) ut_dulint_get_high(current_lsn),
- (ulong) ut_dulint_get_low(current_lsn));
- }
- }
-#endif
-
- /* If we use checksums validation, make additional check before
- returning TRUE to ensure that the checksum is not equal to
- BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
- disabled. Otherwise, skip checksum calculation and return FALSE */
-
- if (srv_use_checksums) {
- old_checksum = buf_calc_page_old_checksum(read_buf);
-
- old_checksum_field = mach_read_from_4(
- read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM);
-
- /* There are 2 valid formulas for old_checksum_field:
-
- 1. Very old versions of InnoDB only stored 8 byte lsn to the
- start and the end of the page.
-
- 2. Newer InnoDB versions store the old formula checksum
- there. */
-
- /* Accepted values for the trailer field: the first 4 bytes
- at FIL_PAGE_LSN, the old-formula checksum, or
- BUF_NO_CHECKSUM_MAGIC */
- if (old_checksum_field != mach_read_from_4(read_buf
- + FIL_PAGE_LSN)
- && old_checksum_field != old_checksum
- && old_checksum_field != BUF_NO_CHECKSUM_MAGIC) {
-
- return(TRUE);
- }
-
- checksum = buf_calc_page_new_checksum(read_buf);
- checksum_field = mach_read_from_4(read_buf
- + FIL_PAGE_SPACE_OR_CHKSUM);
-
- /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
- (always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */
-
- /* Accepted values for the header field: 0, the new-formula
- checksum, or BUF_NO_CHECKSUM_MAGIC */
- if (checksum_field != 0 && checksum_field != checksum
- && checksum_field != BUF_NO_CHECKSUM_MAGIC) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/************************************************************************
-Prints a page to stderr: first a dump of all UNIV_PAGE_SIZE bytes via
-ut_print_buf(), then the calculated and the stored checksums, the stored
-LSN fields, page number and space id, and finally a guess of the page type
-based on the undo page header and on fil_page_get_type(). When
-srv_use_checksums is not set, BUF_NO_CHECKSUM_MAGIC is printed in place of
-the calculated checksums. */
-
-void
-buf_page_print(
-/*===========*/
- byte* read_buf) /* in: a database page */
-{
- dict_index_t* index;
- ulint checksum;
- ulint old_checksum;
-
- ut_print_timestamp(stderr);
- /* The argument for %lu is cast to ulong, like every other %lu
- argument in this file, so that it matches the conversion even where
- ulint is not unsigned long */
- fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
- (ulong) UNIV_PAGE_SIZE);
- ut_print_buf(stderr, read_buf, UNIV_PAGE_SIZE);
- fputs("InnoDB: End of page dump\n", stderr);
-
- checksum = srv_use_checksums
- ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
- old_checksum = srv_use_checksums
- ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Page checksum %lu, prior-to-4.0.14-form"
- " checksum %lu\n"
- "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
- " stored checksum %lu\n"
- "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
- " at page end %lu\n"
- "InnoDB: Page number (if stored to page already) %lu,\n"
- "InnoDB: space id (if created with >= MySQL-4.1.1"
- " and stored already) %lu\n",
- (ulong) checksum, (ulong) old_checksum,
- (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
- (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM),
- (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
- (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
- (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
- (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
- (ulong) mach_read_from_4(read_buf
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
-
- /* The 2 bytes at TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE are tested
- regardless of the page type, hence the "may be" wording */
- if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_INSERT) {
- fprintf(stderr,
- "InnoDB: Page may be an insert undo log page\n");
- } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_UPDATE) {
- fprintf(stderr,
- "InnoDB: Page may be an update undo log page\n");
- }
-
- /* A page type not listed below prints nothing */
- switch (fil_page_get_type(read_buf)) {
- case FIL_PAGE_INDEX:
- fprintf(stderr,
- "InnoDB: Page may be an index page where"
- " index id is %lu %lu\n",
- (ulong) ut_dulint_get_high
- (btr_page_get_index_id(read_buf)),
- (ulong) ut_dulint_get_low
- (btr_page_get_index_id(read_buf)));
-
- /* If the code is in ibbackup, dict_sys may be uninitialized,
- i.e., NULL */
-
- if (dict_sys != NULL) {
-
- index = dict_index_find_on_id_low(
- btr_page_get_index_id(read_buf));
- if (index) {
- fputs("InnoDB: (", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs(")\n", stderr);
- }
- }
- break;
- case FIL_PAGE_INODE:
- fputs("InnoDB: Page may be an 'inode' page\n", stderr);
- break;
- case FIL_PAGE_IBUF_FREE_LIST:
- fputs("InnoDB: Page may be an insert buffer free list page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_ALLOCATED:
- fputs("InnoDB: Page may be a freshly allocated page\n",
- stderr);
- break;
- case FIL_PAGE_IBUF_BITMAP:
- fputs("InnoDB: Page may be an insert buffer bitmap page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_SYS:
- fputs("InnoDB: Page may be a system page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_TRX_SYS:
- fputs("InnoDB: Page may be a transaction system page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_FSP_HDR:
- fputs("InnoDB: Page may be a file space header page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_XDES:
- fputs("InnoDB: Page may be an extent descriptor page\n",
- stderr);
- break;
- case FIL_PAGE_TYPE_BLOB:
- fputs("InnoDB: Page may be a BLOB page\n",
- stderr);
- break;
- }
-}
-
-/************************************************************************
-Initializes a buffer control block when the buf_pool is created. The block
-is put to state BUF_BLOCK_NOT_USED with zero fix counts, it is marked as
-being in neither the free list nor the LRU list, and its mutex and
-read-write lock are created. */
-static
-void
-buf_block_init(
-/*===========*/
- buf_block_t* block, /* in: pointer to control block */
- byte* frame) /* in: pointer to buffer frame, or NULL if in
- the case of AWE there is no frame */
-{
- block->magic_n = 0;
-
- block->state = BUF_BLOCK_NOT_USED;
-
- block->frame = frame;
-
- /* buf_pool_init() overwrites this after the call when AWE is used */
- block->awe_info = NULL;
-
- block->buf_fix_count = 0;
- block->io_fix = 0;
-
- block->modify_clock = ut_dulint_zero;
-
- block->file_page_was_freed = FALSE;
-
- block->check_index_page_at_flush = FALSE;
- block->index = NULL;
-
- block->in_free_list = FALSE;
- block->in_LRU_list = FALSE;
-
- block->n_pointers = 0;
-
- mutex_create(&block->mutex, SYNC_BUF_BLOCK);
-
- rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
- ut_ad(rw_lock_validate(&(block->lock)));
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/************************************************************************
-Creates the buffer pool. Allocates the buf_pool struct, the frame memory
-and the control block array, initializes every control block, puts the
-first curr_size blocks to the free list and finally creates the adaptive
-hash index system. The result is also stored to the global buf_pool.
-
-NOTE(review): every return(NULL) that follows mutex_enter() below leaves
-buf_pool->mutex locked and does not release what has been allocated so
-far; the global buf_pool also stays non-NULL. This is presumably tolerated
-because callers treat NULL as a fatal startup error -- confirm. */
-
-buf_pool_t*
-buf_pool_init(
-/*==========*/
- /* out, own: buf_pool object, NULL if not
- enough memory or error */
- ulint max_size, /* in: maximum size of the buf_pool in
- blocks */
- ulint curr_size, /* in: current size to use, must be <=
- max_size, currently must be equal to
- max_size */
- ulint n_frames) /* in: number of frames; if AWE is used,
- this is the size of the address space window
- where physical memory pages are mapped; if
- AWE is not used then this must be the same
- as max_size */
-{
- byte* frame;
- ulint i;
- buf_block_t* block;
-
- /* Argument contract; a violation aborts via ut_a() */
- ut_a(max_size == curr_size);
- ut_a(srv_use_awe || n_frames == max_size);
-
- /* Because of the assertions above this can be true only with AWE */
- if (n_frames > curr_size) {
- fprintf(stderr,
- "InnoDB: AWE: Error: you must specify in my.cnf"
- " .._awe_mem_mb larger\n"
- "InnoDB: than .._buffer_pool_size. Now the former"
- " is %lu pages,\n"
- "InnoDB: the latter %lu pages.\n",
- (ulong) curr_size, (ulong) n_frames);
-
- return(NULL);
- }
-
- buf_pool = mem_alloc(sizeof(buf_pool_t));
-
- /* 1. Initialize general fields
- ---------------------------- */
- mutex_create(&buf_pool->mutex, SYNC_BUF_POOL);
-
- /* The mutex is held until all lists have been built; see the
- NOTE(review) in the function comment about the error returns */
- mutex_enter(&(buf_pool->mutex));
-
- if (srv_use_awe) {
- /*----------------------------------------*/
- /* Allocate the virtual address space window, i.e., the
- buffer pool frames */
-
- buf_pool->frame_mem = os_awe_allocate_virtual_mem_window(
- UNIV_PAGE_SIZE * (n_frames + 1));
-
- /* Allocate the physical memory for AWE and the AWE info array
- for buf_pool */
-
- /* (1024 * 1024) / UNIV_PAGE_SIZE is the number of pages in
- one megabyte */
- if ((curr_size % ((1024 * 1024) / UNIV_PAGE_SIZE)) != 0) {
-
- fprintf(stderr,
- "InnoDB: AWE: Error: physical memory must be"
- " allocated in full megabytes.\n"
- "InnoDB: Trying to allocate %lu"
- " database pages.\n",
- (ulong) curr_size);
-
- return(NULL);
- }
-
- if (!os_awe_allocate_physical_mem(&(buf_pool->awe_info),
- curr_size
- / ((1024 * 1024)
- / UNIV_PAGE_SIZE))) {
-
- return(NULL);
- }
- /*----------------------------------------*/
- } else {
- buf_pool->frame_mem = os_mem_alloc_large(
- UNIV_PAGE_SIZE * (n_frames + 1), TRUE, FALSE);
- }
-
- if (buf_pool->frame_mem == NULL) {
-
- return(NULL);
- }
-
- buf_pool->blocks = ut_malloc(sizeof(buf_block_t) * max_size);
-
- if (buf_pool->blocks == NULL) {
-
- return(NULL);
- }
-
- buf_pool->max_size = max_size;
- buf_pool->curr_size = curr_size;
-
- buf_pool->n_frames = n_frames;
-
- /* Align pointer to the first frame */
-
- /* frame_mem was sized for n_frames + 1 pages, which leaves room
- for this alignment */
- frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
-
- buf_pool->frame_zero = frame;
- buf_pool->high_end = frame + UNIV_PAGE_SIZE * n_frames;
-
- if (srv_use_awe) {
- /*----------------------------------------*/
- /* Map an initial part of the allocated physical memory to
- the window */
-
- os_awe_map_physical_mem_to_window(buf_pool->frame_zero,
- n_frames
- * (UNIV_PAGE_SIZE
- / OS_AWE_X86_PAGE_SIZE),
- buf_pool->awe_info);
- /*----------------------------------------*/
- }
-
- /* One slot per frame; slot i points to the block whose page is
- currently mapped to frame i */
- buf_pool->blocks_of_frames = ut_malloc(sizeof(void*) * n_frames);
-
- if (buf_pool->blocks_of_frames == NULL) {
-
- return(NULL);
- }
-
- /* Init block structs and assign frames for them; in the case of
- AWE there are less frames than blocks. Then we assign the frames
- to the first blocks (we already mapped the memory above). We also
- init the awe_info for every block. */
-
- for (i = 0; i < max_size; i++) {
-
- block = buf_pool_get_nth_block(buf_pool, i);
-
- if (i < n_frames) {
- frame = buf_pool->frame_zero + i * UNIV_PAGE_SIZE;
- *(buf_pool->blocks_of_frames + i) = block;
- } else {
- frame = NULL;
- }
-
- buf_block_init(block, frame);
-
- if (srv_use_awe) {
- /*----------------------------------------*/
- block->awe_info = buf_pool->awe_info
- + i * (UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE);
- /*----------------------------------------*/
- }
- }
-
- buf_pool->page_hash = hash_create(2 * max_size);
-
- buf_pool->n_pend_reads = 0;
-
- buf_pool->last_printout_time = time(NULL);
-
- buf_pool->n_pages_read = 0;
- buf_pool->n_pages_written = 0;
- buf_pool->n_pages_created = 0;
- buf_pool->n_pages_awe_remapped = 0;
-
- buf_pool->n_page_gets = 0;
- buf_pool->n_page_gets_old = 0;
- buf_pool->n_pages_read_old = 0;
- buf_pool->n_pages_written_old = 0;
- buf_pool->n_pages_created_old = 0;
- buf_pool->n_pages_awe_remapped_old = 0;
-
- /* 2. Initialize flushing fields
- ---------------------------- */
- UT_LIST_INIT(buf_pool->flush_list);
-
- for (i = BUF_FLUSH_LRU; i <= BUF_FLUSH_LIST; i++) {
- buf_pool->n_flush[i] = 0;
- buf_pool->init_flush[i] = FALSE;
- buf_pool->no_flush[i] = os_event_create(NULL);
- }
-
- buf_pool->LRU_flush_ended = 0;
-
- buf_pool->ulint_clock = 1;
- buf_pool->freed_page_clock = 0;
-
- /* 3. Initialize LRU fields
- ---------------------------- */
- UT_LIST_INIT(buf_pool->LRU);
-
- buf_pool->LRU_old = NULL;
-
- UT_LIST_INIT(buf_pool->awe_LRU_free_mapped);
-
- /* Add control blocks to the free list */
- UT_LIST_INIT(buf_pool->free);
-
- for (i = 0; i < curr_size; i++) {
-
- block = buf_pool_get_nth_block(buf_pool, i);
-
- if (block->frame) {
- /* Wipe contents of frame to eliminate a Purify
- warning */
-
-#ifdef HAVE_purify
- memset(block->frame, '\0', UNIV_PAGE_SIZE);
-#endif
- if (srv_use_awe) {
- /* Add to the list of blocks mapped to
- frames */
-
- UT_LIST_ADD_LAST(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped,
- block);
- }
- }
-
- UT_LIST_ADD_LAST(free, buf_pool->free, block);
- block->in_free_list = TRUE;
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- /* The search system is created in both cases; only the size passed
- to btr_search_sys_create() differs */
- if (srv_use_adaptive_hash_indexes) {
- btr_search_sys_create(curr_size * UNIV_PAGE_SIZE
- / sizeof(void*) / 64);
- } else {
- /* Create only a small dummy system */
- btr_search_sys_create(1000);
- }
-
- return(buf_pool);
-}
-
-/************************************************************************
-Maps the page of block to a frame, if not mapped yet. Unmaps some page
-from the end of the awe_LRU_free_mapped.
-Does nothing if block already has a frame. The caller must own the
-buf_pool mutex (asserted in debug builds). If no block in
-awe_LRU_free_mapped can give up its frame, an error is printed and the
-server is brought down by ut_a(0). */
-
-void
-buf_awe_map_page_to_frame(
-/*======================*/
- buf_block_t* block, /* in: block whose page should be
- mapped to a frame */
- ibool add_to_mapped_list) /* in: TRUE if we in the case
- we need to map the page should also
- add the block to the
- awe_LRU_free_mapped list */
-{
- buf_block_t* bck;
-
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(block);
-
- if (block->frame) {
-
- return;
- }
-
- /* Scan awe_LRU_free_mapped from the end and try to find a block
- which is not bufferfixed or io-fixed */
-
- bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
-
- while (bck) {
- ibool skip;
-
- mutex_enter(&bck->mutex);
-
- /* Only a file page that is buffer-fixed or io-fixed is
- skipped; a block in any other state is taken as the victim */
- skip = (bck->state == BUF_BLOCK_FILE_PAGE
- && (bck->buf_fix_count != 0 || bck->io_fix != 0));
-
- if (skip) {
- mutex_exit(&bck->mutex);
-
- /* We have to skip this */
- bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck);
- } else {
- /* We can map block to the frame of bck */
-
- os_awe_map_physical_mem_to_window(
- bck->frame,
- UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE,
- block->awe_info);
-
- block->frame = bck->frame;
-
- /* Record the new owner in the slot of this frame:
- the slot index is the byte offset of the frame from
- frame_zero shifted right by UNIV_PAGE_SIZE_SHIFT */
- *(buf_pool->blocks_of_frames
- + (((ulint)(block->frame
- - buf_pool->frame_zero))
- >> UNIV_PAGE_SIZE_SHIFT))
- = block;
-
- bck->frame = NULL;
- UT_LIST_REMOVE(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped,
- bck);
-
- if (add_to_mapped_list) {
- UT_LIST_ADD_FIRST(
- awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped,
- block);
- }
-
- buf_pool->n_pages_awe_remapped++;
-
- mutex_exit(&bck->mutex);
-
- return;
- }
- }
-
- /* The whole list was scanned and every block had to be skipped */
- fprintf(stderr,
- "InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
- "InnoDB: awe_LRU_free_mapped list length %lu\n",
- (ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
-
- ut_a(0);
-}
-
-/************************************************************************
-Allocates a buffer block. This is a thin wrapper: the block is whatever
-buf_LRU_get_free_block() returns. */
-UNIV_INLINE
-buf_block_t*
-buf_block_alloc(void)
-/*=================*/
- /* out, own: the allocated block; also if AWE
- is used it is guaranteed that the page is
- mapped to a frame */
-{
- buf_block_t* block;
-
- block = buf_LRU_get_free_block();
-
- return(block);
-}
-
-/************************************************************************
-Moves the block to the start of the LRU list if there is a danger
-that the block would drift out of the buffer pool. The caller must NOT
-own the buf_pool mutex (asserted in debug builds): the mutex is acquired
-here when the block has to be moved. */
-UNIV_INLINE
-void
-buf_block_make_young(
-/*=================*/
- buf_block_t* block) /* in: block to make younger */
-{
- ut_ad(!mutex_own(&(buf_pool->mutex)));
-
- /* Note that we read freed_page_clock's without holding any mutex:
- this is allowed since the result is used only in heuristics */
-
- if (buf_block_peek_if_too_old(block)) {
-
- mutex_enter(&buf_pool->mutex);
- /* There has been freeing activity in the LRU list:
- best to move to the head of the LRU list */
-
- buf_LRU_make_block_young(block);
- mutex_exit(&buf_pool->mutex);
- }
-}
-
-/************************************************************************
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from slipping out of
-the buffer pool. Unlike buf_block_make_young(), the move is done
-unconditionally. */
-
-void
-buf_page_make_young(
-/*================*/
- buf_frame_t* frame) /* in: buffer frame of a file page */
-{
- buf_block_t* block;
-
- mutex_enter(&(buf_pool->mutex));
-
- /* Find the control block of the frame */
- block = buf_block_align(frame);
-
- /* The frame must hold a file page; anything else aborts */
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- buf_LRU_make_block_young(block);
-
- mutex_exit(&(buf_pool->mutex));
-}
-
-/************************************************************************
-Frees a buffer block which does not contain a file page. Passing a block
-in state BUF_BLOCK_FILE_PAGE aborts via ut_a(). */
-UNIV_INLINE
-void
-buf_block_free(
-/*===========*/
- buf_block_t* block) /* in, own: block to be freed */
-{
- /* Latching order used here: first the buf_pool mutex, then the
- block mutex */
- mutex_enter(&(buf_pool->mutex));
-
- mutex_enter(&block->mutex);
-
- ut_a(block->state != BUF_BLOCK_FILE_PAGE);
-
- buf_LRU_block_free_non_file_page(block);
-
- mutex_exit(&block->mutex);
-
- mutex_exit(&(buf_pool->mutex));
-}
-
-/*************************************************************************
-Allocates a buffer frame: allocates a block with buf_block_alloc() and
-returns the frame of that block.
-NOTE(review): the result of buf_block_alloc() is dereferenced without a
-NULL check; this relies on buf_LRU_get_free_block() never returning
-NULL -- confirm. */
-
-buf_frame_t*
-buf_frame_alloc(void)
-/*=================*/
- /* out: buffer frame */
-{
- return(buf_block_alloc()->frame);
-}
-
-/*************************************************************************
-Frees a buffer frame which does not contain a file page. The control block
-of the frame is looked up with buf_block_align() and then released with
-buf_block_free(). */
-
-void
-buf_frame_free(
-/*===========*/
- buf_frame_t* frame) /* in: buffer frame */
-{
- buf_block_free(buf_block_align(frame));
-}
-
-/************************************************************************
-Returns the buffer control block if the page can be found in the buffer
-pool. NOTE that it is possible that the page is not yet read
-from disk, though. This is a very low-level function: use with care!
-The block is not buffer-fixed here, and the buf_pool mutex is released
-before returning. */
-
-buf_block_t*
-buf_page_peek_block(
-/*================*/
- /* out: control block if found from page hash table,
- otherwise NULL; NOTE that the page is not necessarily
- yet read from disk! */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
-{
- buf_block_t* block;
-
- mutex_enter_fast(&(buf_pool->mutex));
-
- /* Look up the page hash while holding the buf_pool mutex */
- block = buf_page_hash_get(space, offset);
-
- mutex_exit(&(buf_pool->mutex));
-
- return(block);
-}
-
-/************************************************************************
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. Does nothing if the page is not in the page hash table. */
-
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
-{
- buf_block_t* block;
-
- mutex_enter_fast(&(buf_pool->mutex));
-
- block = buf_page_hash_get(space, offset);
-
- /* The flag is cleared while still holding the buf_pool mutex */
- if (block) {
- block->check_index_page_at_flush = FALSE;
- }
-
- mutex_exit(&(buf_pool->mutex));
-}
-
-/************************************************************************
-Returns the current state of is_hashed of a page. FALSE if the page is
-not in the pool. NOTE that this operation does not fix the page in the
-pool if it is found there. The value is read while holding the buf_pool
-mutex, which is released before returning. */
-
-ibool
-buf_page_peek_if_search_hashed(
-/*===========================*/
- /* out: TRUE if page hash index is built in search
- system */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
-{
- buf_block_t* block;
- ibool is_hashed;
-
- mutex_enter_fast(&(buf_pool->mutex));
-
- block = buf_page_hash_get(space, offset);
-
- if (!block) {
- /* Page not found in the page hash table */
- is_hashed = FALSE;
- } else {
- is_hashed = block->is_hashed;
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- return(is_hashed);
-}
-
-/************************************************************************
-Returns TRUE if the page can be found in the buffer pool hash table. NOTE
-that it is possible that the page is not yet read from disk, though.
-This is buf_page_peek_block() with the result reduced to TRUE/FALSE. */
-
-ibool
-buf_page_peek(
-/*==========*/
- /* out: TRUE if found from page hash table,
- NOTE that the page is not necessarily yet read
- from disk! */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
-{
- if (buf_page_peek_block(space, offset)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/************************************************************************
-Marks a page as freed for the debug version: sets file_page_was_freed to
-TRUE in the control block of the page, provided that the page is found in
-the page hash table of the buffer pool. Meant to be called when a file page
-is freed, so that the debug version can check that the page is not accessed
-any more unless it is reallocated. */
-
-buf_block_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
-			/* out: control block if found from page hash
-			table, otherwise NULL */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
-{
-	buf_block_t*	found;
-
-	mutex_enter_fast(&buf_pool->mutex);
-
-	found = buf_page_hash_get(space, offset);
-
-	if (found != NULL) {
-		found->file_page_was_freed = TRUE;
-	}
-
-	mutex_exit(&buf_pool->mutex);
-
-	return(found);
-}
-
-/************************************************************************
-Clears the debug mark set by buf_page_set_file_page_was_freed(): sets
-file_page_was_freed to FALSE in the control block of the page, provided
-that the page is found in the page hash table of the buffer pool.
-NOTE(review): the previous wording of this comment said that the function
-should be called when a file page is freed, which is the same text as for
-the "set" variant; presumably this one is meant for the point where the
-page is taken into use again - confirm against the callers. */
-
-buf_block_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
-			/* out: control block if found from page hash
-			table, otherwise NULL */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
-{
-	buf_block_t*	found;
-
-	mutex_enter_fast(&buf_pool->mutex);
-
-	found = buf_page_hash_get(space, offset);
-
-	if (found != NULL) {
-		found->file_page_was_freed = FALSE;
-	}
-
-	mutex_exit(&buf_pool->mutex);
-
-	return(found);
-}
-
-/************************************************************************
-This is the general function used to get access to a database page.
-The page is looked up in the buffer pool, first through the guessed frame
-and then through the page hash table. If it is not there, a read is
-requested with buf_read_page() and the lookup is retried. Once the block is
-found it is buffer-fixed, the requested latch is acquired on it, and the
-fix is registered in the memo of the mini-transaction. */
-
-buf_frame_t*
-buf_page_get_gen(
-/*=============*/
- /* out: pointer to the frame; NULL if mode is
- BUF_GET_IF_IN_POOL and the page is not in the pool or
- is still being read in, or if mode is BUF_GET_NOWAIT
- and the latch could not be obtained without waiting */
- ulint space, /* in: space id */
- ulint offset, /* in: page number */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_frame_t* guess, /* in: guessed frame or NULL */
- ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
- const char* file, /* in: file name; passed on to the latching
- calls together with line */
- ulint line, /* in: line where called */
- mtr_t* mtr) /* in: mini-transaction */
-{
- buf_block_t* block;
- ibool accessed;
- ulint fix_type;
- ibool success;
- ibool must_read;
-
- ut_ad(mtr);
- ut_ad((rw_latch == RW_S_LATCH)
- || (rw_latch == RW_X_LATCH)
- || (rw_latch == RW_NO_LATCH));
- ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
- ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
- || (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT));
-#ifndef UNIV_LOG_DEBUG
- ut_ad(!ibuf_inside() || ibuf_page(space, offset));
-#endif
- /* NOTE: the counter is incremented here before the buffer pool mutex
- is taken */
- buf_pool->n_page_gets++;
-loop:
- /* Each retry starts the lookup from scratch */
- block = NULL;
- mutex_enter_fast(&(buf_pool->mutex));
-
- if (guess) {
- block = buf_block_align(guess);
-
- /* The guess is accepted only if the block still holds the
- requested page and is in the FILE_PAGE state */
- if ((offset != block->offset) || (space != block->space)
- || (block->state != BUF_BLOCK_FILE_PAGE)) {
-
- block = NULL;
- }
- }
-
- if (block == NULL) {
- block = buf_page_hash_get(space, offset);
- }
-
- if (block == NULL) {
- /* Page not in buf_pool: needs to be read from file */
-
- mutex_exit(&(buf_pool->mutex));
-
- if (mode == BUF_GET_IF_IN_POOL) {
-
- return(NULL);
- }
-
- buf_read_page(space, offset);
-
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
-
- if (buf_dbg_counter % 37 == 0) {
- ut_ad(buf_validate());
- }
-#endif
- /* Look the page up again now that a read has been requested */
- goto loop;
- }
-
- /* From here on both the buffer pool mutex and the block mutex are
- held, until they are released one by one below */
- mutex_enter(&block->mutex);
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- must_read = FALSE;
-
- if (block->io_fix == BUF_IO_READ) {
-
- must_read = TRUE;
-
- if (mode == BUF_GET_IF_IN_POOL) {
- /* The page is only being read to buffer */
- mutex_exit(&buf_pool->mutex);
- mutex_exit(&block->mutex);
-
- return(NULL);
- }
- }
-
- /* If AWE is enabled and the page is not mapped to a frame, then
- map it */
-
- if (block->frame == NULL) {
- ut_a(srv_use_awe);
-
- /* We set second parameter TRUE because the block is in the
- LRU list and we must put it to awe_LRU_free_mapped list once
- mapped to a frame */
-
- buf_awe_map_page_to_frame(block, TRUE);
- }
-
- /* Buffer-fix the block before the buffer pool mutex is released */
-#ifdef UNIV_SYNC_DEBUG
- buf_block_buf_fix_inc_debug(block, file, line);
-#else
- buf_block_buf_fix_inc(block);
-#endif
- mutex_exit(&buf_pool->mutex);
-
- /* Check if this is the first access to the page */
-
- accessed = block->accessed;
-
- block->accessed = TRUE;
-
- mutex_exit(&block->mutex);
-
- buf_block_make_young(block);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(block->file_page_was_freed == FALSE);
-#endif
-
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
-
- if (buf_dbg_counter % 5771 == 0) {
- ut_ad(buf_validate());
- }
-#endif
- ut_ad(block->buf_fix_count > 0);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-
- /* Acquire the latch in the way that mode and rw_latch ask for */
- if (mode == BUF_GET_NOWAIT) {
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- ut_ad(rw_latch == RW_X_LATCH);
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_X_FIX;
- }
-
- if (!success) {
- /* The latch was not available: undo the buffer-fix
- and give up */
- mutex_enter(&block->mutex);
-
- block->buf_fix_count--;
-
- mutex_exit(&block->mutex);
-#ifdef UNIV_SYNC_DEBUG
- /* Presumably this releases the debug latch taken in
- buf_block_buf_fix_inc_debug() - TODO confirm */
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
-
- return(NULL);
- }
- } else if (rw_latch == RW_NO_LATCH) {
-
- if (must_read) {
- /* Let us wait until the read operation
- completes */
-
- /* The wait is a polling loop: io_fix is checked
- under the block mutex, and the thread sleeps for
- WAIT_FOR_READ between the checks */
- for (;;) {
- mutex_enter(&block->mutex);
-
- if (block->io_fix == BUF_IO_READ) {
-
- mutex_exit(&block->mutex);
-
- os_thread_sleep(WAIT_FOR_READ);
- } else {
-
- mutex_exit(&block->mutex);
-
- break;
- }
- }
- }
-
- fix_type = MTR_MEMO_BUF_FIX;
- } else if (rw_latch == RW_S_LATCH) {
-
- rw_lock_s_lock_func(&(block->lock), 0, file, line);
-
- fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- rw_lock_x_lock_func(&(block->lock), 0, file, line);
-
- fix_type = MTR_MEMO_PAGE_X_FIX;
- }
-
- /* Record the fix (and latch) in the mini-transaction memo */
- mtr_memo_push(mtr, block, fix_type);
-
- if (!accessed) {
- /* In the case of a first access, try to apply linear
- read-ahead */
-
- buf_read_ahead_linear(space, offset);
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
- return(block->frame);
-}
-
-/************************************************************************
-This is the general function used to get optimistic access to a database
-page. The caller supplies a block and the frame it expects the block to
-have; the call succeeds only if the block is still a file page with that
-frame, the latch can be obtained without waiting, and the modify clock of
-the block still equals the value passed in. On success the fix is pushed to
-the memo of the mini-transaction; on failure the buffer-fix taken here is
-undone. */
-
-ibool
-buf_page_optimistic_get_func(
-/*=========================*/
- /* out: TRUE if success */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /* in: guessed buffer block */
- buf_frame_t* guess, /* in: guessed frame; note that AWE may move
- frames */
- dulint modify_clock,/* in: modify clock value if mode is
- ..._GUESS_ON_CLOCK; compared with
- block->modify_clock after latching */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr) /* in: mini-transaction */
-{
- ibool accessed;
- ibool success;
- ulint fix_type;
-
- ut_ad(mtr && block);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- /* If AWE is used, block may have a different frame now, e.g., NULL */
-
- mutex_enter(&block->mutex);
-
- if (UNIV_UNLIKELY(block->state != BUF_BLOCK_FILE_PAGE)
- || UNIV_UNLIKELY(block->frame != guess)) {
-
- /* The guess is stale: nothing has been fixed yet, so just
- release the mutex and fail */
- mutex_exit(&block->mutex);
-
- return(FALSE);
- }
-
-#ifdef UNIV_SYNC_DEBUG
- buf_block_buf_fix_inc_debug(block, file, line);
-#else
- buf_block_buf_fix_inc(block);
-#endif
- /* Remember whether this is the first access to the page; the value
- is used at the end to decide about linear read-ahead */
- accessed = block->accessed;
- block->accessed = TRUE;
-
- mutex_exit(&block->mutex);
-
- buf_block_make_young(block);
-
- /* Debug check: either we are not inside the insert buffer routines,
- or ibuf_page() holds for this page */
-
- ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
-
- /* Try to get the latch without waiting */
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_X_FIX;
- }
-
- if (UNIV_UNLIKELY(!success)) {
- /* No latch: undo the buffer-fix and fail */
- mutex_enter(&block->mutex);
-
- block->buf_fix_count--;
-
- mutex_exit(&block->mutex);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
- return(FALSE);
- }
-
- if (UNIV_UNLIKELY(!UT_DULINT_EQ(modify_clock, block->modify_clock))) {
- /* The modify clock differs from the caller's value: release
- the latch just obtained, undo the buffer-fix and fail */
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(block->frame, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- if (rw_latch == RW_S_LATCH) {
- rw_lock_s_unlock(&(block->lock));
- } else {
- rw_lock_x_unlock(&(block->lock));
- }
-
- mutex_enter(&block->mutex);
-
- block->buf_fix_count--;
-
- mutex_exit(&block->mutex);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
- return(FALSE);
- }
-
- mtr_memo_push(mtr, block, fix_type);
-
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
-
- if (buf_dbg_counter % 5771 == 0) {
- ut_ad(buf_validate());
- }
-#endif
- ut_ad(block->buf_fix_count > 0);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(block->file_page_was_freed == FALSE);
-#endif
- if (UNIV_UNLIKELY(!accessed)) {
- /* In the case of a first access, try to apply linear
- read-ahead */
-
- /* The space id and page number are read from the frame */
- buf_read_ahead_linear(buf_frame_get_space_id(guess),
- buf_frame_get_page_no(guess));
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
- /* Only successful calls are counted as page gets here */
- buf_pool->n_page_gets++;
-
- return(TRUE);
-}
-
-/************************************************************************
-This is used to get access to a known database page, when no waiting can be
-done. For example, if a search in an adaptive hash index leads us to this
-frame. The block is derived from the frame with buf_block_align(), is
-buffer-fixed, and the latch is then requested without waiting; if the latch
-is not available the buffer-fix is undone and FALSE is returned. */
-
-ibool
-buf_page_get_known_nowait(
-/*======================*/
- /* out: TRUE if success */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
- buf_frame_t* guess, /* in: the known page frame */
- ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr) /* in: mini-transaction */
-{
- buf_block_t* block;
- ibool success;
- ulint fix_type;
-
- ut_ad(mtr);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- block = buf_block_align(guess);
-
- mutex_enter(&block->mutex);
-
- if (block->state == BUF_BLOCK_REMOVE_HASH) {
- /* Another thread is just freeing the block from the LRU list
- of the buffer pool: do not try to access this page; this
- attempt to access the page can only come through the hash
- index because when the buffer block state is ..._REMOVE_HASH,
- we have already removed it from the page address hash table
- of the buffer pool. */
-
- mutex_exit(&block->mutex);
-
- return(FALSE);
- }
-
- /* Any state other than FILE_PAGE is treated as a fatal error */
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_block_buf_fix_inc_debug(block, file, line);
-#else
- buf_block_buf_fix_inc(block);
-#endif
- mutex_exit(&block->mutex);
-
- /* With BUF_KEEP_OLD the position of the block in the LRU list is
- left alone */
- if (mode == BUF_MAKE_YOUNG) {
- buf_block_make_young(block);
- }
-
- ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
-
- /* Try to get the latch without waiting */
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_X_FIX;
- }
-
- if (!success) {
- /* No latch: undo the buffer-fix and fail */
- mutex_enter(&block->mutex);
-
- block->buf_fix_count--;
-
- mutex_exit(&block->mutex);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
-
- return(FALSE);
- }
-
- mtr_memo_push(mtr, block, fix_type);
-
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
-
- if (buf_dbg_counter % 5771 == 0) {
- ut_ad(buf_validate());
- }
-#endif
- ut_ad(block->buf_fix_count > 0);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- ut_a(block->file_page_was_freed == FALSE);
-#endif
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a((mode == BUF_KEEP_OLD)
- || (ibuf_count_get(block->space, block->offset) == 0));
-#endif
- /* Only successful calls are counted as page gets here */
- buf_pool->n_page_gets++;
-
- return(TRUE);
-}
-
-/************************************************************************
-Initializes the fields of a control block for a page, for use in
-ibbackup --restore. Only the block passed in is written to: no mutex is
-taken and the block is not inserted into the page hash table here. */
-
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
-	ulint		space,	/* in: space id */
-	ulint		offset,	/* in: offset of the page within space
-				in units of a page */
-	buf_block_t*	block)	/* in: block to init */
-{
-	/* Identity and state of the block */
-	block->magic_n = BUF_BLOCK_MAGIC_N;
-	block->state = BUF_BLOCK_FILE_PAGE;
-	block->space = space;
-	block->offset = offset;
-
-	/* Not accessed, not fixed, no i/o pending */
-	block->accessed = FALSE;
-	block->buf_fix_count = 0;
-	block->io_fix = 0;
-
-	/* No modifications recorded for the page */
-	block->oldest_modification = ut_dulint_zero;
-	block->newest_modification = ut_dulint_zero;
-
-	block->lock_hash_val = 0;
-	block->freed_page_clock = 0;
-
-	/* Starting values of the hash index related fields */
-	block->is_hashed = FALSE;
-	block->n_hash_helps = 0;
-	block->n_fields = 1;
-	block->n_bytes = 0;
-	block->left_side = TRUE;
-
-	block->file_page_was_freed = FALSE;
-}
-
-/************************************************************************
-Inits a page to the buffer buf_pool: sets the block to the FILE_PAGE state
-for the given (space, offset), inserts it into the page hash table and
-resets its bookkeeping fields. The caller must hold both the buffer pool
-mutex and the block mutex, and the block must not already be a file page.
-If the page is already present in the hash table, an error is printed and
-the assertion ut_a(0) is hit. */
-static
-void
-buf_page_init(
-/*==========*/
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space
- in units of a page */
- buf_block_t* block) /* in: block to init */
-{
-
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&(block->mutex)));
- ut_a(block->state != BUF_BLOCK_FILE_PAGE);
-
- /* Set the state of the block */
- block->magic_n = BUF_BLOCK_MAGIC_N;
-
- block->state = BUF_BLOCK_FILE_PAGE;
- block->space = space;
- block->offset = offset;
-
- block->check_index_page_at_flush = FALSE;
- block->index = NULL;
-
- block->lock_hash_val = lock_rec_hash(space, offset);
-
-#ifdef UNIV_DEBUG_VALGRIND
- if (!space) {
- /* Silence valid Valgrind warnings about uninitialized
- data being written to data files. There are some unused
- bytes on some pages that InnoDB does not initialize. */
- UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
- /* Insert into the hash table of file pages */
-
- /* A page must not be entered twice: finding it here already is
- reported and then treated as a fatal error */
- if (buf_page_hash_get(space, offset)) {
- fprintf(stderr,
- "InnoDB: Error: page %lu %lu already found"
- " in the hash table\n",
- (ulong) space,
- (ulong) offset);
-#ifdef UNIV_DEBUG
- buf_print();
- buf_LRU_print();
- buf_validate();
- buf_LRU_validate();
-#endif /* UNIV_DEBUG */
- ut_a(0);
- }
-
- HASH_INSERT(buf_block_t, hash, buf_pool->page_hash,
- buf_page_address_fold(space, offset), block);
-
- /* Reset the remaining bookkeeping fields of the block */
- block->freed_page_clock = 0;
-
- block->newest_modification = ut_dulint_zero;
- block->oldest_modification = ut_dulint_zero;
-
- block->accessed = FALSE;
- block->buf_fix_count = 0;
- block->io_fix = 0;
-
- block->n_hash_helps = 0;
- block->is_hashed = FALSE;
- block->n_fields = 1;
- block->n_bytes = 0;
- block->left_side = TRUE;
-
- block->file_page_was_freed = FALSE;
-}
-
-/************************************************************************
-Function which inits a page for read to the buffer buf_pool. If the page is
-(1) already in buf_pool, or
-(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
-(3) if the space is deleted or being deleted,
-then this function does nothing.
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
-on the buffer frame. The io-handler must take care that the flag is cleared
-and the lock released later. This is one of the functions which perform the
-state transition NOT_USED => FILE_PAGE to a block (the other is
-buf_page_create).
-In case (3) *err is set to DB_TABLESPACE_DELETED; in cases (1) and (2) it
-stays DB_SUCCESS although NULL is returned. */
-
-buf_block_t*
-buf_page_init_for_read(
-/*===================*/
- /* out: pointer to the block or NULL */
- ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /* in: space id */
- ib_longlong tablespace_version,/* in: prevents reading from a wrong
- version of the tablespace in case we have done
- DISCARD + IMPORT */
- ulint offset) /* in: page number */
-{
- buf_block_t* block;
- mtr_t mtr; /* started and committed only when mode is
- BUF_READ_IBUF_PAGES_ONLY */
-
- ut_ad(buf_pool);
-
- *err = DB_SUCCESS;
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY) {
- /* It is a read-ahead within an ibuf routine */
-
- ut_ad(!ibuf_bitmap_page(offset));
- ut_ad(ibuf_inside());
-
- mtr_start(&mtr);
-
- if (!ibuf_page_low(space, offset, &mtr)) {
-
- /* Case (2): not an ibuf page, nothing to do */
- mtr_commit(&mtr);
-
- return(NULL);
- }
- } else {
- ut_ad(mode == BUF_READ_ANY_PAGE);
- }
-
- /* The block is allocated before the buffer pool mutex is taken; it
- is given back with buf_block_free() if it turns out not to be
- needed */
- block = buf_block_alloc();
-
- ut_a(block);
-
- mutex_enter(&(buf_pool->mutex));
- mutex_enter(&block->mutex);
-
- if (fil_tablespace_deleted_or_being_deleted_in_mem(
- space, tablespace_version)) {
- *err = DB_TABLESPACE_DELETED;
- }
-
- if (*err == DB_TABLESPACE_DELETED
- || NULL != buf_page_hash_get(space, offset)) {
-
- /* The page belongs to a space which has been
- deleted or is being deleted, or the page is
- already in buf_pool, return */
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- buf_block_free(block);
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-
- mtr_commit(&mtr);
- }
-
- return(NULL);
- }
-
- ut_ad(block);
-
- buf_page_init(space, offset, block);
-
- /* The block must be put to the LRU list, to the old blocks */
-
- buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */
-
- /* Mark the read as pending; this function itself does not clear
- the flag or decrement the counter */
- block->io_fix = BUF_IO_READ;
-
- buf_pool->n_pend_reads++;
-
- /* We set a pass-type x-lock on the frame because then the same
- thread which called for the read operation (and is running now at
- this point of code) can wait for the read to complete by waiting
- for the x-lock on the frame; if the x-lock were recursive, the
- same thread would illegally get the x-lock before the page read
- is completed. The x-lock is cleared by the io-handler thread. */
-
- rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ);
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-
- mtr_commit(&mtr);
- }
-
- return(block);
-}
-
-/************************************************************************
-Initializes a page to the buffer buf_pool. The page is usually not read
-from a file even if it cannot be found in the buffer buf_pool. This is one
-of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_init_for_read above).
-If the page is already in the buffer pool, the existing page is returned
-through buf_page_get_with_no_latch(). Otherwise a free block is initialized
-for the page, buffer-fixed and registered in the mini-transaction memo, and
-a few fields in the header of the frame are reset. */
-
-buf_frame_t*
-buf_page_create(
-/*============*/
- /* out: pointer to the frame, page bufferfixed */
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space in units of
- a page */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- buf_frame_t* frame;
- buf_block_t* block;
- buf_block_t* free_block = NULL;
-
- ut_ad(mtr);
-
- /* The free block is obtained before the buffer pool mutex is
- taken; it is given back below if the page already exists */
- free_block = buf_LRU_get_free_block();
-
- mutex_enter(&(buf_pool->mutex));
-
- block = buf_page_hash_get(space, offset);
-
- if (block != NULL) {
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
- block->file_page_was_freed = FALSE;
-
- /* Page can be found in buf_pool */
- mutex_exit(&(buf_pool->mutex));
-
- buf_block_free(free_block);
-
- frame = buf_page_get_with_no_latch(space, offset, mtr);
-
- return(frame);
- }
-
- /* If we get here, the page was not in buf_pool: init it there */
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Creating space %lu page %lu to buffer\n",
- (ulong) space, (ulong) offset);
- }
-#endif /* UNIV_DEBUG */
-
- block = free_block;
-
- mutex_enter(&block->mutex);
-
- buf_page_init(space, offset, block);
-
- /* The block must be put to the LRU list */
- buf_LRU_add_block(block, FALSE);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
-#else
- buf_block_buf_fix_inc(block);
-#endif
- buf_pool->n_pages_created++;
-
- mutex_exit(&(buf_pool->mutex));
-
- mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
-
- /* A created page counts as accessed from the start */
- block->accessed = TRUE;
-
- mutex_exit(&block->mutex);
-
- /* Delete possible entries for the page from the insert buffer:
- such can exist if the page belonged to an index which was dropped */
-
- ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
- frame = block->frame;
-
- /* Fill the prev and next page fields with 0xff bytes and set the
- page type to FIL_PAGE_TYPE_ALLOCATED */
- memset(frame + FIL_PAGE_PREV, 0xff, 4);
- memset(frame + FIL_PAGE_NEXT, 0xff, 4);
- mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
-
- /* Reset to zero the file flush lsn field in the page; if the first
- page of an ibdata file is 'created' in this function into the buffer
- pool then we lose the original contents of the file flush lsn stamp.
- Then InnoDB could in a crash recovery print a big, false, corruption
- warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
-
- memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
-
-#ifdef UNIV_DEBUG
- buf_dbg_counter++;
-
- if (buf_dbg_counter % 357 == 0) {
- ut_ad(buf_validate());
- }
-#endif
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
- return(frame);
-}
-
-/************************************************************************
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool. For a read, the page just read in is sanity-checked
-(page number, space id, corruption), recovery and insert buffer merge are
-applied where enabled, and the x-lock set for the read is released. For a
-write, the flush system is notified and the s-lock is released. In both
-cases io_fix is reset to 0 and the i/o counters of the buffer pool are
-updated. If the page read in is found corrupt and srv_force_recovery is
-below SRV_FORCE_IGNORE_CORRUPT, the process exits. */
-
-void
-buf_page_io_complete(
-/*=================*/
- buf_block_t* block) /* in: pointer to the block in question */
-{
- ulint io_type;
-
- ut_ad(block);
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- /* We do not need protect block->io_fix here by block->mutex to read
- it because this is the only function where we can change the value
- from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
- ensures that this is the only thread that handles the i/o for this
- block. */
-
- io_type = block->io_fix;
-
- if (io_type == BUF_IO_READ) {
- /* If this page is not uninitialized and not in the
- doublewrite buffer, then the page number and space id
- should be the same as in block. */
- ulint read_page_no = mach_read_from_4(
- block->frame + FIL_PAGE_OFFSET);
- ulint read_space_id = mach_read_from_4(
- block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- /* The mismatches below are only reported; processing
- continues after the message */
- if (!block->space
- && trx_doublewrite_page_inside(block->offset)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: reading page %lu\n"
- "InnoDB: which is in the"
- " doublewrite buffer!\n",
- (ulong) block->offset);
- } else if (!read_space_id && !read_page_no) {
- /* This is likely an uninitialized page. */
- } else if ((block->space && block->space != read_space_id)
- || block->offset != read_page_no) {
- /* We did not compare space_id to read_space_id
- if block->space == 0, because the field on the
- page may contain garbage in MySQL < 4.1.1,
- which only supported block->space == 0. */
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: space id and page n:o"
- " stored in the page\n"
- "InnoDB: read in are %lu:%lu,"
- " should be %lu:%lu!\n",
- (ulong) read_space_id, (ulong) read_page_no,
- (ulong) block->space, (ulong) block->offset);
- }
- /* From version 3.23.38 up we store the page checksum
- to the 4 first bytes of the page end lsn field */
-
- if (buf_page_is_corrupted(block->frame)) {
- /* The corruption message is printed both before and
- after the page dump */
- fprintf(stderr,
- "InnoDB: Database page corruption on disk"
- " or a failed\n"
- "InnoDB: file read of page %lu.\n",
- (ulong) block->offset);
-
- fputs("InnoDB: You may have to recover"
- " from a backup.\n", stderr);
-
- buf_page_print(block->frame);
-
- fprintf(stderr,
- "InnoDB: Database page corruption on disk"
- " or a failed\n"
- "InnoDB: file read of page %lu.\n",
- (ulong) block->offset);
- fputs("InnoDB: You may have to recover"
- " from a backup.\n", stderr);
- fputs("InnoDB: It is also possible that"
- " your operating\n"
- "InnoDB: system has corrupted its"
- " own file cache\n"
- "InnoDB: and rebooting your computer"
- " removes the\n"
- "InnoDB: error.\n"
- "InnoDB: If the corrupt page is an index page\n"
- "InnoDB: you can also try to"
- " fix the corruption\n"
- "InnoDB: by dumping, dropping,"
- " and reimporting\n"
- "InnoDB: the corrupt table."
- " You can use CHECK\n"
- "InnoDB: TABLE to scan your"
- " table for corruption.\n"
- "InnoDB: See also"
- " http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
-
- /* Unless the force recovery level tells us to ignore
- corrupt pages, terminate the process */
- if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
- fputs("InnoDB: Ending processing because of"
- " a corrupt database page.\n",
- stderr);
- exit(1);
- }
- }
-
- if (recv_recovery_is_on()) {
- recv_recover_page(FALSE, TRUE, block->frame,
- block->space, block->offset);
- }
-
- if (!recv_no_ibuf_operations) {
- ibuf_merge_or_delete_for_page(
- block->frame, block->space, block->offset,
- TRUE);
- }
- }
-
- mutex_enter(&(buf_pool->mutex));
- mutex_enter(&block->mutex);
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
- /* Because this thread which does the unlocking is not the same that
- did the locking, we use a pass value != 0 in unlock, which simply
- removes the newest lock debug record, without checking the thread
- id. (This remark concerns the rw_lock_..._unlock_gen() calls
- below.) */
-
- block->io_fix = 0;
-
- if (io_type == BUF_IO_READ) {
- /* NOTE that the call to ibuf may have moved the ownership of
- the x-latch to this OS thread: do not let this confuse you in
- debugging! */
-
- ut_ad(buf_pool->n_pend_reads > 0);
- buf_pool->n_pend_reads--;
- buf_pool->n_pages_read++;
-
- rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fputs("Has read ", stderr);
- }
-#endif /* UNIV_DEBUG */
- } else {
- ut_ad(io_type == BUF_IO_WRITE);
-
- /* Write means a flush operation: call the completion
- routine in the flush system */
-
- buf_flush_write_complete(block);
-
- rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
-
- buf_pool->n_pages_written++;
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fputs("Has written ", stderr);
- }
-#endif /* UNIV_DEBUG */
- }
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
-#ifdef UNIV_DEBUG
- /* Completes the "Has read "/"Has written " line started above */
- if (buf_debug_prints) {
- fprintf(stderr, "page space %lu page no %lu\n",
- (ulong) block->space, (ulong) block->offset);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/*************************************************************************
-Invalidates the file pages in the buffer pool when an archive recovery is
-completed. Blocks are freed through buf_LRU_search_and_free_block() until
-a search frees nothing more. All the file pages buffered must be in a
-replaceable state when this function is called: not latched and not
-modified. */
-
-void
-buf_pool_invalidate(void)
-/*=====================*/
-{
-	ibool	freed;
-
-	ut_ad(buf_all_freed());
-
-	/* At least one search is always made; stop after the first search
-	that does not free a block */
-	do {
-		freed = buf_LRU_search_and_free_block(100);
-	} while (freed);
-
-	/* In the debug version, check that the LRU list is now empty */
-	mutex_enter(&buf_pool->mutex);
-	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
-	mutex_exit(&buf_pool->mutex);
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************************
-Validates the buffer buf_pool data structure. Walks through all the blocks
-of the pool while holding the buffer pool mutex, counts the blocks by state
-and by pending flush type, and asserts that the counts agree with the
-lengths of the LRU, free and flush lists and with the n_flush counters.
-Finally the LRU and flush list validators are run. Any inconsistency hits
-an assertion; the function itself only ever returns TRUE. */
-
-ibool
-buf_validate(void)
-/*==============*/
-{
- buf_block_t* block;
- ulint i;
- ulint n_single_flush = 0; /* blocks with a pending single page
- flush write */
- ulint n_lru_flush = 0; /* blocks with a pending LRU flush
- write */
- ulint n_list_flush = 0; /* blocks with a pending flush list
- write */
- ulint n_lru = 0; /* file page blocks, compared with
- the LRU list length */
- ulint n_flush = 0; /* file page blocks with a nonzero
- oldest_modification */
- ulint n_free = 0; /* blocks in the NOT_USED state */
- ulint n_page = 0; /* file page blocks; counted but not
- checked against anything */
-
- ut_ad(buf_pool);
-
- mutex_enter(&(buf_pool->mutex));
-
- for (i = 0; i < buf_pool->curr_size; i++) {
-
- block = buf_pool_get_nth_block(buf_pool, i);
-
- mutex_enter(&block->mutex);
-
- if (block->state == BUF_BLOCK_FILE_PAGE) {
-
- /* A file page must be found in the page hash table
- under its own address */
- ut_a(buf_page_hash_get(block->space,
- block->offset) == block);
- n_page++;
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a((block->io_fix == BUF_IO_READ)
- || ibuf_count_get(block->space, block->offset)
- == 0);
-#endif
- if (block->io_fix == BUF_IO_WRITE) {
-
- /* Count the pending write under its flush
- type; an unknown type is an error */
- if (block->flush_type == BUF_FLUSH_LRU) {
- n_lru_flush++;
- ut_a(rw_lock_is_locked(
- &block->lock,
- RW_LOCK_SHARED));
- } else if (block->flush_type
- == BUF_FLUSH_LIST) {
- n_list_flush++;
- } else if (block->flush_type
- == BUF_FLUSH_SINGLE_PAGE) {
- n_single_flush++;
- } else {
- ut_error;
- }
-
- } else if (block->io_fix == BUF_IO_READ) {
-
- /* A page being read in must be x-locked */
- ut_a(rw_lock_is_locked(&(block->lock),
- RW_LOCK_EX));
- }
-
- n_lru++;
-
- if (ut_dulint_cmp(block->oldest_modification,
- ut_dulint_zero) > 0) {
- n_flush++;
- }
-
- } else if (block->state == BUF_BLOCK_NOT_USED) {
- n_free++;
- }
-
- mutex_exit(&block->mutex);
- }
-
- if (n_lru + n_free > buf_pool->curr_size) {
- fprintf(stderr, "n LRU %lu, n free %lu\n",
- (ulong) n_lru, (ulong) n_free);
- ut_error;
- }
-
- /* The counts must agree with the list lengths ... */
- ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
- if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
- fprintf(stderr, "Free list len %lu, free blocks %lu\n",
- (ulong) UT_LIST_GET_LEN(buf_pool->free),
- (ulong) n_free);
- ut_error;
- }
- ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
-
- /* ... and with the pending flush counters */
- ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
- ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
- ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
-
- mutex_exit(&(buf_pool->mutex));
-
- /* These are called after the buffer pool mutex has been released */
- ut_a(buf_LRU_validate());
- ut_a(buf_flush_validate());
-
- return(TRUE);
-}
-
-/*************************************************************************
-Prints info of the buffer buf_pool data structure to stderr: first the
-sizes of the pool and of its lists and the i/o counters, then, for each
-index id met on the index pages of the pool, the number of blocks holding
-a page of that index. Finally buf_validate() is run. */
-
-void
-buf_print(void)
-/*===========*/
-{
-	dulint*		index_ids;	/* distinct index ids found */
-	ulint*		counts;		/* counts[j] = number of blocks
-					met for index_ids[j] */
-	ulint		size;
-	ulint		i;
-	ulint		j;
-	dulint		id;
-	ulint		n_found;	/* number of entries in use in
-					index_ids and counts */
-	buf_frame_t*	frame;
-	dict_index_t*	index;
-
-	ut_ad(buf_pool);
-
-	size = buf_pool->curr_size;
-
-	/* There can be at most one distinct index id per block, so
-	size entries are enough for both arrays */
-	index_ids = mem_alloc(sizeof(dulint) * size);
-	counts = mem_alloc(sizeof(ulint) * size);
-
-	mutex_enter(&(buf_pool->mutex));
-
-	/* Every counter is cast to ulong to match its %lu conversion */
-	fprintf(stderr,
-		"buf_pool size %lu\n"
-		"database pages %lu\n"
-		"free pages %lu\n"
-		"modified database pages %lu\n"
-		"n pending reads %lu\n"
-		"n pending flush LRU %lu list %lu single page %lu\n"
-		"pages read %lu, created %lu, written %lu\n",
-		(ulong) size,
-		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
-		(ulong) UT_LIST_GET_LEN(buf_pool->free),
-		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
-		(ulong) buf_pool->n_pend_reads,
-		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
-		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
-		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
-		(ulong) buf_pool->n_pages_read,
-		(ulong) buf_pool->n_pages_created,
-		(ulong) buf_pool->n_pages_written);
-
-	/* Count the number of blocks belonging to each index in the buffer */
-
-	n_found = 0;
-
-	for (i = 0; i < size; i++) {
-		frame = buf_pool_get_nth_block(buf_pool, i)->frame;
-
-		if (frame == NULL) {
-			/* When AWE is in use a block is not necessarily
-			mapped to a frame: there is no page to look at */
-			continue;
-		}
-
-		if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
-
-			id = btr_page_get_index_id(frame);
-
-			/* Look for the id in the index_ids array */
-			j = 0;
-
-			while (j < n_found) {
-
-				if (ut_dulint_cmp(index_ids[j], id) == 0) {
-					(counts[j])++;
-
-					break;
-				}
-				j++;
-			}
-
-			/* Not seen before: append a new entry */
-			if (j == n_found) {
-				n_found++;
-				index_ids[j] = id;
-				counts[j] = 1;
-			}
-		}
-	}
-
-	mutex_exit(&(buf_pool->mutex));
-
-	/* The per-index lines are printed after the buffer pool mutex has
-	been released; only the low part of the index id is printed */
-	for (i = 0; i < n_found; i++) {
-		index = dict_index_get_if_in_cache(index_ids[i]);
-
-		fprintf(stderr,
-			"Block count for index %lu in buffer is about %lu",
-			(ulong) ut_dulint_get_low(index_ids[i]),
-			(ulong) counts[i]);
-
-		if (index) {
-			putc(' ', stderr);
-			dict_index_name_print(stderr, NULL, index);
-		}
-
-		putc('\n', stderr);
-	}
-
-	mem_free(index_ids);
-	mem_free(counts);
-
-	ut_a(buf_validate());
-}
-
-/*************************************************************************
-Returns the number of latched pages in the buffer pool. A block is counted
-if its magic number is BUF_BLOCK_MAGIC_N and it is either buffer-fixed
-(buf_fix_count != 0) or has an i/o fix (io_fix != 0). */
-
-ulint
-buf_get_latched_pages_number(void)
-/*==============================*/
-{
-	ulint	n_fixed = 0;
-	ulint	pos;
-
-	mutex_enter(&buf_pool->mutex);
-
-	for (pos = 0; pos < buf_pool->curr_size; pos++) {
-		buf_block_t*	blk = buf_pool_get_nth_block(buf_pool, pos);
-
-		if (blk->magic_n != BUF_BLOCK_MAGIC_N) {
-			/* Skipped without taking the block mutex */
-			continue;
-		}
-
-		mutex_enter(&blk->mutex);
-
-		if (blk->buf_fix_count != 0 || blk->io_fix != 0) {
-			n_fixed++;
-		}
-
-		mutex_exit(&blk->mutex);
-	}
-
-	mutex_exit(&buf_pool->mutex);
-
-	return(n_fixed);
-}
-#endif /* UNIV_DEBUG */
-
-/*************************************************************************
-Returns the number of pending buf pool ios: the pending reads plus the
-pending flush writes of each of the three flush types. The counters are
-read without taking the buffer pool mutex. */
-
-ulint
-buf_get_n_pending_ios(void)
-/*=======================*/
-{
-	ulint	n_pending;
-
-	n_pending = buf_pool->n_pend_reads;
-	n_pending += buf_pool->n_flush[BUF_FLUSH_LRU];
-	n_pending += buf_pool->n_flush[BUF_FLUSH_LIST];
-	n_pending += buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
-
-	return(n_pending);
-}
-
-/*************************************************************************
-Returns the ratio in percents of modified pages in the buffer pool /
-database pages in the buffer pool. The numerator is the length of the
-flush list; the denominator is the combined length of the LRU and free
-lists plus one. The result is truncated by the integer division. */
-
-ulint
-buf_get_modified_ratio_pct(void)
-/*============================*/
-{
-	ulint	n_modified;
-	ulint	n_pages;
-	ulint	ratio;
-
-	mutex_enter(&buf_pool->mutex);
-
-	n_modified = UT_LIST_GET_LEN(buf_pool->flush_list);
-
-	/* 1 + is there to avoid division by zero */
-	n_pages = 1 + UT_LIST_GET_LEN(buf_pool->LRU)
-		+ UT_LIST_GET_LEN(buf_pool->free);
-
-	ratio = (100 * n_modified) / n_pages;
-
-	mutex_exit(&buf_pool->mutex);
-
-	return(ratio);
-}
-
-/*************************************************************************
-Prints info of the buffer i/o. */
-
-void
-buf_print_io(
-/*=========*/
- FILE* file) /* in/out: buffer where to print */
-{
- time_t current_time;
- double time_elapsed;
- ulint size;
-
- ut_ad(buf_pool);
- size = buf_pool->curr_size;
-
- mutex_enter(&(buf_pool->mutex));
-
- if (srv_use_awe) {
- fprintf(stderr,
- "AWE: Buffer pool memory frames %lu\n",
- (ulong) buf_pool->n_frames);
-
- fprintf(stderr,
- "AWE: Database pages and free buffers"
- " mapped in frames %lu\n",
- (ulong)
- UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
- }
- fprintf(file,
- "Buffer pool size %lu\n"
- "Free buffers %lu\n"
- "Database pages %lu\n"
- "Modified db pages %lu\n"
- "Pending reads %lu\n"
- "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
- (ulong) size,
- (ulong) UT_LIST_GET_LEN(buf_pool->free),
- (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
- (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
- (ulong) buf_pool->n_pend_reads,
- (ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
- + buf_pool->init_flush[BUF_FLUSH_LRU],
- (ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
- + buf_pool->init_flush[BUF_FLUSH_LIST],
- (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
-
- current_time = time(NULL);
- time_elapsed = 0.001 + difftime(current_time,
- buf_pool->last_printout_time);
- buf_pool->last_printout_time = current_time;
-
- fprintf(file,
- "Pages read %lu, created %lu, written %lu\n"
- "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
- (ulong) buf_pool->n_pages_read,
- (ulong) buf_pool->n_pages_created,
- (ulong) buf_pool->n_pages_written,
- (buf_pool->n_pages_read - buf_pool->n_pages_read_old)
- / time_elapsed,
- (buf_pool->n_pages_created - buf_pool->n_pages_created_old)
- / time_elapsed,
- (buf_pool->n_pages_written - buf_pool->n_pages_written_old)
- / time_elapsed);
-
- if (srv_use_awe) {
- fprintf(file, "AWE: %.2f page remaps/s\n",
- (buf_pool->n_pages_awe_remapped
- - buf_pool->n_pages_awe_remapped_old)
- / time_elapsed);
- }
-
- if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
- fprintf(file, "Buffer pool hit rate %lu / 1000\n",
- (ulong)
- (1000 - ((1000 * (buf_pool->n_pages_read
- - buf_pool->n_pages_read_old))
- / (buf_pool->n_page_gets
- - buf_pool->n_page_gets_old))));
- } else {
- fputs("No buffer pool page gets since the last printout\n",
- file);
- }
-
- buf_pool->n_page_gets_old = buf_pool->n_page_gets;
- buf_pool->n_pages_read_old = buf_pool->n_pages_read;
- buf_pool->n_pages_created_old = buf_pool->n_pages_created;
- buf_pool->n_pages_written_old = buf_pool->n_pages_written;
- buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
-
- mutex_exit(&(buf_pool->mutex));
-}
-
-/**************************************************************************
-Refreshes the statistics used to print per-second averages. */
-
-void
-buf_refresh_io_stats(void)
-/*======================*/
-{
- buf_pool->last_printout_time = time(NULL);
- buf_pool->n_page_gets_old = buf_pool->n_page_gets;
- buf_pool->n_pages_read_old = buf_pool->n_pages_read;
- buf_pool->n_pages_created_old = buf_pool->n_pages_created;
- buf_pool->n_pages_written_old = buf_pool->n_pages_written;
- buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
-}
-
-/*************************************************************************
-Checks that all file pages in the buffer are in a replaceable state. */
-
-ibool
-buf_all_freed(void)
-/*===============*/
-{
- buf_block_t* block;
- ulint i;
-
- ut_ad(buf_pool);
-
- mutex_enter(&(buf_pool->mutex));
-
- for (i = 0; i < buf_pool->curr_size; i++) {
-
- block = buf_pool_get_nth_block(buf_pool, i);
-
- mutex_enter(&block->mutex);
-
- if (block->state == BUF_BLOCK_FILE_PAGE) {
-
- if (!buf_flush_ready_for_replace(block)) {
-
- fprintf(stderr,
- "Page %lu %lu still fixed or dirty\n",
- (ulong) block->space,
- (ulong) block->offset);
- ut_error;
- }
- }
-
- mutex_exit(&block->mutex);
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- return(TRUE);
-}
-
-/*************************************************************************
-Checks that there currently are no pending i/o-operations for the buffer
-pool. */
-
-ibool
-buf_pool_check_no_pending_io(void)
-/*==============================*/
- /* out: TRUE if there is no pending i/o */
-{
- ibool ret;
-
- mutex_enter(&(buf_pool->mutex));
-
- if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
- + buf_pool->n_flush[BUF_FLUSH_LIST]
- + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
- ret = FALSE;
- } else {
- ret = TRUE;
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- return(ret);
-}
-
-/*************************************************************************
-Gets the current length of the free list of buffer blocks. */
-
-ulint
-buf_get_free_list_len(void)
-/*=======================*/
-{
- ulint len;
-
- mutex_enter(&(buf_pool->mutex));
-
- len = UT_LIST_GET_LEN(buf_pool->free);
-
- mutex_exit(&(buf_pool->mutex));
-
- return(len);
-}
diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
deleted file mode 100644
index 423c08c0569..00000000000
--- a/storage/innobase/buf/buf0flu.c
+++ /dev/null
@@ -1,1115 +0,0 @@
-/******************************************************
-The database buffer buf_pool flush algorithm
-
-(c) 1995-2001 Innobase Oy
-
-Created 11/11/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0flu.h"
-
-#ifdef UNIV_NONINL
-#include "buf0flu.ic"
-#include "trx0sys.h"
-#endif
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "page0page.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-#include "ibuf0ibuf.h"
-#include "log0log.h"
-#include "os0file.h"
-#include "trx0sys.h"
-#include "srv0srv.h"
-
-/* When flushed, dirty blocks are searched in neighborhoods of this size, and
-flushed along with the original page. */
-
-#define BUF_FLUSH_AREA ut_min(BUF_READ_AHEAD_AREA,\
- buf_pool->curr_size / 16)
-
-/**********************************************************************
-Validates the flush list. */
-static
-ibool
-buf_flush_validate_low(void);
-/*========================*/
- /* out: TRUE if ok */
-
-/************************************************************************
-Inserts a modified block into the flush list. */
-
-void
-buf_flush_insert_into_flush_list(
-/*=============================*/
- buf_block_t* block) /* in: block which is modified */
-{
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
- || (ut_dulint_cmp((UT_LIST_GET_FIRST(buf_pool->flush_list))
- ->oldest_modification,
- block->oldest_modification) <= 0));
-
- UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
-
- ut_ad(buf_flush_validate_low());
-}
-
-/************************************************************************
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
- buf_block_t* block) /* in: block which is modified */
-{
- buf_block_t* prev_b;
- buf_block_t* b;
-
- ut_ad(mutex_own(&(buf_pool->mutex)));
-
- prev_b = NULL;
- b = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- while (b && (ut_dulint_cmp(b->oldest_modification,
- block->oldest_modification) > 0)) {
- prev_b = b;
- b = UT_LIST_GET_NEXT(flush_list, b);
- }
-
- if (prev_b == NULL) {
- UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
- } else {
- UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b,
- block);
- }
-
- ut_ad(buf_flush_validate_low());
-}
-
-/************************************************************************
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., the transition FILE_PAGE => NOT_USED allowed. */
-
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
- /* out: TRUE if can replace immediately */
- buf_block_t* block) /* in: buffer control block, must be in state
- BUF_BLOCK_FILE_PAGE and in the LRU list */
-{
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&block->mutex));
- if (block->state != BUF_BLOCK_FILE_PAGE) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: buffer block state %lu"
- " in the LRU list!\n",
- (ulong)block->state);
- ut_print_buf(stderr, block, sizeof(buf_block_t));
-
- return(FALSE);
- }
-
- if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
- || (block->buf_fix_count != 0)
- || (block->io_fix != 0)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/************************************************************************
-Returns TRUE if the block is modified and ready for flushing. */
-UNIV_INLINE
-ibool
-buf_flush_ready_for_flush(
-/*======================*/
- /* out: TRUE if can flush immediately */
- buf_block_t* block, /* in: buffer control block, must be in state
- BUF_BLOCK_FILE_PAGE */
- ulint flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-{
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&(block->mutex)));
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
- && (block->io_fix == 0)) {
- if (flush_type != BUF_FLUSH_LRU) {
-
- return(TRUE);
-
- } else if (block->buf_fix_count == 0) {
-
- /* If we are flushing the LRU list, to avoid deadlocks
- we require the block not to be bufferfixed, and hence
- not latched. */
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/************************************************************************
-Updates the flush system data structures when a write is completed. */
-
-void
-buf_flush_write_complete(
-/*=====================*/
- buf_block_t* block) /* in: pointer to the block in question */
-{
- ut_ad(block);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(mutex_own(&(buf_pool->mutex)));
-#endif /* UNIV_SYNC_DEBUG */
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- block->oldest_modification = ut_dulint_zero;
-
- UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block);
-
- ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list));
-
- (buf_pool->n_flush[block->flush_type])--;
-
- if (block->flush_type == BUF_FLUSH_LRU) {
- /* Put the block to the end of the LRU list to wait to be
- moved to the free list */
-
- buf_LRU_make_block_old(block);
-
- buf_pool->LRU_flush_ended++;
- }
-
- /* fprintf(stderr, "n pending flush %lu\n",
- buf_pool->n_flush[block->flush_type]); */
-
- if ((buf_pool->n_flush[block->flush_type] == 0)
- && (buf_pool->init_flush[block->flush_type] == FALSE)) {
-
- /* The running flush batch has ended */
-
- os_event_set(buf_pool->no_flush[block->flush_type]);
- }
-}
-
-/************************************************************************
-Flushes possible buffered writes from the doublewrite memory buffer to disk,
-and also wakes up the aio thread if simulated aio is used. It is very
-important to call this function after a batch of writes has been posted,
-and also when we may have to wait for a page latch! Otherwise a deadlock
-of threads can occur. */
-static
-void
-buf_flush_buffered_writes(void)
-/*===========================*/
-{
- buf_block_t* block;
- byte* write_buf;
- ulint len;
- ulint len2;
- ulint i;
-
- if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
- os_aio_simulated_wake_handler_threads();
-
- return;
- }
-
- mutex_enter(&(trx_doublewrite->mutex));
-
- /* Write first to doublewrite buffer blocks. We use synchronous
- aio and thus know that file write has been completed when the
- control returns. */
-
- if (trx_doublewrite->first_free == 0) {
-
- mutex_exit(&(trx_doublewrite->mutex));
-
- return;
- }
-
- for (i = 0; i < trx_doublewrite->first_free; i++) {
-
- block = trx_doublewrite->buf_block_arr[i];
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
- != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be written"
- " seems corrupt!\n"
- "InnoDB: The lsn fields do not match!"
- " Noticed in the buffer pool\n"
- "InnoDB: before posting to the"
- " doublewrite buffer.\n");
- }
-
- if (block->check_index_page_at_flush
- && !page_simple_validate(block->frame)) {
-
- buf_page_print(block->frame);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Apparent corruption of an"
- " index page n:o %lu in space %lu\n"
- "InnoDB: to be written to data file."
- " We intentionally crash server\n"
- "InnoDB: to prevent corrupt data"
- " from ending up in data\n"
- "InnoDB: files.\n",
- (ulong) block->offset, (ulong) block->space);
-
- ut_error;
- }
- }
-
- /* increment the doublewrite flushed pages counter */
- srv_dblwr_pages_written+= trx_doublewrite->first_free;
- srv_dblwr_writes++;
-
- if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- } else {
- len = trx_doublewrite->first_free * UNIV_PAGE_SIZE;
- }
-
- fil_io(OS_FILE_WRITE,
- TRUE, TRX_SYS_SPACE,
- trx_doublewrite->block1, 0, len,
- (void*)trx_doublewrite->write_buf, NULL);
-
- write_buf = trx_doublewrite->write_buf;
-
- for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) {
- if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4)
- != mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be written"
- " seems corrupt!\n"
- "InnoDB: The lsn fields do not match!"
- " Noticed in the doublewrite block1.\n");
- }
- }
-
- if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- len = (trx_doublewrite->first_free
- - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE;
-
- fil_io(OS_FILE_WRITE,
- TRUE, TRX_SYS_SPACE,
- trx_doublewrite->block2, 0, len,
- (void*)(trx_doublewrite->write_buf
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- * UNIV_PAGE_SIZE),
- NULL);
-
- write_buf = trx_doublewrite->write_buf
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
- len2 += UNIV_PAGE_SIZE) {
- if (mach_read_from_4(write_buf + len2
- + FIL_PAGE_LSN + 4)
- != mach_read_from_4(write_buf + len2
- + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM
- + 4)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be"
- " written seems corrupt!\n"
- "InnoDB: The lsn fields do not match!"
- " Noticed in"
- " the doublewrite block2.\n");
- }
- }
- }
-
- /* Now flush the doublewrite buffer data to disk */
-
- fil_flush(TRX_SYS_SPACE);
-
- /* We know that the writes have been flushed to disk now
- and in recovery we will find them in the doublewrite buffer
- blocks. Next do the writes to the intended positions. */
-
- for (i = 0; i < trx_doublewrite->first_free; i++) {
- block = trx_doublewrite->buf_block_arr[i];
-
- if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
- != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be written"
- " seems corrupt!\n"
- "InnoDB: The lsn fields do not match!"
- " Noticed in the buffer pool\n"
- "InnoDB: after posting and flushing"
- " the doublewrite buffer.\n"
- "InnoDB: Page buf fix count %lu,"
- " io fix %lu, state %lu\n",
- (ulong)block->buf_fix_count,
- (ulong)block->io_fix,
- (ulong)block->state);
- }
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
- (void*)block->frame, (void*)block);
- }
-
- /* Wake possible simulated aio thread to actually post the
- writes to the operating system */
-
- os_aio_simulated_wake_handler_threads();
-
- /* Wait that all async writes to tablespaces have been posted to
- the OS */
-
- os_aio_wait_until_no_pending_writes();
-
- /* Now we flush the data to disk (for example, with fsync) */
-
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- /* We can now reuse the doublewrite memory buffer: */
-
- trx_doublewrite->first_free = 0;
-
- mutex_exit(&(trx_doublewrite->mutex));
-}
-
-/************************************************************************
-Posts a buffer page for writing. If the doublewrite memory buffer is
-full, calls buf_flush_buffered_writes and waits for for free space to
-appear. */
-static
-void
-buf_flush_post_to_doublewrite_buf(
-/*==============================*/
- buf_block_t* block) /* in: buffer block to write */
-{
-try_again:
- mutex_enter(&(trx_doublewrite->mutex));
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- if (trx_doublewrite->first_free
- >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- mutex_exit(&(trx_doublewrite->mutex));
-
- buf_flush_buffered_writes();
-
- goto try_again;
- }
-
- ut_memcpy(trx_doublewrite->write_buf
- + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
- block->frame, UNIV_PAGE_SIZE);
-
- trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = block;
-
- trx_doublewrite->first_free++;
-
- if (trx_doublewrite->first_free
- >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- mutex_exit(&(trx_doublewrite->mutex));
-
- buf_flush_buffered_writes();
-
- return;
- }
-
- mutex_exit(&(trx_doublewrite->mutex));
-}
-
-/************************************************************************
-Initializes a page for writing to the tablespace. */
-
-void
-buf_flush_init_for_writing(
-/*=======================*/
- byte* page, /* in: page */
- dulint newest_lsn, /* in: newest modification lsn to the page */
- ulint space, /* in: space id */
- ulint page_no) /* in: page number */
-{
- /* Write the newest modification lsn to the page header and trailer */
- mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
-
- mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- newest_lsn);
- /* Write the page number and the space id */
-
- mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
- mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space);
-
- /* Store the new formula checksum */
-
- mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
- srv_use_checksums
- ? buf_calc_page_new_checksum(page)
- : BUF_NO_CHECKSUM_MAGIC);
-
- /* We overwrite the first 4 bytes of the end lsn field to store
- the old formula checksum. Since it depends also on the field
- FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the
- new formula checksum. */
-
- mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- srv_use_checksums
- ? buf_calc_page_old_checksum(page)
- : BUF_NO_CHECKSUM_MAGIC);
-}
-
-/************************************************************************
-Does an asynchronous write of a buffer page. NOTE: in simulated aio and
-also when the doublewrite buffer is used, we must call
-buf_flush_buffered_writes after we have posted a batch of writes! */
-static
-void
-buf_flush_write_block_low(
-/*======================*/
- buf_block_t* block) /* in: buffer block to write */
-{
-#ifdef UNIV_LOG_DEBUG
- static ibool univ_log_debug_warned;
-#endif /* UNIV_LOG_DEBUG */
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(block->space, block->offset) == 0);
-#endif
- ut_ad(!ut_dulint_is_zero(block->newest_modification));
-
-#ifdef UNIV_LOG_DEBUG
- if (!univ_log_debug_warned) {
- univ_log_debug_warned = TRUE;
- fputs("Warning: cannot force log to disk if"
- " UNIV_LOG_DEBUG is defined!\n"
- "Crash recovery will not work!\n",
- stderr);
- }
-#else
- /* Force the log to the disk before writing the modified block */
- log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
-#endif
- buf_flush_init_for_writing(block->frame, block->newest_modification,
- block->space, block->offset);
- if (!srv_use_doublewrite_buf || !trx_doublewrite) {
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
- (void*)block->frame, (void*)block);
- } else {
- buf_flush_post_to_doublewrite_buf(block);
- }
-}
-
-/************************************************************************
-Writes a page asynchronously from the buffer buf_pool to a file, if it can be
-found in the buf_pool and it is in a flushable state. NOTE: in simulated aio
-we must call os_aio_simulated_wake_handler_threads after we have posted a batch
-of writes! */
-static
-ulint
-buf_flush_try_page(
-/*===============*/
- /* out: 1 if a page was flushed, 0 otherwise */
- ulint space, /* in: space id */
- ulint offset, /* in: page offset */
- ulint flush_type) /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or
- BUF_FLUSH_SINGLE_PAGE */
-{
- buf_block_t* block;
- ibool locked;
-
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
- || flush_type == BUF_FLUSH_SINGLE_PAGE);
-
- mutex_enter(&(buf_pool->mutex));
-
- block = buf_page_hash_get(space, offset);
-
- ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
-
- if (!block) {
- mutex_exit(&(buf_pool->mutex));
- return(0);
- }
-
- mutex_enter(&block->mutex);
-
- if (flush_type == BUF_FLUSH_LIST
- && buf_flush_ready_for_flush(block, flush_type)) {
-
- block->io_fix = BUF_IO_WRITE;
-
- /* If AWE is enabled and the page is not mapped to a frame,
- then map it */
-
- if (block->frame == NULL) {
- ut_a(srv_use_awe);
-
- /* We set second parameter TRUE because the block is
- in the LRU list and we must put it to
- awe_LRU_free_mapped list once mapped to a frame */
-
- buf_awe_map_page_to_frame(block, TRUE);
- }
-
- block->flush_type = flush_type;
-
- if (buf_pool->n_flush[flush_type] == 0) {
-
- os_event_reset(buf_pool->no_flush[flush_type]);
- }
-
- (buf_pool->n_flush[flush_type])++;
-
- locked = FALSE;
-
- /* If the simulated aio thread is not running, we must
- not wait for any latch, as we may end up in a deadlock:
- if buf_fix_count == 0, then we know we need not wait */
-
- if (block->buf_fix_count == 0) {
- rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
-
- locked = TRUE;
- }
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- if (!locked) {
- buf_flush_buffered_writes();
-
- rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
- }
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Flushing page space %lu, page no %lu \n",
- (ulong) block->space, (ulong) block->offset);
- }
-#endif /* UNIV_DEBUG */
-
- buf_flush_write_block_low(block);
-
- return(1);
-
- } else if (flush_type == BUF_FLUSH_LRU
- && buf_flush_ready_for_flush(block, flush_type)) {
-
- /* VERY IMPORTANT:
- Because any thread may call the LRU flush, even when owning
- locks on pages, to avoid deadlocks, we must make sure that the
- s-lock is acquired on the page without waiting: this is
- accomplished because in the if-condition above we require
- the page not to be bufferfixed (in function
- ..._ready_for_flush). */
-
- block->io_fix = BUF_IO_WRITE;
-
- /* If AWE is enabled and the page is not mapped to a frame,
- then map it */
-
- if (block->frame == NULL) {
- ut_a(srv_use_awe);
-
- /* We set second parameter TRUE because the block is
- in the LRU list and we must put it to
- awe_LRU_free_mapped list once mapped to a frame */
-
- buf_awe_map_page_to_frame(block, TRUE);
- }
-
- block->flush_type = flush_type;
-
- if (buf_pool->n_flush[flush_type] == 0) {
-
- os_event_reset(buf_pool->no_flush[flush_type]);
- }
-
- (buf_pool->n_flush[flush_type])++;
-
- rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
-
- /* Note that the s-latch is acquired before releasing the
- buf_pool mutex: this ensures that the latch is acquired
- immediately. */
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- buf_flush_write_block_low(block);
-
- return(1);
-
- } else if (flush_type == BUF_FLUSH_SINGLE_PAGE
- && buf_flush_ready_for_flush(block, flush_type)) {
-
- block->io_fix = BUF_IO_WRITE;
-
- /* If AWE is enabled and the page is not mapped to a frame,
- then map it */
-
- if (block->frame == NULL) {
- ut_a(srv_use_awe);
-
- /* We set second parameter TRUE because the block is
- in the LRU list and we must put it to
- awe_LRU_free_mapped list once mapped to a frame */
-
- buf_awe_map_page_to_frame(block, TRUE);
- }
-
- block->flush_type = flush_type;
-
- if (buf_pool->n_flush[block->flush_type] == 0) {
-
- os_event_reset(buf_pool->no_flush[block->flush_type]);
- }
-
- (buf_pool->n_flush[flush_type])++;
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Flushing single page space %lu,"
- " page no %lu \n",
- (ulong) block->space,
- (ulong) block->offset);
- }
-#endif /* UNIV_DEBUG */
-
- buf_flush_write_block_low(block);
-
- return(1);
- }
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- return(0);
-}
-
-/***************************************************************
-Flushes to disk all flushable pages within the flush area. */
-static
-ulint
-buf_flush_try_neighbors(
-/*====================*/
- /* out: number of pages flushed */
- ulint space, /* in: space id */
- ulint offset, /* in: page offset */
- ulint flush_type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-{
- buf_block_t* block;
- ulint low, high;
- ulint count = 0;
- ulint i;
-
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-
- low = (offset / BUF_FLUSH_AREA) * BUF_FLUSH_AREA;
- high = (offset / BUF_FLUSH_AREA + 1) * BUF_FLUSH_AREA;
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
- /* If there is little space, it is better not to flush any
- block except from the end of the LRU list */
-
- low = offset;
- high = offset + 1;
- }
-
- /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
-
- if (high > fil_space_get_size(space)) {
- high = fil_space_get_size(space);
- }
-
- mutex_enter(&(buf_pool->mutex));
-
- for (i = low; i < high; i++) {
-
- block = buf_page_hash_get(space, i);
- ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
-
- if (!block) {
-
- continue;
-
- } else if (flush_type == BUF_FLUSH_LRU && i != offset
- && !block->old) {
-
- /* We avoid flushing 'non-old' blocks in an LRU flush,
- because the flushed blocks are soon freed */
-
- continue;
- } else {
-
- mutex_enter(&block->mutex);
-
- if (buf_flush_ready_for_flush(block, flush_type)
- && (i == offset || block->buf_fix_count == 0)) {
- /* We only try to flush those
- neighbors != offset where the buf fix count is
- zero, as we then know that we probably can
- latch the page without a semaphore wait.
- Semaphore waits are expensive because we must
- flush the doublewrite buffer before we start
- waiting. */
-
- mutex_exit(&block->mutex);
-
- mutex_exit(&(buf_pool->mutex));
-
- /* Note: as we release the buf_pool mutex
- above, in buf_flush_try_page we cannot be sure
- the page is still in a flushable state:
- therefore we check it again inside that
- function. */
-
- count += buf_flush_try_page(space, i,
- flush_type);
-
- mutex_enter(&(buf_pool->mutex));
- } else {
- mutex_exit(&block->mutex);
- }
- }
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- return(count);
-}
-
-/***********************************************************************
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-
-ulint
-buf_flush_batch(
-/*============*/
- /* out: number of blocks for which the write
- request was queued; ULINT_UNDEFINED if there
- was a flush of the same type already running */
- ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
- BUF_FLUSH_LIST, then the caller must not own
- any latches on pages */
- ulint min_n, /* in: wished minimum mumber of blocks flushed
- (it is not guaranteed that the actual number
- is that big, though) */
- dulint lsn_limit) /* in the case BUF_FLUSH_LIST all blocks whose
- oldest_modification is smaller than this
- should be flushed (if their number does not
- exceed min_n), otherwise ignored */
-{
- buf_block_t* block;
- ulint page_count = 0;
- ulint old_page_count;
- ulint space;
- ulint offset;
- ibool found;
-
- ut_ad((flush_type == BUF_FLUSH_LRU)
- || (flush_type == BUF_FLUSH_LIST));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad((flush_type != BUF_FLUSH_LIST)
- || sync_thread_levels_empty_gen(TRUE));
-#endif /* UNIV_SYNC_DEBUG */
- mutex_enter(&(buf_pool->mutex));
-
- if ((buf_pool->n_flush[flush_type] > 0)
- || (buf_pool->init_flush[flush_type] == TRUE)) {
-
- /* There is already a flush batch of the same type running */
-
- mutex_exit(&(buf_pool->mutex));
-
- return(ULINT_UNDEFINED);
- }
-
- (buf_pool->init_flush)[flush_type] = TRUE;
-
- for (;;) {
- /* If we have flushed enough, leave the loop */
- if (page_count >= min_n) {
-
- break;
- }
-
- /* Start from the end of the list looking for a suitable
- block to be flushed. */
-
- if (flush_type == BUF_FLUSH_LRU) {
- block = UT_LIST_GET_LAST(buf_pool->LRU);
- } else {
- ut_ad(flush_type == BUF_FLUSH_LIST);
-
- block = UT_LIST_GET_LAST(buf_pool->flush_list);
- if (!block
- || (ut_dulint_cmp(block->oldest_modification,
- lsn_limit) >= 0)) {
- /* We have flushed enough */
-
- break;
- }
- }
-
- found = FALSE;
-
- /* Note that after finding a single flushable page, we try to
- flush also all its neighbors, and after that start from the
- END of the LRU list or flush list again: the list may change
- during the flushing and we cannot safely preserve within this
- function a pointer to a block in the list! */
-
- while ((block != NULL) && !found) {
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- mutex_enter(&block->mutex);
-
- if (buf_flush_ready_for_flush(block, flush_type)) {
-
- found = TRUE;
- space = block->space;
- offset = block->offset;
-
- mutex_exit(&block->mutex);
- mutex_exit(&(buf_pool->mutex));
-
- old_page_count = page_count;
-
- /* Try to flush also all the neighbors */
- page_count += buf_flush_try_neighbors(
- space, offset, flush_type);
- /* fprintf(stderr,
- "Flush type %lu, page no %lu, neighb %lu\n",
- flush_type, offset,
- page_count - old_page_count); */
-
- mutex_enter(&(buf_pool->mutex));
-
- } else if (flush_type == BUF_FLUSH_LRU) {
-
- mutex_exit(&block->mutex);
-
- block = UT_LIST_GET_PREV(LRU, block);
- } else {
- ut_ad(flush_type == BUF_FLUSH_LIST);
-
- mutex_exit(&block->mutex);
-
- block = UT_LIST_GET_PREV(flush_list, block);
- }
- }
-
- /* If we could not find anything to flush, leave the loop */
-
- if (!found) {
- break;
- }
- }
-
- (buf_pool->init_flush)[flush_type] = FALSE;
-
- if ((buf_pool->n_flush[flush_type] == 0)
- && (buf_pool->init_flush[flush_type] == FALSE)) {
-
- /* The running flush batch has ended */
-
- os_event_set(buf_pool->no_flush[flush_type]);
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- buf_flush_buffered_writes();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && page_count > 0) {
- ut_a(flush_type == BUF_FLUSH_LRU
- || flush_type == BUF_FLUSH_LIST);
- fprintf(stderr, flush_type == BUF_FLUSH_LRU
- ? "Flushed %lu pages in LRU flush\n"
- : "Flushed %lu pages in flush list flush\n",
- (ulong) page_count);
- }
-#endif /* UNIV_DEBUG */
-
- srv_buf_pool_flushed += page_count;
-
- return(page_count);
-}
-
-/**********************************************************************
-Waits until a flush batch of the given type ends */
-
-void
-buf_flush_wait_batch_end(
-/*=====================*/
- ulint type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-{
- ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));
-
- os_event_wait(buf_pool->no_flush[type]);
-}
-
-/**********************************************************************
-Gives a recommendation of how many blocks should be flushed to establish
-a big enough margin of replaceable blocks near the end of the LRU list
-and in the free list. */
-static
-ulint
-buf_flush_LRU_recommendation(void)
-/*==============================*/
- /* out: number of blocks which should be flushed
- from the end of the LRU list */
-{
- buf_block_t* block;
- ulint n_replaceable;
- ulint distance = 0;
-
- mutex_enter(&(buf_pool->mutex));
-
- n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
-
- block = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while ((block != NULL)
- && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
- + BUF_FLUSH_EXTRA_MARGIN)
- && (distance < BUF_LRU_FREE_SEARCH_LEN)) {
-
- mutex_enter(&block->mutex);
-
- if (buf_flush_ready_for_replace(block)) {
- n_replaceable++;
- }
-
- mutex_exit(&block->mutex);
-
- distance++;
-
- block = UT_LIST_GET_PREV(LRU, block);
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
-
- return(0);
- }
-
- return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
- - n_replaceable);
-}
-
-/*************************************************************************
-Flushes pages from the end of the LRU list if there is too small a margin
-of replaceable pages there or in the free list. VERY IMPORTANT: this function
-is called also by threads which have locks on pages. To avoid deadlocks, we
-flush only pages such that the s-lock required for flushing can be acquired
-immediately, without waiting. */
-
-void
-buf_flush_free_margin(void)
-/*=======================*/
-{
- ulint n_to_flush;
- ulint n_flushed;
-
- n_to_flush = buf_flush_LRU_recommendation();
-
- if (n_to_flush > 0) {
- n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
- ut_dulint_zero);
- if (n_flushed == ULINT_UNDEFINED) {
- /* There was an LRU type flush batch already running;
- let us wait for it to end */
-
- buf_flush_wait_batch_end(BUF_FLUSH_LRU);
- }
- }
-}
-
-/**********************************************************************
-Validates the flush list. */
-static
-ibool
-buf_flush_validate_low(void)
-/*========================*/
- /* out: TRUE if ok */
-{
- buf_block_t* block;
- dulint om;
-
- UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list);
-
- block = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- while (block != NULL) {
- om = block->oldest_modification;
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(ut_dulint_cmp(om, ut_dulint_zero) > 0);
-
- block = UT_LIST_GET_NEXT(flush_list, block);
-
- if (block) {
- ut_a(ut_dulint_cmp(om, block->oldest_modification)
- >= 0);
- }
- }
-
- return(TRUE);
-}
-
-/**********************************************************************
-Validates the flush list. */
-
-ibool
-buf_flush_validate(void)
-/*====================*/
- /* out: TRUE if ok */
-{
- ibool ret;
-
- mutex_enter(&(buf_pool->mutex));
-
- ret = buf_flush_validate_low();
-
- mutex_exit(&(buf_pool->mutex));
-
- return(ret);
-}
diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
deleted file mode 100644
index d3c787d1578..00000000000
--- a/storage/innobase/buf/buf0lru.c
+++ /dev/null
@@ -1,1237 +0,0 @@
-/******************************************************
-The database buffer replacement algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0lru.h"
-
-#ifdef UNIV_NONINL
-#include "buf0lru.ic"
-#include "srv0srv.h" /* Needed to getsrv_print_innodb_monitor */
-#endif
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "ut0rnd.h"
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "hash0hash.h"
-#include "os0sync.h"
-#include "fil0fil.h"
-#include "btr0btr.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0rea.h"
-#include "btr0sea.h"
-#include "os0file.h"
-#include "log0recv.h"
-
-/* The number of blocks from the LRU_old pointer onward, including the block
-pointed to, must be 3/8 of the whole LRU list length, except that the
-tolerance defined below is allowed. Note that the tolerance must be small
-enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the
-LRU_old pointer is not allowed to point to either end of the LRU list. */
-
-#define BUF_LRU_OLD_TOLERANCE 20
-
-/* The whole LRU list length is divided by this number to determine an
-initial segment in buf_LRU_get_recent_limit */
-
-#define BUF_LRU_INITIAL_RATIO 8
-
-/* When dropping the search hash index entries before deleting an ibd
-file, we build a local array of pages belonging to that tablespace
-in the buffer pool. Following is the size of that array. */
-#define BUF_LRU_DROP_SEARCH_HASH_SIZE 1024
-
-/* If we switch on the InnoDB monitor because there are too few available
-frames in the buffer pool, we set this to TRUE */
-ibool buf_lru_switched_on_innodb_mon = FALSE;
-
-/**********************************************************************
-Takes a block out of the LRU list and page hash table and sets the block
-state to BUF_BLOCK_REMOVE_HASH. */
-static
-void
-buf_LRU_block_remove_hashed_page(
-/*=============================*/
- buf_block_t* block); /* in: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
-/**********************************************************************
-Puts a file page whose has no hash index to the free list. */
-static
-void
-buf_LRU_block_free_hashed_page(
-/*===========================*/
- buf_block_t* block); /* in: block, must contain a file page and
- be in a state where it can be freed */
-
-/**********************************************************************
-Attempts to drop page hash index on a batch of pages belonging to a
-particular space id. */
-static
-void
-buf_LRU_drop_page_hash_batch(
-/*=========================*/
- ulint id, /* in: space id */
- const ulint* arr, /* in: array of page_no */
- ulint count) /* in: number of entries in array */
-{
- ulint i;
-
- ut_ad(arr != NULL);
- ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE);
-
- for (i = 0; i < count; ++i) {
- btr_search_drop_page_hash_when_freed(id, arr[i]);
- }
-}
-
-/**********************************************************************
-When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
-hash index entries belonging to that table. This function tries to
-do that in batch. Note that this is a 'best effort' attempt and does
-not guarantee that ALL hash entries will be removed. */
-static
-void
-buf_LRU_drop_page_hash_for_tablespace(
-/*==================================*/
- ulint id) /* in: space id */
-{
- buf_block_t* block;
- ulint* page_arr;
- ulint num_entries;
-
- page_arr = ut_malloc(sizeof(ulint)
- * BUF_LRU_DROP_SEARCH_HASH_SIZE);
- mutex_enter(&buf_pool->mutex);
-
-scan_again:
- num_entries = 0;
- block = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while (block != NULL) {
- buf_block_t* prev_block;
-
- mutex_enter(&block->mutex);
- prev_block = UT_LIST_GET_PREV(LRU, block);
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- if (block->space != id
- || block->buf_fix_count > 0
- || block->io_fix != 0) {
- /* We leave the fixed pages as is in this scan.
- To be dealt with later in the final scan. */
- mutex_exit(&block->mutex);
- goto next_page;
- }
-
- ut_ad(block->space == id);
- if (block->is_hashed) {
-
- /* Store the offset(i.e.: page_no) in the array
- so that we can drop hash index in a batch
- later. */
- page_arr[num_entries] = block->offset;
- mutex_exit(&block->mutex);
- ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
- ++num_entries;
-
- if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
- goto next_page;
- }
- /* Array full. We release the buf_pool->mutex to
- obey the latching order. */
- mutex_exit(&buf_pool->mutex);
-
- buf_LRU_drop_page_hash_batch(id, page_arr,
- num_entries);
- num_entries = 0;
- mutex_enter(&buf_pool->mutex);
- } else {
- mutex_exit(&block->mutex);
- }
-
-next_page:
- /* Note that we may have released the buf_pool->mutex
- above after reading the prev_block during processing
- of a page_hash_batch (i.e.: when the array was full).
- This means that prev_block can change in LRU list.
- This is OK because this function is a 'best effort'
- to drop as many search hash entries as possible and
- it does not guarantee that ALL such entries will be
- dropped. */
- block = prev_block;
-
- /* If, however, block has been removed from LRU list
- to the free list then we should restart the scan.
- block->state is protected by buf_pool->mutex. */
- if (block && block->state != BUF_BLOCK_FILE_PAGE) {
- ut_a(num_entries == 0);
- goto scan_again;
- }
- }
-
- mutex_exit(&buf_pool->mutex);
-
- /* Drop any remaining batch of search hashed pages. */
- buf_LRU_drop_page_hash_batch(id, page_arr, num_entries);
- ut_free(page_arr);
-}
-
-/**********************************************************************
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. */
-
-void
-buf_LRU_invalidate_tablespace(
-/*==========================*/
- ulint id) /* in: space id */
-{
- buf_block_t* block;
- ulint page_no;
- ibool all_freed;
-
- /* Before we attempt to drop pages one by one we first
- attempt to drop page hash index entries in batches to make
- it more efficient. The batching attempt is a best effort
- attempt and does not guarantee that all pages hash entries
- will be dropped. We get rid of remaining page hash entries
- one by one below. */
- buf_LRU_drop_page_hash_for_tablespace(id);
-
-scan_again:
- mutex_enter(&(buf_pool->mutex));
-
- all_freed = TRUE;
-
- block = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while (block != NULL) {
- buf_block_t* prev_block;
-
- mutex_enter(&block->mutex);
- prev_block = UT_LIST_GET_PREV(LRU, block);
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- if (block->space == id
- && (block->buf_fix_count > 0 || block->io_fix != 0)) {
-
- /* We cannot remove this page during this scan yet;
- maybe the system is currently reading it in, or
- flushing the modifications to the file */
-
- all_freed = FALSE;
-
- goto next_page;
- }
-
- if (block->space == id) {
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Dropping space %lu page %lu\n",
- (ulong) block->space,
- (ulong) block->offset);
- }
-#endif
- if (block->is_hashed) {
- page_no = block->offset;
-
- mutex_exit(&block->mutex);
-
- mutex_exit(&(buf_pool->mutex));
-
- /* Note that the following call will acquire
- an S-latch on the page */
-
- btr_search_drop_page_hash_when_freed(id,
- page_no);
- goto scan_again;
- }
-
- if (0 != ut_dulint_cmp(block->oldest_modification,
- ut_dulint_zero)) {
-
- /* Remove from the flush list of modified
- blocks */
- block->oldest_modification = ut_dulint_zero;
-
- UT_LIST_REMOVE(flush_list,
- buf_pool->flush_list, block);
- }
-
- /* Remove from the LRU list */
- buf_LRU_block_remove_hashed_page(block);
- buf_LRU_block_free_hashed_page(block);
- }
-next_page:
- mutex_exit(&block->mutex);
- block = prev_block;
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- if (!all_freed) {
- os_thread_sleep(20000);
-
- goto scan_again;
- }
-}
-
-/**********************************************************************
-Gets the minimum LRU_position field for the blocks in an initial segment
-(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
-guaranteed to be precise, because the ulint_clock may wrap around. */
-
-ulint
-buf_LRU_get_recent_limit(void)
-/*==========================*/
- /* out: the limit; zero if could not determine it */
-{
- buf_block_t* block;
- ulint len;
- ulint limit;
-
- mutex_enter(&(buf_pool->mutex));
-
- len = UT_LIST_GET_LEN(buf_pool->LRU);
-
- if (len < BUF_LRU_OLD_MIN_LEN) {
- /* The LRU list is too short to do read-ahead */
-
- mutex_exit(&(buf_pool->mutex));
-
- return(0);
- }
-
- block = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
-
- mutex_exit(&(buf_pool->mutex));
-
- return(limit);
-}
-
-/**********************************************************************
-Look for a replaceable block from the end of the LRU list and put it to
-the free list if found. */
-
-ibool
-buf_LRU_search_and_free_block(
-/*==========================*/
- /* out: TRUE if freed */
- ulint n_iterations) /* in: how many times this has been called
- repeatedly without result: a high value means
- that we should search farther; if value is
- k < 10, then we only search k/10 * [number
- of pages in the buffer pool] from the end
- of the LRU list */
-{
- buf_block_t* block;
- ulint distance = 0;
- ibool freed;
-
- mutex_enter(&(buf_pool->mutex));
-
- freed = FALSE;
- block = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while (block != NULL) {
- ut_a(block->in_LRU_list);
-
- mutex_enter(&block->mutex);
-
- if (buf_flush_ready_for_replace(block)) {
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Putting space %lu page %lu"
- " to free list\n",
- (ulong) block->space,
- (ulong) block->offset);
- }
-#endif /* UNIV_DEBUG */
-
- buf_LRU_block_remove_hashed_page(block);
-
- mutex_exit(&(buf_pool->mutex));
- mutex_exit(&block->mutex);
-
- /* Remove possible adaptive hash index built on the
- page; in the case of AWE the block may not have a
- frame at all */
-
- if (block->frame) {
- /* The page was declared uninitialized
- by buf_LRU_block_remove_hashed_page().
- We need to flag the contents of the
- page valid (which it still is) in
- order to avoid bogus Valgrind
- warnings. */
- UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
- btr_search_drop_page_hash_index(block->frame);
- UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
- }
-
- ut_a(block->buf_fix_count == 0);
-
- mutex_enter(&(buf_pool->mutex));
- mutex_enter(&block->mutex);
-
- buf_LRU_block_free_hashed_page(block);
- freed = TRUE;
- mutex_exit(&block->mutex);
-
- break;
- }
-
- mutex_exit(&block->mutex);
-
- block = UT_LIST_GET_PREV(LRU, block);
- distance++;
-
- if (!freed && n_iterations <= 10
- && distance > 100 + (n_iterations * buf_pool->curr_size)
- / 10) {
- buf_pool->LRU_flush_ended = 0;
-
- mutex_exit(&(buf_pool->mutex));
-
- return(FALSE);
- }
- }
- if (buf_pool->LRU_flush_ended > 0) {
- buf_pool->LRU_flush_ended--;
- }
- if (!freed) {
- buf_pool->LRU_flush_ended = 0;
- }
- mutex_exit(&(buf_pool->mutex));
-
- return(freed);
-}
-
-/**********************************************************************
-Tries to remove LRU flushed blocks from the end of the LRU list and put them
-to the free list. This is beneficial for the efficiency of the insert buffer
-operation, as flushed pages from non-unique non-clustered indexes are here
-taken out of the buffer pool, and their inserts redirected to the insert
-buffer. Otherwise, the flushed blocks could get modified again before read
-operations need new buffer blocks, and the i/o work done in flushing would be
-wasted. */
-
-void
-buf_LRU_try_free_flushed_blocks(void)
-/*=================================*/
-{
- mutex_enter(&(buf_pool->mutex));
-
- while (buf_pool->LRU_flush_ended > 0) {
-
- mutex_exit(&(buf_pool->mutex));
-
- buf_LRU_search_and_free_block(1);
-
- mutex_enter(&(buf_pool->mutex));
- }
-
- mutex_exit(&(buf_pool->mutex));
-}
-
-/**********************************************************************
-Returns TRUE if less than 25 % of the buffer pool is available. This can be
-used in heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks. */
-
-ibool
-buf_LRU_buf_pool_running_out(void)
-/*==============================*/
- /* out: TRUE if less than 25 % of buffer pool
- left */
-{
- ibool ret = FALSE;
-
- mutex_enter(&(buf_pool->mutex));
-
- if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) {
-
- ret = TRUE;
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- return(ret);
-}
-
-/**********************************************************************
-Returns a free block from buf_pool. The block is taken off the free list.
-If it is empty, blocks are moved from the end of the LRU list to the free
-list. */
-
-buf_block_t*
-buf_LRU_get_free_block(void)
-/*========================*/
- /* out: the free control block; also if AWE is
- used, it is guaranteed that the block has its
- page mapped to a frame when we return */
-{
- buf_block_t* block = NULL;
- ibool freed;
- ulint n_iterations = 1;
- ibool mon_value_was = FALSE;
- ibool started_monitor = FALSE;
-loop:
- mutex_enter(&(buf_pool->mutex));
-
- if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: ERROR: over 95 percent of the buffer pool"
- " is occupied by\n"
- "InnoDB: lock heaps or the adaptive hash index!"
- " Check that your\n"
- "InnoDB: transactions do not set too many row locks.\n"
- "InnoDB: Your buffer pool size is %lu MB."
- " Maybe you should make\n"
- "InnoDB: the buffer pool bigger?\n"
- "InnoDB: We intentionally generate a seg fault"
- " to print a stack trace\n"
- "InnoDB: on Linux!\n",
- (ulong) (buf_pool->curr_size
- / (1024 * 1024 / UNIV_PAGE_SIZE)));
-
- ut_error;
-
- } else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 3) {
-
- if (!buf_lru_switched_on_innodb_mon) {
-
- /* Over 67 % of the buffer pool is occupied by lock
- heaps or the adaptive hash index. This may be a memory
- leak! */
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: WARNING: over 67 percent of"
- " the buffer pool is occupied by\n"
- "InnoDB: lock heaps or the adaptive"
- " hash index! Check that your\n"
- "InnoDB: transactions do not set too many"
- " row locks.\n"
- "InnoDB: Your buffer pool size is %lu MB."
- " Maybe you should make\n"
- "InnoDB: the buffer pool bigger?\n"
- "InnoDB: Starting the InnoDB Monitor to print"
- " diagnostics, including\n"
- "InnoDB: lock heap and hash index sizes.\n",
- (ulong) (buf_pool->curr_size
- / (1024 * 1024 / UNIV_PAGE_SIZE)));
-
- buf_lru_switched_on_innodb_mon = TRUE;
- srv_print_innodb_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- }
- } else if (buf_lru_switched_on_innodb_mon) {
-
- /* Switch off the InnoDB Monitor; this is a simple way
- to stop the monitor if the situation becomes less urgent,
- but may also surprise users if the user also switched on the
- monitor! */
-
- buf_lru_switched_on_innodb_mon = FALSE;
- srv_print_innodb_monitor = FALSE;
- }
-
- /* If there is a block in the free list, take it */
- if (UT_LIST_GET_LEN(buf_pool->free) > 0) {
-
- block = UT_LIST_GET_FIRST(buf_pool->free);
- ut_a(block->in_free_list);
- UT_LIST_REMOVE(free, buf_pool->free, block);
- block->in_free_list = FALSE;
- ut_a(block->state != BUF_BLOCK_FILE_PAGE);
- ut_a(!block->in_LRU_list);
-
- if (srv_use_awe) {
- if (block->frame) {
- /* Remove from the list of mapped pages */
-
- UT_LIST_REMOVE(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped,
- block);
- } else {
- /* We map the page to a frame; second param
- FALSE below because we do not want it to be
- added to the awe_LRU_free_mapped list */
-
- buf_awe_map_page_to_frame(block, FALSE);
- }
- }
-
- mutex_enter(&block->mutex);
-
- block->state = BUF_BLOCK_READY_FOR_USE;
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-
- mutex_exit(&block->mutex);
-
- mutex_exit(&(buf_pool->mutex));
-
- if (started_monitor) {
- srv_print_innodb_monitor = mon_value_was;
- }
-
- return(block);
- }
-
- /* If no block was in the free list, search from the end of the LRU
- list and try to free a block there */
-
- mutex_exit(&(buf_pool->mutex));
-
- freed = buf_LRU_search_and_free_block(n_iterations);
-
- if (freed > 0) {
- goto loop;
- }
-
- if (n_iterations > 30) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Warning: difficult to find free blocks from\n"
- "InnoDB: the buffer pool (%lu search iterations)!"
- " Consider\n"
- "InnoDB: increasing the buffer pool size.\n"
- "InnoDB: It is also possible that"
- " in your Unix version\n"
- "InnoDB: fsync is very slow, or"
- " completely frozen inside\n"
- "InnoDB: the OS kernel. Then upgrading to"
- " a newer version\n"
- "InnoDB: of your operating system may help."
- " Look at the\n"
- "InnoDB: number of fsyncs in diagnostic info below.\n"
- "InnoDB: Pending flushes (fsync) log: %lu;"
- " buffer pool: %lu\n"
- "InnoDB: %lu OS file reads, %lu OS file writes,"
- " %lu OS fsyncs\n"
- "InnoDB: Starting InnoDB Monitor to print further\n"
- "InnoDB: diagnostics to the standard output.\n",
- (ulong) n_iterations,
- (ulong) fil_n_pending_log_flushes,
- (ulong) fil_n_pending_tablespace_flushes,
- (ulong) os_n_file_reads, (ulong) os_n_file_writes,
- (ulong) os_n_fsyncs);
-
- mon_value_was = srv_print_innodb_monitor;
- started_monitor = TRUE;
- srv_print_innodb_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- }
-
- /* No free block was found: try to flush the LRU list */
-
- buf_flush_free_margin();
- ++srv_buf_pool_wait_free;
-
- os_aio_simulated_wake_handler_threads();
-
- mutex_enter(&(buf_pool->mutex));
-
- if (buf_pool->LRU_flush_ended > 0) {
- /* We have written pages in an LRU flush. To make the insert
- buffer more efficient, we try to move these pages to the free
- list. */
-
- mutex_exit(&(buf_pool->mutex));
-
- buf_LRU_try_free_flushed_blocks();
- } else {
- mutex_exit(&(buf_pool->mutex));
- }
-
- if (n_iterations > 10) {
-
- os_thread_sleep(500000);
- }
-
- n_iterations++;
-
- goto loop;
-}
-
-/***********************************************************************
-Moves the LRU_old pointer so that the length of the old blocks list
-is inside the allowed limits. */
-UNIV_INLINE
-void
-buf_LRU_old_adjust_len(void)
-/*========================*/
-{
- ulint old_len;
- ulint new_len;
-
- ut_a(buf_pool->LRU_old);
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(3 * (BUF_LRU_OLD_MIN_LEN / 8) > BUF_LRU_OLD_TOLERANCE + 5);
-
- for (;;) {
- old_len = buf_pool->LRU_old_len;
- new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
-
- ut_a(buf_pool->LRU_old->in_LRU_list);
-
- /* Update the LRU_old pointer if necessary */
-
- if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
-
- buf_pool->LRU_old = UT_LIST_GET_PREV(
- LRU, buf_pool->LRU_old);
- (buf_pool->LRU_old)->old = TRUE;
- buf_pool->LRU_old_len++;
-
- } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
-
- (buf_pool->LRU_old)->old = FALSE;
- buf_pool->LRU_old = UT_LIST_GET_NEXT(
- LRU, buf_pool->LRU_old);
- buf_pool->LRU_old_len--;
- } else {
- ut_a(buf_pool->LRU_old); /* Check that we did not
- fall out of the LRU list */
- return;
- }
- }
-}
-
-/***********************************************************************
-Initializes the old blocks pointer in the LRU list. This function should be
-called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
-static
-void
-buf_LRU_old_init(void)
-/*==================*/
-{
- buf_block_t* block;
-
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
-
- /* We first initialize all blocks in the LRU list as old and then use
- the adjust function to move the LRU_old pointer to the right
- position */
-
- block = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- while (block != NULL) {
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(block->in_LRU_list);
- block->old = TRUE;
- block = UT_LIST_GET_NEXT(LRU, block);
- }
-
- buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
- buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
- buf_LRU_old_adjust_len();
-}
-
-/**********************************************************************
-Removes a block from the LRU list. */
-UNIV_INLINE
-void
-buf_LRU_remove_block(
-/*=================*/
- buf_block_t* block) /* in: control block */
-{
- ut_ad(buf_pool);
- ut_ad(block);
- ut_ad(mutex_own(&(buf_pool->mutex)));
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(block->in_LRU_list);
-
- /* If the LRU_old pointer is defined and points to just this block,
- move it backward one step */
-
- if (block == buf_pool->LRU_old) {
-
- /* Below: the previous block is guaranteed to exist, because
- the LRU_old pointer is only allowed to differ by the
- tolerance value from strict 3/8 of the LRU list length. */
-
- buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, block);
- (buf_pool->LRU_old)->old = TRUE;
-
- buf_pool->LRU_old_len++;
- ut_a(buf_pool->LRU_old);
- }
-
- /* Remove the block from the LRU list */
- UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
- block->in_LRU_list = FALSE;
-
- if (srv_use_awe && block->frame) {
- /* Remove from the list of mapped pages */
-
- UT_LIST_REMOVE(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped, block);
- }
-
- /* If the LRU list is so short that LRU_old not defined, return */
- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
-
- buf_pool->LRU_old = NULL;
-
- return;
- }
-
- ut_ad(buf_pool->LRU_old);
-
- /* Update the LRU_old_len field if necessary */
- if (block->old) {
-
- buf_pool->LRU_old_len--;
- }
-
- /* Adjust the length of the old block list if necessary */
- buf_LRU_old_adjust_len();
-}
-
-/**********************************************************************
-Adds a block to the LRU list end. */
-UNIV_INLINE
-void
-buf_LRU_add_block_to_end_low(
-/*=========================*/
- buf_block_t* block) /* in: control block */
-{
- buf_block_t* last_block;
-
- ut_ad(buf_pool);
- ut_ad(block);
- ut_ad(mutex_own(&(buf_pool->mutex)));
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- block->old = TRUE;
-
- last_block = UT_LIST_GET_LAST(buf_pool->LRU);
-
- if (last_block) {
- block->LRU_position = last_block->LRU_position;
- } else {
- block->LRU_position = buf_pool_clock_tic();
- }
-
- ut_a(!block->in_LRU_list);
- UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
- block->in_LRU_list = TRUE;
-
- if (srv_use_awe && block->frame) {
- /* Add to the list of mapped pages */
-
- UT_LIST_ADD_LAST(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped, block);
- }
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-
- buf_pool->LRU_old_len++;
- }
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
-
- ut_ad(buf_pool->LRU_old);
-
- /* Adjust the length of the old block list if necessary */
-
- buf_LRU_old_adjust_len();
-
- } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
-
- /* The LRU list is now long enough for LRU_old to become
- defined: init it */
-
- buf_LRU_old_init();
- }
-}
-
-/**********************************************************************
-Adds a block to the LRU list. */
-UNIV_INLINE
-void
-buf_LRU_add_block_low(
-/*==================*/
- buf_block_t* block, /* in: control block */
- ibool old) /* in: TRUE if should be put to the old blocks
- in the LRU list, else put to the start; if the
- LRU list is very short, the block is added to
- the start, regardless of this parameter */
-{
- ulint cl;
-
- ut_ad(buf_pool);
- ut_ad(block);
- ut_ad(mutex_own(&(buf_pool->mutex)));
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(!block->in_LRU_list);
-
- block->old = old;
- cl = buf_pool_clock_tic();
-
- if (srv_use_awe && block->frame) {
- /* Add to the list of mapped pages; for simplicity we always
- add to the start, even if the user would have set 'old'
- TRUE */
-
- UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped, block);
- }
-
- if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
-
- UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
-
- block->LRU_position = cl;
- block->freed_page_clock = buf_pool->freed_page_clock;
- } else {
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
- block);
- buf_pool->LRU_old_len++;
-
- /* We copy the LRU position field of the previous block
- to the new block */
-
- block->LRU_position = (buf_pool->LRU_old)->LRU_position;
- }
-
- block->in_LRU_list = TRUE;
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
-
- ut_ad(buf_pool->LRU_old);
-
- /* Adjust the length of the old block list if necessary */
-
- buf_LRU_old_adjust_len();
-
- } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
-
- /* The LRU list is now long enough for LRU_old to become
- defined: init it */
-
- buf_LRU_old_init();
- }
-}
-
-/**********************************************************************
-Adds a block to the LRU list. */
-
-void
-buf_LRU_add_block(
-/*==============*/
- buf_block_t* block, /* in: control block */
- ibool old) /* in: TRUE if should be put to the old
- blocks in the LRU list, else put to the start;
- if the LRU list is very short, the block is
- added to the start, regardless of this
- parameter */
-{
- buf_LRU_add_block_low(block, old);
-}
-
-/**********************************************************************
-Moves a block to the start of the LRU list. */
-
-void
-buf_LRU_make_block_young(
-/*=====================*/
- buf_block_t* block) /* in: control block */
-{
- buf_LRU_remove_block(block);
- buf_LRU_add_block_low(block, FALSE);
-}
-
-/**********************************************************************
-Moves a block to the end of the LRU list. */
-
-void
-buf_LRU_make_block_old(
-/*===================*/
- buf_block_t* block) /* in: control block */
-{
- buf_LRU_remove_block(block);
- buf_LRU_add_block_to_end_low(block);
-}
-
-/**********************************************************************
-Puts a block back to the free list. */
-
-void
-buf_LRU_block_free_non_file_page(
-/*=============================*/
- buf_block_t* block) /* in: block, must not contain a file page */
-{
-
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&block->mutex));
- ut_ad(block);
-
- ut_a((block->state == BUF_BLOCK_MEMORY)
- || (block->state == BUF_BLOCK_READY_FOR_USE));
-
- ut_a(block->n_pointers == 0);
- ut_a(!block->in_free_list);
-
- block->state = BUF_BLOCK_NOT_USED;
-
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-#ifdef UNIV_DEBUG
- /* Wipe contents of page to reveal possible stale pointers to it */
- memset(block->frame, '\0', UNIV_PAGE_SIZE);
-#endif
- UT_LIST_ADD_FIRST(free, buf_pool->free, block);
- block->in_free_list = TRUE;
-
- UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
-
- if (srv_use_awe && block->frame) {
- /* Add to the list of mapped pages */
-
- UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
- buf_pool->awe_LRU_free_mapped, block);
- }
-}
-
-/**********************************************************************
-Takes a block out of the LRU list and page hash table and sets the block
-state to BUF_BLOCK_REMOVE_HASH. */
-static
-void
-buf_LRU_block_remove_hashed_page(
-/*=============================*/
- buf_block_t* block) /* in: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
-{
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&block->mutex));
- ut_ad(block);
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(block->io_fix == 0);
- ut_a(block->buf_fix_count == 0);
- ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0);
-
- buf_LRU_remove_block(block);
-
- buf_pool->freed_page_clock += 1;
-
- /* Note that if AWE is enabled the block may not have a frame at all */
-
- buf_block_modify_clock_inc(block);
-
- if (block != buf_page_hash_get(block->space, block->offset)) {
- fprintf(stderr,
- "InnoDB: Error: page %lu %lu not found"
- " in the hash table\n",
- (ulong) block->space,
- (ulong) block->offset);
- if (buf_page_hash_get(block->space, block->offset)) {
- fprintf(stderr,
- "InnoDB: In hash table we find block"
- " %p of %lu %lu which is not %p\n",
- (void*) buf_page_hash_get
- (block->space, block->offset),
- (ulong) buf_page_hash_get
- (block->space, block->offset)->space,
- (ulong) buf_page_hash_get
- (block->space, block->offset)->offset,
- (void*) block);
- }
-
-#ifdef UNIV_DEBUG
- buf_print();
- buf_LRU_print();
- buf_validate();
- buf_LRU_validate();
-#endif
- ut_a(0);
- }
-
- HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
- buf_page_address_fold(block->space, block->offset),
- block);
-
- UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
- block->state = BUF_BLOCK_REMOVE_HASH;
-}
-
-/**********************************************************************
-Puts a file page whose has no hash index to the free list. */
-static
-void
-buf_LRU_block_free_hashed_page(
-/*===========================*/
- buf_block_t* block) /* in: block, must contain a file page and
- be in a state where it can be freed */
-{
- ut_ad(mutex_own(&(buf_pool->mutex)));
- ut_ad(mutex_own(&block->mutex));
-
- ut_a(block->state == BUF_BLOCK_REMOVE_HASH);
-
- block->state = BUF_BLOCK_MEMORY;
-
- buf_LRU_block_free_non_file_page(block);
-}
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Validates the LRU list. */
-
-ibool
-buf_LRU_validate(void)
-/*==================*/
-{
- buf_block_t* block;
- ulint old_len;
- ulint new_len;
- ulint LRU_pos;
-
- ut_ad(buf_pool);
- mutex_enter(&(buf_pool->mutex));
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-
- ut_a(buf_pool->LRU_old);
- old_len = buf_pool->LRU_old_len;
- new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
- ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
- ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
- }
-
- UT_LIST_VALIDATE(LRU, buf_block_t, buf_pool->LRU);
-
- block = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- old_len = 0;
-
- while (block != NULL) {
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- if (block->old) {
- old_len++;
- }
-
- if (buf_pool->LRU_old && (old_len == 1)) {
- ut_a(buf_pool->LRU_old == block);
- }
-
- LRU_pos = block->LRU_position;
-
- block = UT_LIST_GET_NEXT(LRU, block);
-
- if (block) {
- /* If the following assert fails, it may
- not be an error: just the buf_pool clock
- has wrapped around */
- ut_a(LRU_pos >= block->LRU_position);
- }
- }
-
- if (buf_pool->LRU_old) {
- ut_a(buf_pool->LRU_old_len == old_len);
- }
-
- UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);
-
- block = UT_LIST_GET_FIRST(buf_pool->free);
-
- while (block != NULL) {
- ut_a(block->state == BUF_BLOCK_NOT_USED);
-
- block = UT_LIST_GET_NEXT(free, block);
- }
-
- mutex_exit(&(buf_pool->mutex));
- return(TRUE);
-}
-
-/**************************************************************************
-Prints the LRU list. */
-
-void
-buf_LRU_print(void)
-/*===============*/
-{
- buf_block_t* block;
- buf_frame_t* frame;
- ulint len;
-
- ut_ad(buf_pool);
- mutex_enter(&(buf_pool->mutex));
-
- fprintf(stderr, "Pool ulint clock %lu\n",
- (ulong) buf_pool->ulint_clock);
-
- block = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- len = 0;
-
- while (block != NULL) {
-
- fprintf(stderr, "BLOCK %lu ", (ulong) block->offset);
-
- if (block->old) {
- fputs("old ", stderr);
- }
-
- if (block->buf_fix_count) {
- fprintf(stderr, "buffix count %lu ",
- (ulong) block->buf_fix_count);
- }
-
- if (block->io_fix) {
- fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix);
- }
-
- if (ut_dulint_cmp(block->oldest_modification,
- ut_dulint_zero) > 0) {
- fputs("modif. ", stderr);
- }
-
- frame = buf_block_get_frame(block);
-
- fprintf(stderr, "LRU pos %lu type %lu index id %lu ",
- (ulong) block->LRU_position,
- (ulong) fil_page_get_type(frame),
- (ulong) ut_dulint_get_low
- (btr_page_get_index_id(frame)));
-
- block = UT_LIST_GET_NEXT(LRU, block);
- if (++len == 10) {
- len = 0;
- putc('\n', stderr);
- }
- }
-
- mutex_exit(&(buf_pool->mutex));
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
deleted file mode 100644
index fdec0206990..00000000000
--- a/storage/innobase/buf/buf0rea.c
+++ /dev/null
@@ -1,728 +0,0 @@
-/******************************************************
-The database buffer read
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0rea.h"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "ibuf0ibuf.h"
-#include "log0recv.h"
-#include "trx0sys.h"
-#include "os0file.h"
-#include "srv0start.h"
-
-extern ulint srv_read_ahead_rnd; /* incremented by buf_read_ahead_random() once it has issued its read requests */
-extern ulint srv_read_ahead_seq; /* incremented by buf_read_ahead_linear() once it has issued its read requests */
-extern ulint srv_buf_pool_reads; /* increased in buf_read_page() by the number of reads queued for the requested page */
-
-/* The size in blocks of the area where the random read-ahead algorithm counts
-the accessed pages when deciding whether to read-ahead */
-#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA
-
-/* There must be at least this many recently accessed pages of the area in
-buf_pool to start a random read-ahead */
-#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
-
-/* The linear read-ahead area size */
-#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
-
-/* The linear read-ahead threshold: at most AREA - THRESHOLD pages of the area may fail the access check */
-#define BUF_READ_AHEAD_LINEAR_THRESHOLD (3 * BUF_READ_AHEAD_LINEAR_AREA / 8)
-
-/* If more than buf_pool->curr_size divided by the number below reads are
-pending, then read-ahead is not done: this is to prevent flooding the buffer
-pool with i/o-fixed buffer blocks */
-#define BUF_READ_AHEAD_PEND_LIMIT 2
-
-/************************************************************************
-Low-level function which reads a page asynchronously from a file to the
-buffer buf_pool if it is not already there, in which case does nothing.
-Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
-flag is cleared and the x-lock released by an i/o-handler thread. */
-static
-ulint
-buf_read_page_low(
-/*==============*/
- /* out: 1 if a read request was queued, 0 if the page
- already resided in buf_pool, or if the page is in
- the doublewrite buffer blocks in which case it is never
- read into the pool, or if the tablespace does not
- exist or is being dropped */
- ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
- trying to read from a non-existent tablespace, or a
- tablespace which is just now being dropped */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ...,
- ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
- at read-ahead functions) */
- ulint space, /* in: space id */
- ib_longlong tablespace_version, /* in: if the space memory object has
- this timestamp different from what we are giving here,
- treat the tablespace as dropped; this is a timestamp we
- use to stop dangling page reads from a tablespace
- which we have DISCARDed + IMPORTed back */
- ulint offset) /* in: page number */
-{
- buf_block_t* block;
- ulint wake_later;
-
- *err = DB_SUCCESS;
-
- wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; /* remembered here, ORed into the fil_io request type below */
- mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER; /* strip the flag before mode is passed on */
-
- if (trx_doublewrite && space == TRX_SYS_SPACE
- && ( (offset >= trx_doublewrite->block1
- && offset < trx_doublewrite->block1
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
- || (offset >= trx_doublewrite->block2
- && offset < trx_doublewrite->block2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: trying to read"
- " doublewrite buffer page %lu\n",
- (ulong) offset);
-
- return(0);
- }
-
- if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
-
- /* Trx sys header is so low in the latching order that we play
- safe and do not leave the i/o-completion to an asynchronous
- i/o-thread. Ibuf bitmap pages must always be read with
- synchronous i/o, to make sure they do not get involved in
- thread deadlocks. */
-
- sync = TRUE;
- }
-
- /* The following call will also check if the tablespace does not exist
- or is being dropped; if we succeed in initing the page in the buffer
- pool for read, then DISCARD cannot proceed until the read has
- completed */
- block = buf_page_init_for_read(err, mode, space, tablespace_version,
- offset);
- if (block == NULL) {
-
- return(0);
- }
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Posting read request for page %lu, sync %lu\n",
- (ulong) offset,
- (ulong) sync);
- }
-#endif
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
-
- *err = fil_io(OS_FILE_READ | wake_later,
- sync, space,
- offset, 0, UNIV_PAGE_SIZE,
- (void*)block->frame, (void*)block);
- ut_a(*err == DB_SUCCESS); /* a fil_io result other than DB_SUCCESS is treated as an assertion failure, not returned */
-
- if (sync) {
- /* The i/o is already completed when we return from
- the synchronous fil_io call above */
- buf_page_io_complete(block);
- }
-
- return(1);
-}
-
-/************************************************************************
-Applies a random read-ahead in buf_pool if there are at least a threshold
-value of accessed pages from the random read-ahead area. Does not read any
-page, not even the one at the position (space, offset), if the read-ahead
-mechanism is not activated. NOTE 1: the calling thread may own latches on
-pages: to avoid deadlocks this function must be written such that it cannot
-end up waiting for these latches! NOTE 2: the calling thread must want
-access to the page given: this rule is set to prevent unintended read-aheads
-performed by ibuf routines, a situation which could result in a deadlock if
-the OS does not support asynchronous i/o. */
-static
-ulint
-buf_read_ahead_random(
-/*==================*/
- /* out: number of page read requests issued; NOTE
- that if we read ibuf pages, it may happen that
- the page at the given page number does not get
- read even if we return a value > 0! */
- ulint space, /* in: space id */
- ulint offset) /* in: page number of a page which the current thread
- wants to access */
-{
- ib_longlong tablespace_version;
- buf_block_t* block;
- ulint recent_blocks = 0;
- ulint count;
- ulint LRU_recent_limit;
- ulint ibuf_mode;
- ulint low, high;
- ulint err;
- ulint i;
-
- if (srv_startup_is_before_trx_rollback_phase) {
- /* No read-ahead to avoid thread deadlocks */
- return(0);
- }
-
- if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
-
- /* If it is an ibuf bitmap page or trx sys hdr, we do
- no read-ahead, as that could break the ibuf page access
- order */
-
- return(0);
- }
-
- /* Remember the tablespace version before we ask the tablespace size
- below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
- do not try to read outside the bounds of the tablespace! */
-
- tablespace_version = fil_space_get_version(space);
-
- low = (offset / BUF_READ_AHEAD_RANDOM_AREA)
- * BUF_READ_AHEAD_RANDOM_AREA;
- high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1)
- * BUF_READ_AHEAD_RANDOM_AREA;
- if (high > fil_space_get_size(space)) { /* clamp the area [low, high) to the size of the space */
-
- high = fil_space_get_size(space);
- }
-
- /* Get the minimum LRU_position field value for an initial segment
- of the LRU list, to determine which blocks have recently been added
- to the start of the list. */
-
- LRU_recent_limit = buf_LRU_get_recent_limit();
-
- mutex_enter(&(buf_pool->mutex));
-
- if (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { /* too many reads already pending: no read-ahead */
- mutex_exit(&(buf_pool->mutex));
-
- return(0);
- }
-
- /* Count how many blocks in the area have been recently accessed,
- that is, reside near the start of the LRU list. */
-
- for (i = low; i < high; i++) {
- block = buf_page_hash_get(space, i);
-
- if ((block)
- && (block->LRU_position > LRU_recent_limit)
- && block->accessed) {
-
- recent_blocks++;
- }
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
- /* Do nothing */
-
- return(0);
- }
-
- /* Read all the suitable blocks within the area */
-
- if (ibuf_inside()) {
- ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
- } else {
- ibuf_mode = BUF_READ_ANY_PAGE;
- }
-
- count = 0;
-
- for (i = low; i < high; i++) {
- /* It is only sensible to do read-ahead in the non-sync aio
- mode: hence FALSE as the sync parameter */
-
- if (!ibuf_bitmap_page(i)) {
- count += buf_read_page_low(
- &err, FALSE,
- ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
- space, tablespace_version, i);
- if (err == DB_TABLESPACE_DELETED) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: in random"
- " readahead trying to access\n"
- "InnoDB: tablespace %lu page %lu,\n"
- "InnoDB: but the tablespace does not"
- " exist or is just being dropped.\n",
- (ulong) space, (ulong) i);
- }
- }
- }
-
- /* In simulated aio we wake the aio handler threads only after
- queuing all aio requests, in native aio the following call does
- nothing: */
-
- os_aio_simulated_wake_handler_threads();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && (count > 0)) {
- fprintf(stderr,
- "Random read-ahead space %lu offset %lu pages %lu\n",
- (ulong) space, (ulong) offset,
- (ulong) count);
- }
-#endif /* UNIV_DEBUG */
-
- ++srv_read_ahead_rnd; /* NOTE(review): incremented even when count == 0, i.e. when no read was actually queued */
- return(count);
-}
-
-/************************************************************************
-High-level function which reads a page from a file to the buffer buf_pool
-if it is not already there. First attempts a random read-ahead around the
-page (buf_read_ahead_random), then requests the page itself through
-buf_read_page_low in the synchronous aio mode, and finally calls
-buf_flush_free_margin. */
-
-ulint
-buf_read_page(
-/*==========*/
- /* out: number of page read requests issued: this can
- be > 1 if read-ahead occurred */
- ulint space, /* in: space id */
- ulint offset) /* in: page number */
-{
- ib_longlong tablespace_version;
- ulint count; /* requests issued by the random read-ahead */
- ulint count2; /* requests issued for the page itself: the return value of buf_read_page_low */
- ulint err;
-
- tablespace_version = fil_space_get_version(space);
-
- count = buf_read_ahead_random(space, offset);
-
- /* We do the i/o in the synchronous aio mode to save thread
- switches: hence TRUE */
-
- count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
- tablespace_version, offset);
- srv_buf_pool_reads+= count2;
- if (err == DB_TABLESPACE_DELETED) { /* only reported on stderr; the error is not returned to the caller */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to access"
- " tablespace %lu page no. %lu,\n"
- "InnoDB: but the tablespace does not exist"
- " or is just being dropped.\n",
- (ulong) space, (ulong) offset);
- }
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
- return(count + count2);
-}
-
-/************************************************************************
-Applies linear read-ahead if in the buf_pool the page is a border page of
-a linear read-ahead area and all the pages in the area have been accessed.
-Does not read any page if the read-ahead mechanism is not activated. Note
-that the algorithm looks at the 'natural' adjacent successor and
-predecessor of the page, which on the leaf level of a B-tree are the next
-and previous page in the chain of leaves. To know these, the page specified
-in (space, offset) must already be present in the buf_pool. Thus, the
-natural way to use this function is to call it when a page in the buf_pool
-is accessed the first time, calling this function just after it has been
-bufferfixed.
-NOTE 1: as this function looks at the natural predecessor and successor
-fields on the page, what happens, if these are not initialized to any
-sensible value? No problem, before applying read-ahead we check that the
-area to read is within the span of the space, if not, read-ahead is not
-applied. An uninitialized value may result in a useless read operation, but
-only very improbably.
-NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
-function must be written such that it cannot end up waiting for these
-latches!
-NOTE 3: the calling thread must want access to the page given: this rule is
-set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io. */
-
-ulint
-buf_read_ahead_linear(
-/*==================*/
- /* out: number of page read requests issued */
- ulint space, /* in: space id */
- ulint offset) /* in: page number of a page; NOTE: the current thread
- must want access to this page (see NOTE 3 above) */
-{
- ib_longlong tablespace_version;
- buf_block_t* block;
- buf_frame_t* frame;
- buf_block_t* pred_block = NULL;
- ulint pred_offset;
- ulint succ_offset;
- ulint count;
- int asc_or_desc;
- ulint new_offset;
- ulint fail_count;
- ulint ibuf_mode;
- ulint low, high;
- ulint err;
- ulint i;
-
- if (srv_startup_is_before_trx_rollback_phase) {
- /* No read-ahead to avoid thread deadlocks */
- return(0);
- }
-
- if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
-
- /* If it is an ibuf bitmap page or trx sys hdr, we do
- no read-ahead, as that could break the ibuf page access
- order */
-
- return(0);
- }
-
- low = (offset / BUF_READ_AHEAD_LINEAR_AREA)
- * BUF_READ_AHEAD_LINEAR_AREA;
- high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
- * BUF_READ_AHEAD_LINEAR_AREA;
-
- if ((offset != low) && (offset != high - 1)) {
- /* This is not a border page of the area: return */
-
- return(0);
- }
-
- /* Remember the tablespace version before we ask the tablespace size
- below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
- do not try to read outside the bounds of the tablespace! */
-
- tablespace_version = fil_space_get_version(space);
-
- mutex_enter(&(buf_pool->mutex));
-
- if (high > fil_space_get_size(space)) {
- mutex_exit(&(buf_pool->mutex));
- /* The area is not whole, return */
-
- return(0);
- }
-
- if (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { /* too many reads already pending: no read-ahead */
- mutex_exit(&(buf_pool->mutex));
-
- return(0);
- }
-
- /* Check that almost all pages in the area have been accessed; if
- offset == low, the accesses must be in a descending order, otherwise,
- in an ascending order. */
-
- asc_or_desc = 1;
-
- if (offset == low) {
- asc_or_desc = -1;
- }
-
- fail_count = 0;
-
- for (i = low; i < high; i++) {
- block = buf_page_hash_get(space, i);
-
- if ((block == NULL) || !block->accessed) {
- /* Not accessed */
- fail_count++;
-
- } else if (pred_block
- && (ut_ulint_cmp(block->LRU_position,
- pred_block->LRU_position)
- != asc_or_desc)) {
- /* Accesses not in the right order */
-
- fail_count++;
- pred_block = block; /* NOTE(review): the only assignment to pred_block sits inside a branch that requires pred_block != NULL, so this order check looks unreachable - confirm */
- }
- }
-
- if (fail_count > BUF_READ_AHEAD_LINEAR_AREA
- - BUF_READ_AHEAD_LINEAR_THRESHOLD) {
- /* Too many failures: return */
-
- mutex_exit(&(buf_pool->mutex));
-
- return(0);
- }
-
- /* If we got this far, we know that enough pages in the area have
- been accessed in the right order: linear read-ahead can be sensible */
-
- block = buf_page_hash_get(space, offset);
-
- if (block == NULL) {
- mutex_exit(&(buf_pool->mutex));
-
- return(0);
- }
-
- frame = block->frame;
-
- /* Read the natural predecessor and successor page addresses from
- the page; NOTE that because the calling thread may have an x-latch
- on the page, we do not acquire an s-latch on the page, this is to
- prevent deadlocks. Even if we read values which are nonsense, the
- algorithm will work. */
-
- pred_offset = fil_page_get_prev(frame);
- succ_offset = fil_page_get_next(frame);
-
- mutex_exit(&(buf_pool->mutex));
-
- if ((offset == low) && (succ_offset == offset + 1)) {
-
- /* This is ok, we can continue */
- new_offset = pred_offset; /* at the low border: the area to read is the one containing the predecessor */
-
- } else if ((offset == high - 1) && (pred_offset == offset - 1)) {
-
- /* This is ok, we can continue */
- new_offset = succ_offset; /* at the high border: the area to read is the one containing the successor */
- } else {
- /* Successor or predecessor not in the right order */
-
- return(0);
- }
-
- low = (new_offset / BUF_READ_AHEAD_LINEAR_AREA)
- * BUF_READ_AHEAD_LINEAR_AREA;
- high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
- * BUF_READ_AHEAD_LINEAR_AREA;
-
- if ((new_offset != low) && (new_offset != high - 1)) {
- /* This is not a border page of the area: return */
-
- return(0);
- }
-
- if (high > fil_space_get_size(space)) {
- /* The area is not whole, return */
-
- return(0);
- }
-
- /* If we got this far, read-ahead can be sensible: do it */
-
- if (ibuf_inside()) {
- ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
- } else {
- ibuf_mode = BUF_READ_ANY_PAGE;
- }
-
- count = 0;
-
- /* Since Windows XP seems to schedule the i/o handler thread
- very eagerly, and consequently it does not wait for the
- full read batch to be posted, we use special heuristics here */
-
- os_aio_simulated_put_read_threads_to_sleep();
-
- for (i = low; i < high; i++) {
- /* It is only sensible to do read-ahead in the non-sync
- aio mode: hence FALSE as the sync parameter */
-
- if (!ibuf_bitmap_page(i)) {
- count += buf_read_page_low(
- &err, FALSE,
- ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
- space, tablespace_version, i);
- if (err == DB_TABLESPACE_DELETED) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: in"
- " linear readahead trying to access\n"
- "InnoDB: tablespace %lu page %lu,\n"
- "InnoDB: but the tablespace does not"
- " exist or is just being dropped.\n",
- (ulong) space, (ulong) i);
- }
- }
- }
-
- /* In simulated aio we wake the aio handler threads only after
- queuing all aio requests, in native aio the following call does
- nothing: */
-
- os_aio_simulated_wake_handler_threads();
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && (count > 0)) {
- fprintf(stderr,
- "LINEAR read-ahead space %lu offset %lu pages %lu\n",
- (ulong) space, (ulong) offset, (ulong) count);
- }
-#endif /* UNIV_DEBUG */
-
- ++srv_read_ahead_seq; /* NOTE(review): incremented even when count == 0 */
- return(count);
-}
-
-/************************************************************************
-Issues read requests for pages which the ibuf module wants to read in, in
-order to contract the insert buffer tree. Technically, this function is like
-a read-ahead function. */
-
-void
-buf_read_ibuf_merge_pages(
-/*======================*/
- ibool sync, /* in: TRUE if the caller wants this function
- to wait for the highest address page to get
- read in, before this function returns */
- ulint* space_ids, /* in: array of space ids */
- ib_longlong* space_versions,/* in: the spaces must have this version
- number (timestamp), otherwise we discard the
- read; we use this to cancel reads if
- DISCARD + IMPORT may have changed the
- tablespace size */
- ulint* page_nos, /* in: array of page numbers to read, with the
- highest page number the last in the array */
- ulint n_stored) /* in: number of page numbers in the array */
-{
- ulint err;
- ulint i;
-
- ut_ad(!ibuf_inside());
-#ifdef UNIV_IBUF_DEBUG
- ut_a(n_stored < UNIV_PAGE_SIZE);
-#endif
- while (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { /* wait until the number of pending reads drops; buf_pool->mutex is not taken here */
- os_thread_sleep(500000); /* presumably microseconds, i.e. 0.5 s - confirm against os_thread_sleep */
- }
-
- for (i = 0; i < n_stored; i++) {
- buf_read_page_low(&err,
- (i + 1 == n_stored) && sync, /* only the last page of the array is read synchronously, and only if sync is set */
- BUF_READ_ANY_PAGE,
- space_ids[i], space_versions[i],
- page_nos[i]);
-
- if (err == DB_TABLESPACE_DELETED) {
- /* We have deleted or are deleting the single-table
- tablespace: remove the entries for that page */
-
- ibuf_merge_or_delete_for_page(NULL, space_ids[i],
- page_nos[i], FALSE);
- }
- }
-
- os_aio_simulated_wake_handler_threads();
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Ibuf merge read-ahead space %lu pages %lu\n",
- (ulong) space_ids[0], (ulong) n_stored); /* only the first space id of the batch is printed */
- }
-#endif /* UNIV_DEBUG */
-}
-
-/************************************************************************
-Issues read requests for pages which recovery wants to read in. Before each request, waits while buf_pool->n_pend_reads is at least recv_n_pool_free_frames / 2. */
-
-void
-buf_read_recv_pages(
-/*================*/
- ibool sync, /* in: TRUE if the caller wants this function
- to wait for the highest address page to get
- read in, before this function returns */
- ulint space, /* in: space id */
- ulint* page_nos, /* in: array of page numbers to read, with the
- highest page number the last in the array */
- ulint n_stored) /* in: number of page numbers in the array */
-{
- ib_longlong tablespace_version;
- ulint count; /* number of sleeps done while waiting for the current page */
- ulint err; /* NOTE(review): set by buf_read_page_low but never inspected in this function */
- ulint i;
-
- tablespace_version = fil_space_get_version(space);
-
- for (i = 0; i < n_stored; i++) {
-
- count = 0;
-
- os_aio_print_debug = FALSE;
-
- while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
-
- os_aio_simulated_wake_handler_threads();
- os_thread_sleep(500000);
-
- count++;
-
- if (count > 100) { /* count is not reset inside this loop, so the message repeats on every further iteration */
- fprintf(stderr,
- "InnoDB: Error: InnoDB has waited for"
- " 50 seconds for pending\n"
- "InnoDB: reads to the buffer pool to"
- " be finished.\n"
- "InnoDB: Number of pending reads %lu,"
- " pending pread calls %lu\n",
- (ulong) buf_pool->n_pend_reads,
- (ulong)os_file_n_pending_preads);
-
- os_aio_print_debug = TRUE;
- }
- }
-
- os_aio_print_debug = FALSE;
-
- if ((i + 1 == n_stored) && sync) { /* last page and the caller asked to wait: synchronous read */
- buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
- space, tablespace_version,
- page_nos[i]);
- } else {
- buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
- | OS_AIO_SIMULATED_WAKE_LATER,
- space, tablespace_version,
- page_nos[i]);
- }
- }
-
- os_aio_simulated_wake_handler_threads();
-
- /* Flush pages from the end of the LRU list if necessary */
- buf_flush_free_margin();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Recovery applies read-ahead pages %lu\n",
- (ulong) n_stored);
- }
-#endif /* UNIV_DEBUG */
-}
diff --git a/storage/innobase/data/data0data.c b/storage/innobase/data/data0data.c
deleted file mode 100644
index 0f03de4ca9d..00000000000
--- a/storage/innobase/data/data0data.c
+++ /dev/null
@@ -1,681 +0,0 @@
-/************************************************************************
-SQL data field and tuple
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "data0data.h"
-
-#ifdef UNIV_NONINL
-#include "data0data.ic"
-#endif
-
-#include "rem0rec.h"
-#include "rem0cmp.h"
-#include "page0page.h"
-#include "dict0dict.h"
-#include "btr0cur.h"
-
-#include <ctype.h>
-
-#ifdef UNIV_DEBUG
-byte data_error; /* data pointers of tuple fields are initialized
- to point here for error checking */
-
-ulint data_dummy; /* dtuple_validate adds every data byte it reads to this
- variable so that the reads are not optimized out */
-#endif /* UNIV_DEBUG */
-
-/* Some non-inlined functions used in the MySQL interface; each one only forwards to the function of the same name without the _noninline suffix: */
-void
-dfield_set_data_noninline(
- dfield_t* field, /* in: field */
- void* data, /* in: data */
- ulint len) /* in: length or UNIV_SQL_NULL */
-{
- dfield_set_data(field, data, len);
-}
-void* /* out: the value returned by dfield_get_data(field) */
-dfield_get_data_noninline(
- dfield_t* field) /* in: field */
-{
- return(dfield_get_data(field));
-}
-ulint /* out: the value returned by dfield_get_len(field) */
-dfield_get_len_noninline(
- dfield_t* field) /* in: field */
-{
- return(dfield_get_len(field));
-}
-ulint /* out: the value returned by dtuple_get_n_fields(tuple) */
-dtuple_get_n_fields_noninline(
- dtuple_t* tuple) /* in: tuple */
-{
- return(dtuple_get_n_fields(tuple));
-}
-dfield_t* /* out: the value returned by dtuple_get_nth_field(tuple, n) */
-dtuple_get_nth_field_noninline(
- dtuple_t* tuple, /* in: tuple */
- ulint n) /* in: index of field */
-{
- return(dtuple_get_nth_field(tuple, n));
-}
-
-/*************************************************************************
-Tests if dfield data length and content are equal to the given length and data (byte-wise comparison with ut_memcmp). */
-
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
- /* out: TRUE if equal */
- dfield_t* field, /* in: field */
- ulint len, /* in: data length or UNIV_SQL_NULL */
- byte* data) /* in: data */
-{
- if (len != field->len) {
-
- return(FALSE);
- }
-
- if (len == UNIV_SQL_NULL) { /* both lengths are UNIV_SQL_NULL: equal, the data is not looked at */
-
- return(TRUE);
- }
-
- if (0 != ut_memcmp(field->data, data, len)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/****************************************************************
-Returns TRUE if lengths of two dtuples are equal and respective data fields
-in them are equal when compared with collation in char fields (not as binary
-strings). */
-
-ibool
-dtuple_datas_are_ordering_equal(
-/*============================*/
- /* out: TRUE if length and fields are equal
- when compared with cmp_dfield_dfield:
- NOTE: in character type fields some letters
- are identified with others! (collation) */
- dtuple_t* tuple1, /* in: tuple 1 */
- dtuple_t* tuple2) /* in: tuple 2 */
-{
- dfield_t* field1;
- dfield_t* field2;
- ulint n_fields;
- ulint i;
-
- ut_ad(tuple1 && tuple2);
- ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
- ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
- ut_ad(dtuple_check_typed(tuple1));
- ut_ad(dtuple_check_typed(tuple2));
-
- n_fields = dtuple_get_n_fields(tuple1);
-
- if (n_fields != dtuple_get_n_fields(tuple2)) { /* different field counts: not equal */
-
- return(FALSE);
- }
-
- for (i = 0; i < n_fields; i++) {
-
- field1 = dtuple_get_nth_field(tuple1, i);
- field2 = dtuple_get_nth_field(tuple2, i);
-
- if (0 != cmp_dfield_dfield(field1, field2)) { /* first differing field ends the comparison */
-
- return(FALSE);
- }
- }
-
- return(TRUE);
-}
-
-/*************************************************************************
-Creates a dtuple for use in MySQL. A new memory heap is created for it and handed back through heap; see dtuple_free_for_mysql. */
-
-dtuple_t*
-dtuple_create_for_mysql(
-/*====================*/
- /* out, own: created dtuple */
- void** heap, /* out: created memory heap */
- ulint n_fields) /* in: number of fields */
-{
- *heap = (void*)mem_heap_create(500); /* 500 is presumably the initial heap size in bytes - confirm against mem_heap_create */
-
- return(dtuple_create(*((mem_heap_t**)heap), n_fields));
-}
-
-/*************************************************************************
-Frees a dtuple used in MySQL by freeing the memory heap it was created in. */
-
-void
-dtuple_free_for_mysql(
-/*==================*/
- void* heap) /* in: memory heap where tuple was created */
-{
- mem_heap_free((mem_heap_t*)heap);
-}
-
-/*************************************************************************
-Sets number of fields used in a tuple. Normally this is set in
-dtuple_create, but if you want later to set it smaller, you can use this. */
-
-void
-dtuple_set_n_fields(
-/*================*/
- dtuple_t* tuple, /* in: tuple */
- ulint n_fields) /* in: number of fields */
-{
- ut_ad(tuple);
-
- tuple->n_fields = n_fields;
- tuple->n_fields_cmp = n_fields; /* n_fields_cmp is always reset to the same value as n_fields */
-}
-
-/**************************************************************
-Checks that a data field is typed, i.e. that its mtype lies within DATA_VARCHAR .. DATA_MYSQL; prints the type and length to stderr if not. */
-static
-ibool
-dfield_check_typed_no_assert(
-/*=========================*/
- /* out: TRUE if ok, FALSE otherwise */
- dfield_t* field) /* in: data field */
-{
- if (dfield_get_type(field)->mtype > DATA_MYSQL
- || dfield_get_type(field)->mtype < DATA_VARCHAR) {
-
- fprintf(stderr,
- "InnoDB: Error: data field type %lu, len %lu\n",
- (ulong) dfield_get_type(field)->mtype,
- (ulong) dfield_get_len(field));
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/**************************************************************
-Checks that a data tuple is typed: at most REC_MAX_N_FIELDS fields, each passing dfield_check_typed_no_assert. On failure the tuple is printed to stderr. */
-
-ibool
-dtuple_check_typed_no_assert(
-/*=========================*/
- /* out: TRUE if ok, FALSE otherwise */
- dtuple_t* tuple) /* in: tuple */
-{
- dfield_t* field;
- ulint i;
-
- if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
- fprintf(stderr,
- "InnoDB: Error: index entry has %lu fields\n",
- (ulong) dtuple_get_n_fields(tuple));
-dump: /* shared failure exit, also reached by goto from the field loop below */
- fputs("InnoDB: Tuple contents: ", stderr);
- dtuple_print(stderr, tuple);
- putc('\n', stderr);
-
- return(FALSE);
- }
-
- for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
-
- field = dtuple_get_nth_field(tuple, i);
-
- if (!dfield_check_typed_no_assert(field)) {
- goto dump;
- }
- }
-
- return(TRUE);
-}
-
-/**************************************************************
-Checks that a data field is typed (mtype within DATA_VARCHAR .. DATA_MYSQL). Asserts an error if not. */
-
-ibool
-dfield_check_typed(
-/*===============*/
- /* out: TRUE if ok (the failure path calls ut_error) */
- dfield_t* field) /* in: data field */
-{
- if (dfield_get_type(field)->mtype > DATA_MYSQL
- || dfield_get_type(field)->mtype < DATA_VARCHAR) {
-
- fprintf(stderr,
- "InnoDB: Error: data field type %lu, len %lu\n",
- (ulong) dfield_get_type(field)->mtype,
- (ulong) dfield_get_len(field));
-
- ut_error;
- }
-
- return(TRUE);
-}
-
-/**************************************************************
-Checks that a data tuple is typed by running dfield_check_typed on every field. Asserts an error if not. */
-
-ibool
-dtuple_check_typed(
-/*===============*/
- /* out: TRUE if ok */
- dtuple_t* tuple) /* in: tuple */
-{
- dfield_t* field;
- ulint i;
-
- for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
-
- field = dtuple_get_nth_field(tuple, i);
-
- ut_a(dfield_check_typed(field));
- }
-
- return(TRUE);
-}
-
-#ifdef UNIV_DEBUG
-/**************************************************************
-Validates the consistency of a tuple which must be complete, i.e,
-all fields must have been set. Reads every data byte of every non-NULL field and then checks the tuple with dtuple_check_typed. */
-
-ibool
-dtuple_validate(
-/*============*/
- /* out: TRUE if ok */
- dtuple_t* tuple) /* in: tuple */
-{
- dfield_t* field;
- byte* data;
- ulint n_fields;
- ulint len;
- ulint i;
- ulint j;
-
- ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
- n_fields = dtuple_get_n_fields(tuple);
-
- /* We dereference all the data of each field to test
- for memory traps */
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(tuple, i);
- len = dfield_get_len(field);
-
- if (len != UNIV_SQL_NULL) { /* fields whose length is UNIV_SQL_NULL are skipped */
-
- data = field->data;
-
- for (j = 0; j < len; j++) {
-
- data_dummy += *data; /* fool the compiler not
- to optimize out this
- code */
- data++;
- }
- }
- }
-
- ut_a(dtuple_check_typed(tuple));
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/*****************************************************************
-Pretty prints a dfield value to stderr according to its data type. Handles SQL NULL, DATA_CHAR, DATA_VARCHAR and 4-byte DATA_INT; any other mtype ends in ut_error. */
-
-void
-dfield_print(
-/*=========*/
- dfield_t* dfield) /* in: dfield */
-{
- byte* data;
- ulint len;
- ulint mtype;
- ulint i;
-
- len = dfield_get_len(dfield);
- data = dfield_get_data(dfield);
-
- if (len == UNIV_SQL_NULL) {
- fputs("NULL", stderr);
-
- return;
- }
-
- mtype = dtype_get_mtype(dfield_get_type(dfield));
-
- if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
-
- for (i = 0; i < len; i++) {
- int c = *data++;
- putc(isprint(c) ? c : ' ', stderr); /* non-printable bytes are shown as a space */
- }
- } else if (mtype == DATA_INT) {
- ut_a(len == 4); /* only works for 32-bit integers */
- fprintf(stderr, "%d", (int)mach_read_from_4(data));
- } else {
- ut_error;
- }
-}
-
-/*****************************************************************
-Pretty prints a dfield value to stderr according to its data type. Also the hex string
-is printed if a string contains non-printable characters. */
-
-void
-dfield_print_also_hex(
-/*==================*/
- dfield_t* dfield) /* in: dfield */
-{
- byte* data;
- ulint len;
- ulint mtype;
- ulint i;
- ibool print_also_hex;
-
- len = dfield_get_len(dfield);
- data = dfield_get_data(dfield);
-
- if (len == UNIV_SQL_NULL) {
- fputs("NULL", stderr);
-
- return;
- }
-
- mtype = dtype_get_mtype(dfield_get_type(dfield));
-
- if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
-
- print_also_hex = FALSE;
-
- for (i = 0; i < len; i++) {
- int c = *data++;
- if (!isprint(c)) {
- print_also_hex = TRUE;
- c = ' '; /* non-printable bytes are shown as a space in the text form */
- }
- putc(c, stderr);
- }
-
- if (!print_also_hex) {
-
- return;
- }
-
- fputs(" Hex: ", stderr);
-
- data = dfield_get_data(dfield); /* rewind to the start of the data for the hex pass */
-
- for (i = 0; i < len; i++) {
- fprintf(stderr, "%02lx", (ulint)*data); /* NOTE(review): %lx expects unsigned long; other prints in this file cast to (ulong) - confirm that ulint matches */
-
- data++;
- }
- } else if (mtype == DATA_INT) {
- ut_a(len == 4); /* only works for 32-bit integers */
- fprintf(stderr, "%d", (int)mach_read_from_4(data));
- } else {
- ut_error;
- }
-}
-
-/*****************************************************************
-Print a dfield value using ut_print_buf. At most the first 1000 bytes are printed, followed by the total length if the value is longer. */
-static
-void
-dfield_print_raw(
-/*=============*/
- FILE* f, /* in: output stream */
- dfield_t* dfield) /* in: dfield */
-{
- ulint len = dfield->len;
- if (len != UNIV_SQL_NULL) {
- ulint print_len = ut_min(len, 1000);
- ut_print_buf(f, dfield->data, print_len);
- if (len != print_len) { /* the value was truncated for printing */
- fprintf(f, "(total %lu bytes)", (ulong) len);
- }
- } else {
- fputs(" SQL NULL", f);
- }
-}
-
-/**************************************************************
-The following function prints the contents of a tuple: a header with the field count, then each field as " <index>:<raw data>;". */
-
-void
-dtuple_print(
-/*=========*/
- FILE* f, /* in: output stream */
- dtuple_t* tuple) /* in: tuple */
-{
- ulint n_fields;
- ulint i;
-
- n_fields = dtuple_get_n_fields(tuple);
-
- fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);
-
- for (i = 0; i < n_fields; i++) {
- fprintf(f, " %lu:", (ulong) i);
-
- dfield_print_raw(f, dtuple_get_nth_field(tuple, i));
-
- putc(';', f);
- }
-
- putc('\n', f);
- ut_ad(dtuple_validate(tuple)); /* the validation runs only after the tuple has been printed */
-}
-
-/******************************************************************
-Moves parts of long fields in entry to the big record vector so that
-the size of tuple drops below the maximum record size allowed in the
-database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index. */
-
-big_rec_t*
-dtuple_convert_big_rec(
-/*===================*/
- /* out, own: created big record vector,
- NULL if we are not able to shorten
- the entry enough, i.e., if there are
- too many short fields in entry */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint* ext_vec,/* in: array of externally stored fields,
- or NULL: if a field already is externally
- stored, then we cannot move it to the vector
- this function returns */
- ulint n_ext_vec)/* in: number of elements is ext_vec */
-{
- mem_heap_t* heap;
- big_rec_t* vector;
- dfield_t* dfield;
- ulint size;
- ulint n_fields;
- ulint longest;
- ulint longest_i = ULINT_MAX;
- ibool is_externally_stored;
- ulint i;
- ulint j;
-
- ut_a(dtuple_check_typed_no_assert(entry));
-
- size = rec_get_converted_size(index, entry);
-
- if (UNIV_UNLIKELY(size > 1000000000)) {
- fprintf(stderr,
- "InnoDB: Warning: tuple size very big: %lu\n",
- (ulong) size);
- fputs("InnoDB: Tuple contents: ", stderr);
- dtuple_print(stderr, entry);
- putc('\n', stderr);
- }
-
- heap = mem_heap_create(size + dtuple_get_n_fields(entry)
- * sizeof(big_rec_field_t) + 1000);
-
- vector = mem_heap_alloc(heap, sizeof(big_rec_t));
-
- vector->heap = heap;
- vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
- * sizeof(big_rec_field_t));
-
- /* Decide which fields to shorten: the algorithm is to look for
- the longest field whose type is DATA_BLOB */
-
- n_fields = 0;
-
- while (rec_get_converted_size(index, entry)
- >= ut_min(page_get_free_space_of_empty(
- dict_table_is_comp(index->table)) / 2,
- REC_MAX_DATA_SIZE)) {
-
- longest = 0;
- for (i = dict_index_get_n_unique_in_tree(index);
- i < dtuple_get_n_fields(entry); i++) {
-
- /* Skip over fields which already are externally
- stored */
-
- is_externally_stored = FALSE;
-
- if (ext_vec) {
- for (j = 0; j < n_ext_vec; j++) {
- if (ext_vec[j] == i) {
- is_externally_stored = TRUE;
- }
- }
- }
-
- if (!is_externally_stored) {
-
- dfield = dtuple_get_nth_field(entry, i);
-
- if (dfield->len != UNIV_SQL_NULL
- && dfield->len > longest) {
-
- longest = dfield->len;
-
- longest_i = i;
- }
- }
- }
-
- /* We do not store externally fields which are smaller than
- DICT_MAX_INDEX_COL_LEN */
-
-#if DICT_MAX_INDEX_COL_LEN <= REC_1BYTE_OFFS_LIMIT
-# error "DICT_MAX_INDEX_COL_LEN <= REC_1BYTE_OFFS_LIMIT"
-#endif
-
- if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
- + DICT_MAX_INDEX_COL_LEN) {
- /* Cannot shorten more */
-
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- /* Move data from field longest_i to big rec vector;
- we do not let data size of the remaining entry
- drop below 128 which is the limit for the 2-byte
- offset storage format in a physical record. This
- we accomplish by storing 128 bytes of data in entry
- itself, and only the remaining part to big rec vec.
-
- We store the first bytes locally to the record. Then
- we can calculate all ordering fields in all indexes
- from locally stored data. */
-
- dfield = dtuple_get_nth_field(entry, longest_i);
- vector->fields[n_fields].field_no = longest_i;
-
- ut_a(dfield->len > DICT_MAX_INDEX_COL_LEN);
-
- vector->fields[n_fields].len = dfield->len
- - DICT_MAX_INDEX_COL_LEN;
-
- vector->fields[n_fields].data = mem_heap_alloc(
- heap, vector->fields[n_fields].len);
-
- /* Copy data (from the end of field) to big rec vector */
-
- ut_memcpy(vector->fields[n_fields].data,
- ((byte*)dfield->data) + dfield->len
- - vector->fields[n_fields].len,
- vector->fields[n_fields].len);
- dfield->len = dfield->len - vector->fields[n_fields].len
- + BTR_EXTERN_FIELD_REF_SIZE;
-
- /* Set the extern field reference in dfield to zero */
- memset(((byte*)dfield->data)
- + dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
- 0, BTR_EXTERN_FIELD_REF_SIZE);
- n_fields++;
- }
-
- vector->n_fields = n_fields;
- return(vector);
-}
-
-/******************************************************************
-Puts back to entry the data stored in vector. Note that to ensure the
-fields in entry can accommodate the data, vector must have been created
-from entry with dtuple_convert_big_rec. */
-
-void
-dtuple_convert_back_big_rec(
-/*========================*/
- dict_index_t* index __attribute__((unused)), /* in: index */
- dtuple_t* entry, /* in: entry whose data was put to vector */
- big_rec_t* vector) /* in, own: big rec vector; it is
- freed in this function */
-{
- dfield_t* dfield;
- ulint i;
-
- for (i = 0; i < vector->n_fields; i++) {
-
- dfield = dtuple_get_nth_field(entry,
- vector->fields[i].field_no);
- /* Copy data from big rec vector */
-
- ut_memcpy(((byte*)dfield->data)
- + dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
- vector->fields[i].data,
- vector->fields[i].len);
- dfield->len = dfield->len + vector->fields[i].len
- - BTR_EXTERN_FIELD_REF_SIZE;
- }
-
- mem_heap_free(vector->heap);
-}
-
-/******************************************************************
-Frees the memory in a big rec vector. */
-
-void
-dtuple_big_rec_free(
-/*================*/
- big_rec_t* vector) /* in, own: big rec vector; it is
- freed in this function */
-{
- mem_heap_free(vector->heap);
-}
diff --git a/storage/innobase/data/data0type.c b/storage/innobase/data/data0type.c
deleted file mode 100644
index 305000d7c0a..00000000000
--- a/storage/innobase/data/data0type.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/******************************************************
-Data types
-
-(c) 1996 Innobase Oy
-
-Created 1/16/1996 Heikki Tuuri
-*******************************************************/
-
-#include "data0type.h"
-
-#ifdef UNIV_NONINL
-#include "data0type.ic"
-#endif
-
-/**********************************************************************
-This function is used to find the storage length in bytes of the first n
-characters for prefix indexes using a multibyte character set. The function
-finds charset information and returns length of prefix_len characters in the
-index field in bytes.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-
-ulint
-innobase_get_at_most_n_mbchars(
-/*===========================*/
- /* out: number of bytes occupied by the first
- n characters */
- ulint charset_id, /* in: character set id */
- ulint prefix_len, /* in: prefix length in bytes of the index
- (this has to be divided by mbmaxlen to get the
- number of CHARACTERS n in the prefix) */
- ulint data_len, /* in: length of the string in bytes */
- const char* str); /* in: character string */
-
-/* At the database startup we store the default-charset collation number of
-this MySQL installation to this global variable. If we have < 4.1.2 format
-column definitions, or records in the insert buffer, we use this
-charset-collation code for them. */
-
-ulint data_mysql_default_charset_coll = 99999999;
-
-/*************************************************************************
-Determine how many bytes the first n characters of the given string occupy.
-If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy. */
-
-ulint
-dtype_get_at_most_n_mbchars(
-/*========================*/
- /* out: length of the prefix,
- in bytes */
- ulint prtype, /* in: precise type */
- ulint mbminlen, /* in: minimum length of a
- multi-byte character */
- ulint mbmaxlen, /* in: maximum length of a
- multi-byte character */
- ulint prefix_len, /* in: length of the requested
- prefix, in characters, multiplied by
- dtype_get_mbmaxlen(dtype) */
- ulint data_len, /* in: length of str (in bytes) */
- const char* str) /* in: the string whose prefix
- length is being determined */
-{
-#ifndef UNIV_HOTBACKUP
- ut_a(data_len != UNIV_SQL_NULL);
- ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));
-
- if (mbminlen != mbmaxlen) {
- ut_a(!(prefix_len % mbmaxlen));
- return(innobase_get_at_most_n_mbchars(
- dtype_get_charset_coll(prtype),
- prefix_len, data_len, str));
- }
-
- if (prefix_len < data_len) {
-
- return(prefix_len);
-
- }
-
- return(data_len);
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* UNIV_HOTBACKUP */
-}
-
-/*************************************************************************
-Checks if a data main type is a string type. Also a BLOB is considered a
-string type. */
-
-ibool
-dtype_is_string_type(
-/*=================*/
- /* out: TRUE if string type */
- ulint mtype) /* in: InnoDB main data type code: DATA_CHAR, ... */
-{
- if (mtype <= DATA_BLOB
- || mtype == DATA_MYSQL
- || mtype == DATA_VARMYSQL) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Checks if a type is a binary string type. Note that for tables created with
-< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE. */
-
-ibool
-dtype_is_binary_string_type(
-/*========================*/
- /* out: TRUE if binary string type */
- ulint mtype, /* in: main data type */
- ulint prtype) /* in: precise type */
-{
- if ((mtype == DATA_FIXBINARY)
- || (mtype == DATA_BINARY)
- || (mtype == DATA_BLOB && (prtype & DATA_BINARY_TYPE))) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Checks if a type is a non-binary string type. That is, dtype_is_string_type is
-TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
-with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE. */
-
-ibool
-dtype_is_non_binary_string_type(
-/*============================*/
- /* out: TRUE if non-binary string type */
- ulint mtype, /* in: main data type */
- ulint prtype) /* in: precise type */
-{
- if (dtype_is_string_type(mtype) == TRUE
- && dtype_is_binary_string_type(mtype, prtype) == FALSE) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
-
-ulint
-dtype_get_charset_coll_noninline(
-/*=============================*/
- ulint prtype) /* in: precise data type */
-{
- return(dtype_get_charset_coll(prtype));
-}
-
-/*************************************************************************
-Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code. */
-
-ulint
-dtype_form_prtype(
-/*==============*/
- ulint old_prtype, /* in: the MySQL type code and the flags
- DATA_BINARY_TYPE etc. */
- ulint charset_coll) /* in: MySQL charset-collation code */
-{
- ut_a(old_prtype < 256 * 256);
- ut_a(charset_coll < 256);
-
- return(old_prtype + (charset_coll << 16));
-}
-
-/*************************************************************************
-Validates a data type structure. */
-
-ibool
-dtype_validate(
-/*===========*/
- /* out: TRUE if ok */
- dtype_t* type) /* in: type struct to validate */
-{
- ut_a(type);
- ut_a(type->mtype >= DATA_VARCHAR);
- ut_a(type->mtype <= DATA_MYSQL);
-
- if (type->mtype == DATA_SYS) {
- ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
- }
-
- ut_a(type->mbminlen <= type->mbmaxlen);
-
- return(TRUE);
-}
-
-/*************************************************************************
-Prints a data type structure. */
-
-void
-dtype_print(
-/*========*/
- dtype_t* type) /* in: type */
-{
- ulint mtype;
- ulint prtype;
- ulint len;
-
- ut_a(type);
-
- mtype = type->mtype;
- prtype = type->prtype;
-
- switch (mtype) {
- case DATA_VARCHAR:
- fputs("DATA_VARCHAR", stderr);
- break;
-
- case DATA_CHAR:
- fputs("DATA_CHAR", stderr);
- break;
-
- case DATA_BINARY:
- fputs("DATA_BINARY", stderr);
- break;
-
- case DATA_FIXBINARY:
- fputs("DATA_FIXBINARY", stderr);
- break;
-
- case DATA_BLOB:
- fputs("DATA_BLOB", stderr);
- break;
-
- case DATA_INT:
- fputs("DATA_INT", stderr);
- break;
-
- case DATA_MYSQL:
- fputs("DATA_MYSQL", stderr);
- break;
-
- case DATA_SYS:
- fputs("DATA_SYS", stderr);
- break;
-
- default:
- fprintf(stderr, "type %lu", (ulong) mtype);
- break;
- }
-
- len = type->len;
-
- if ((type->mtype == DATA_SYS)
- || (type->mtype == DATA_VARCHAR)
- || (type->mtype == DATA_CHAR)) {
- putc(' ', stderr);
- if (prtype == DATA_ROW_ID) {
- fputs("DATA_ROW_ID", stderr);
- len = DATA_ROW_ID_LEN;
- } else if (prtype == DATA_ROLL_PTR) {
- fputs("DATA_ROLL_PTR", stderr);
- len = DATA_ROLL_PTR_LEN;
- } else if (prtype == DATA_TRX_ID) {
- fputs("DATA_TRX_ID", stderr);
- len = DATA_TRX_ID_LEN;
- } else if (prtype == DATA_ENGLISH) {
- fputs("DATA_ENGLISH", stderr);
- } else {
- fprintf(stderr, "prtype %lu", (ulong) prtype);
- }
- } else {
- if (prtype & DATA_UNSIGNED) {
- fputs(" DATA_UNSIGNED", stderr);
- }
-
- if (prtype & DATA_BINARY_TYPE) {
- fputs(" DATA_BINARY_TYPE", stderr);
- }
-
- if (prtype & DATA_NOT_NULL) {
- fputs(" DATA_NOT_NULL", stderr);
- }
- }
-
- fprintf(stderr, " len %lu", (ulong) len);
-}
diff --git a/storage/innobase/dict/dict0boot.c b/storage/innobase/dict/dict0boot.c
deleted file mode 100644
index 5f9aaf71e18..00000000000
--- a/storage/innobase/dict/dict0boot.c
+++ /dev/null
@@ -1,425 +0,0 @@
-/******************************************************
-Data dictionary creation and booting
-
-(c) 1996 Innobase Oy
-
-Created 4/18/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0boot.h"
-
-#ifdef UNIV_NONINL
-#include "dict0boot.ic"
-#endif
-
-#include "dict0crea.h"
-#include "btr0btr.h"
-#include "dict0load.h"
-#include "dict0load.h"
-#include "trx0trx.h"
-#include "srv0srv.h"
-#include "ibuf0ibuf.h"
-#include "buf0flu.h"
-#include "log0recv.h"
-#include "os0file.h"
-
-/**************************************************************************
-Gets a pointer to the dictionary header and x-latches its page. */
-
-dict_hdr_t*
-dict_hdr_get(
-/*=========*/
- /* out: pointer to the dictionary header,
- page x-latched */
- mtr_t* mtr) /* in: mtr */
-{
- dict_hdr_t* header;
-
- ut_ad(mtr);
-
- header = DICT_HDR + buf_page_get(DICT_HDR_SPACE, DICT_HDR_PAGE_NO,
- RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_DICT_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
- return(header);
-}
-
-/**************************************************************************
-Returns a new table, index, or tree id. */
-
-dulint
-dict_hdr_get_new_id(
-/*================*/
- /* out: the new id */
- ulint type) /* in: DICT_HDR_ROW_ID, ... */
-{
- dict_hdr_t* dict_hdr;
- dulint id;
- mtr_t mtr;
-
- ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID));
-
- mtr_start(&mtr);
-
- dict_hdr = dict_hdr_get(&mtr);
-
- id = mtr_read_dulint(dict_hdr + type, &mtr);
- id = ut_dulint_add(id, 1);
-
- mlog_write_dulint(dict_hdr + type, id, &mtr);
-
- mtr_commit(&mtr);
-
- return(id);
-}
-
-/**************************************************************************
-Writes the current value of the row id counter to the dictionary header file
-page. */
-
-void
-dict_hdr_flush_row_id(void)
-/*=======================*/
-{
- dict_hdr_t* dict_hdr;
- dulint id;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- id = dict_sys->row_id;
-
- mtr_start(&mtr);
-
- dict_hdr = dict_hdr_get(&mtr);
-
- mlog_write_dulint(dict_hdr + DICT_HDR_ROW_ID, id, &mtr);
-
- mtr_commit(&mtr);
-}
-
-/*********************************************************************
-Creates the file page for the dictionary header. This function is
-called only at the database creation. */
-static
-ibool
-dict_hdr_create(
-/*============*/
- /* out: TRUE if succeed */
- mtr_t* mtr) /* in: mtr */
-{
- dict_hdr_t* dict_header;
- ulint hdr_page_no;
- ulint root_page_no;
- page_t* page;
-
- ut_ad(mtr);
-
- /* Create the dictionary header file block in a new, allocated file
- segment in the system tablespace */
- page = fseg_create(DICT_HDR_SPACE, 0,
- DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
-
- hdr_page_no = buf_frame_get_page_no(page);
-
- ut_a(DICT_HDR_PAGE_NO == hdr_page_no);
-
- dict_header = dict_hdr_get(mtr);
-
- /* Start counting row, table, index, and tree ids from
- DICT_HDR_FIRST_ID */
- mlog_write_dulint(dict_header + DICT_HDR_ROW_ID,
- ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
-
- mlog_write_dulint(dict_header + DICT_HDR_TABLE_ID,
- ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
-
- mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID,
- ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
-
- /* Obsolete, but we must initialize it to 0 anyway. */
- mlog_write_dulint(dict_header + DICT_HDR_MIX_ID,
- ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
-
- /* Create the B-tree roots for the clustered indexes of the basic
- system tables */
-
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
- DICT_TABLE_IDS_ID, FALSE, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
-
- return(TRUE);
-}
-
-/*********************************************************************
-Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created. */
-
-void
-dict_boot(void)
-/*===========*/
-{
- dict_table_t* table;
- dict_index_t* index;
- dict_hdr_t* dict_hdr;
- mem_heap_t* heap;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- /* Create the hash tables etc. */
- dict_init();
-
- heap = mem_heap_create(450);
-
- mutex_enter(&(dict_sys->mutex));
-
- /* Get the dictionary header */
- dict_hdr = dict_hdr_get(&mtr);
-
- /* Because we only write new row ids to disk-based data structure
- (dictionary header) when it is divisible by
- DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
- the latest value of the row id counter. Therefore we advance
- the counter at the database startup to avoid overlapping values.
- Note that when a user after database startup first time asks for
- a new row id, then because the counter is now divisible by
- ..._MARGIN, it will immediately be updated to the disk-based
- header. */
-
- dict_sys->row_id = ut_dulint_add(
- ut_dulint_align_up(mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID,
- &mtr),
- DICT_HDR_ROW_ID_WRITE_MARGIN),
- DICT_HDR_ROW_ID_WRITE_MARGIN);
-
- /* Insert into the dictionary cache the descriptions of the basic
- system tables */
- /*-------------------------*/
- table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);
-
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
-
- table->id = DICT_TABLES_ID;
-
- dict_table_add_to_cache(table, heap);
- dict_sys->sys_tables = table;
- mem_heap_empty(heap);
-
- index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 1);
-
- dict_mem_index_add_field(index, "NAME", 0);
-
- index->id = DICT_TABLES_ID;
-
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_TABLES,
- MLOG_4BYTES, &mtr));
-
- /*-------------------------*/
- index = dict_mem_index_create("SYS_TABLES", "ID_IND",
- DICT_HDR_SPACE, DICT_UNIQUE, 1);
- dict_mem_index_add_field(index, "ID", 0);
-
- index->id = DICT_TABLE_IDS_ID;
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_TABLE_IDS,
- MLOG_4BYTES, &mtr));
-
- /*-------------------------*/
- table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
-
- dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4);
-
- table->id = DICT_COLUMNS_ID;
-
- dict_table_add_to_cache(table, heap);
- dict_sys->sys_columns = table;
- mem_heap_empty(heap);
-
- index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 2);
-
- dict_mem_index_add_field(index, "TABLE_ID", 0);
- dict_mem_index_add_field(index, "POS", 0);
-
- index->id = DICT_COLUMNS_ID;
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_COLUMNS,
- MLOG_4BYTES, &mtr));
-
- /*-------------------------*/
- table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
-
- dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
-
- /* The '+ 2' below comes from the 2 system fields */
-#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
-#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
-#endif
-#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
-#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
-#endif
-#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
-#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
-#endif
-
- table->id = DICT_INDEXES_ID;
- dict_table_add_to_cache(table, heap);
- dict_sys->sys_indexes = table;
- mem_heap_empty(heap);
-
- index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 2);
-
- dict_mem_index_add_field(index, "TABLE_ID", 0);
- dict_mem_index_add_field(index, "ID", 0);
-
- index->id = DICT_INDEXES_ID;
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_INDEXES,
- MLOG_4BYTES, &mtr));
-
- /*-------------------------*/
- table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
-
- dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);
-
- table->id = DICT_FIELDS_ID;
- dict_table_add_to_cache(table, heap);
- dict_sys->sys_fields = table;
- mem_heap_free(heap);
-
- index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 2);
-
- dict_mem_index_add_field(index, "INDEX_ID", 0);
- dict_mem_index_add_field(index, "POS", 0);
-
- index->id = DICT_FIELDS_ID;
- dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr + DICT_HDR_FIELDS,
- MLOG_4BYTES, &mtr));
-
- mtr_commit(&mtr);
- /*-------------------------*/
-
- /* Initialize the insert buffer table and index for each tablespace */
-
- ibuf_init_at_db_start();
-
- /* Load definitions of other indexes on system tables */
-
- dict_load_sys_table(dict_sys->sys_tables);
- dict_load_sys_table(dict_sys->sys_columns);
- dict_load_sys_table(dict_sys->sys_indexes);
- dict_load_sys_table(dict_sys->sys_fields);
-
- mutex_exit(&(dict_sys->mutex));
-}
-
-/*********************************************************************
-Inserts the basic system table data into themselves in the database
-creation. */
-static
-void
-dict_insert_initial_data(void)
-/*==========================*/
-{
- /* Does nothing yet */
-}
-
-/*********************************************************************
-Creates and initializes the data dictionary at the database creation. */
-
-void
-dict_create(void)
-/*=============*/
-{
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- dict_hdr_create(&mtr);
-
- mtr_commit(&mtr);
-
- dict_boot();
-
- dict_insert_initial_data();
-}
diff --git a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c
deleted file mode 100644
index 4116230347d..00000000000
--- a/storage/innobase/dict/dict0crea.c
+++ /dev/null
@@ -1,1450 +0,0 @@
-/******************************************************
-Database object creation
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0crea.h"
-
-#ifdef UNIV_NONINL
-#include "dict0crea.ic"
-#endif
-
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "page0page.h"
-#include "mach0data.h"
-#include "dict0boot.h"
-#include "dict0dict.h"
-#include "que0que.h"
-#include "row0ins.h"
-#include "row0mysql.h"
-#include "pars0pars.h"
-#include "trx0roll.h"
-#include "usr0sess.h"
-#include "ut0vec.h"
-
-/*********************************************************************
-Based on a table object, this function builds the entry to be inserted
-in the SYS_TABLES system table. */
-static
-dtuple_t*
-dict_create_sys_tables_tuple(
-/*=========================*/
- /* out: the tuple which should be inserted */
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dict_table_t* sys_tables;
- dtuple_t* entry;
- dfield_t* dfield;
- byte* ptr;
-
- ut_ad(table && heap);
-
- sys_tables = dict_sys->sys_tables;
-
- entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);
-
- /* 0: NAME -----------------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
-
- dfield_set_data(dfield, table->name, ut_strlen(table->name));
- /* 3: ID -------------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, table->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 4: N_COLS ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
-
-#if DICT_TF_COMPACT != 1
-#error
-#endif
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, table->n_def
- | ((table->flags & DICT_TF_COMPACT) << 31));
- dfield_set_data(dfield, ptr, 4);
- /* 5: TYPE -----------------------------*/
- dfield = dtuple_get_nth_field(entry, 3);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
-
- dfield_set_data(dfield, ptr, 4);
- /* 6: MIX_ID (obsolete) ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 4);
-
- ptr = mem_heap_alloc(heap, 8);
- memset(ptr, 0, 8);
-
- dfield_set_data(dfield, ptr, 8);
- /* 7: MIX_LEN (obsolete) --------------------------*/
-
- dfield = dtuple_get_nth_field(entry, 5);
-
- ptr = mem_heap_alloc(heap, 4);
- memset(ptr, 0, 4);
-
- dfield_set_data(dfield, ptr, 4);
- /* 8: CLUSTER_NAME ---------------------*/
- dfield = dtuple_get_nth_field(entry, 6);
- dfield_set_data(dfield, NULL, UNIV_SQL_NULL); /* not supported */
-
- /* 9: SPACE ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 7);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, table->space);
-
- dfield_set_data(dfield, ptr, 4);
- /*----------------------------------*/
-
- dict_table_copy_types(entry, sys_tables);
-
- return(entry);
-}
-
-/*********************************************************************
-Based on a table object, this function builds the entry to be inserted
-in the SYS_COLUMNS system table. */
-static
-dtuple_t*
-dict_create_sys_columns_tuple(
-/*==========================*/
- /* out: the tuple which should be inserted */
- dict_table_t* table, /* in: table */
- ulint i, /* in: column number */
- mem_heap_t* heap) /* in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dict_table_t* sys_columns;
- dtuple_t* entry;
- const dict_col_t* column;
- dfield_t* dfield;
- byte* ptr;
- const char* col_name;
-
- ut_ad(table && heap);
-
- column = dict_table_get_nth_col(table, i);
-
- sys_columns = dict_sys->sys_columns;
-
- entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
-
- /* 0: TABLE_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, table->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 1: POS ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, i);
-
- dfield_set_data(dfield, ptr, 4);
- /* 4: NAME ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
-
- col_name = dict_table_get_col_name(table, i);
- dfield_set_data(dfield, col_name, ut_strlen(col_name));
- /* 5: MTYPE --------------------------*/
- dfield = dtuple_get_nth_field(entry, 3);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, column->mtype);
-
- dfield_set_data(dfield, ptr, 4);
- /* 6: PRTYPE -------------------------*/
- dfield = dtuple_get_nth_field(entry, 4);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, column->prtype);
-
- dfield_set_data(dfield, ptr, 4);
- /* 7: LEN ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 5);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, column->len);
-
- dfield_set_data(dfield, ptr, 4);
- /* 8: PREC ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 6);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, 0/* unused */);
-
- dfield_set_data(dfield, ptr, 4);
- /*---------------------------------*/
-
- dict_table_copy_types(entry, sys_columns);
-
- return(entry);
-}
-
-/*******************************************************************
-Builds a table definition to insert. */
-static
-ulint
-dict_build_table_def_step(
-/*======================*/
- /* out: DB_SUCCESS or error code */
- que_thr_t* thr, /* in: query thread */
- tab_node_t* node) /* in: table create node */
-{
- dict_table_t* table;
- dtuple_t* row;
- ulint error;
- const char* path_or_name;
- ibool is_path;
- mtr_t mtr;
- ulint i;
- ulint row_len;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- table = node->table;
-
- table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
-
- thr_get_trx(thr)->table_id = table->id;
-
- row_len = 0;
- for (i = 0; i < table->n_def; i++) {
- row_len += dict_col_get_min_size(&table->cols[i]);
- }
- if (row_len > BTR_PAGE_MAX_REC_SIZE) {
- return(DB_TOO_BIG_RECORD);
- }
-
- if (srv_file_per_table) {
- /* We create a new single-table tablespace for the table.
- We initially let it be 4 pages:
- - page 0 is the fsp header and an extent descriptor page,
- - page 1 is an ibuf bitmap page,
- - page 2 is the first inode page,
- - page 3 will contain the root of the clustered index of the
- table we create here. */
-
- ulint space = 0; /* reset to zero for the call below */
-
- if (table->dir_path_of_temp_table) {
- /* We place tables created with CREATE TEMPORARY
- TABLE in the tmp dir of mysqld server */
-
- path_or_name = table->dir_path_of_temp_table;
- is_path = TRUE;
- } else {
- path_or_name = table->name;
- is_path = FALSE;
- }
-
- error = fil_create_new_single_table_tablespace(
- &space, path_or_name, is_path,
- FIL_IBD_FILE_INITIAL_SIZE);
- table->space = (unsigned int) space;
-
- if (error != DB_SUCCESS) {
-
- return(error);
- }
-
- mtr_start(&mtr);
-
- fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
-
- mtr_commit(&mtr);
- }
-
- row = dict_create_sys_tables_tuple(table, node->heap);
-
- ins_node_set_new_row(node->tab_def, row);
-
- return(DB_SUCCESS);
-}
-
-/*******************************************************************
-Builds a column definition to insert. */
-static
-ulint
-dict_build_col_def_step(
-/*====================*/
- /* out: DB_SUCCESS */
- tab_node_t* node) /* in: table create node */
-{
- dtuple_t* row;
-
- row = dict_create_sys_columns_tuple(node->table, node->col_no,
- node->heap);
- ins_node_set_new_row(node->col_def, row);
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************
-Based on an index object, this function builds the entry to be inserted
-in the SYS_INDEXES system table. */
-static
-dtuple_t*
-dict_create_sys_indexes_tuple(
-/*==========================*/
- /* out: the tuple which should be inserted */
- dict_index_t* index, /* in: index */
- mem_heap_t* heap) /* in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dict_table_t* sys_indexes;
- dict_table_t* table;
- dtuple_t* entry;
- dfield_t* dfield;
- byte* ptr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(index && heap);
-
- sys_indexes = dict_sys->sys_indexes;
-
- table = dict_table_get_low(index->table_name);
-
- entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
-
- /* 0: TABLE_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, table->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 1: ID ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, index->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 4: NAME --------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
-
- dfield_set_data(dfield, index->name, ut_strlen(index->name));
- /* 5: N_FIELDS ----------------------*/
- dfield = dtuple_get_nth_field(entry, 3);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, index->n_fields);
-
- dfield_set_data(dfield, ptr, 4);
- /* 6: TYPE --------------------------*/
- dfield = dtuple_get_nth_field(entry, 4);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, index->type);
-
- dfield_set_data(dfield, ptr, 4);
- /* 7: SPACE --------------------------*/
-
-#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 7
-#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 7"
-#endif
-
- dfield = dtuple_get_nth_field(entry, 5);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, index->space);
-
- dfield_set_data(dfield, ptr, 4);
- /* 8: PAGE_NO --------------------------*/
-
-#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 8
-#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 8"
-#endif
-
- dfield = dtuple_get_nth_field(entry, 6);
-
- ptr = mem_heap_alloc(heap, 4);
- mach_write_to_4(ptr, FIL_NULL);
-
- dfield_set_data(dfield, ptr, 4);
- /*--------------------------------*/
-
- dict_table_copy_types(entry, sys_indexes);
-
- return(entry);
-}
-
-/*********************************************************************
-Based on an index object, this function builds the entry to be inserted
-in the SYS_FIELDS system table. */
-static
-dtuple_t*
-dict_create_sys_fields_tuple(
-/*=========================*/
- /* out: the tuple which should be inserted */
- dict_index_t* index, /* in: index */
- ulint i, /* in: field number */
- mem_heap_t* heap) /* in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dict_table_t* sys_fields;
- dtuple_t* entry;
- dict_field_t* field;
- dfield_t* dfield;
- byte* ptr;
- ibool index_contains_column_prefix_field = FALSE;
- ulint j;
-
- ut_ad(index && heap);
-
- for (j = 0; j < index->n_fields; j++) {
- if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
- index_contains_column_prefix_field = TRUE;
- }
- }
-
- field = dict_index_get_nth_field(index, i);
-
- sys_fields = dict_sys->sys_fields;
-
- entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
-
- /* 0: INDEX_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
-
- ptr = mem_heap_alloc(heap, 8);
- mach_write_to_8(ptr, index->id);
-
- dfield_set_data(dfield, ptr, 8);
- /* 1: POS + PREFIX LENGTH ----------------------------*/
-
- dfield = dtuple_get_nth_field(entry, 1);
-
- ptr = mem_heap_alloc(heap, 4);
-
- if (index_contains_column_prefix_field) {
- /* If there are column prefix fields in the index, then
- we store the number of the field to the 2 HIGH bytes
- and the prefix length to the 2 low bytes, */
-
- mach_write_to_4(ptr, (i << 16) + field->prefix_len);
- } else {
- /* Else we store the number of the field to the 2 LOW bytes.
- This is to keep the storage format compatible with
- InnoDB versions < 4.0.14. */
-
- mach_write_to_4(ptr, i);
- }
-
- dfield_set_data(dfield, ptr, 4);
- /* 4: COL_NAME -------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
-
- dfield_set_data(dfield, field->name,
- ut_strlen(field->name));
- /*---------------------------------*/
-
- dict_table_copy_types(entry, sys_fields);
-
- return(entry);
-}
-
-/*********************************************************************
-Creates the tuple with which the index entry is searched for writing the index
-tree root page number, if such a tree is created. */
-static
-dtuple_t*
-dict_create_search_tuple(
-/*=====================*/
- /* out: the tuple for search */
- dtuple_t* tuple, /* in: the tuple inserted in the SYS_INDEXES
- table */
- mem_heap_t* heap) /* in: memory heap from which the memory for
- the built tuple is allocated */
-{
- dtuple_t* search_tuple;
- dfield_t* field1;
- dfield_t* field2;
-
- ut_ad(tuple && heap);
-
- search_tuple = dtuple_create(heap, 2);
-
- field1 = dtuple_get_nth_field(tuple, 0);
- field2 = dtuple_get_nth_field(search_tuple, 0);
-
- dfield_copy(field2, field1);
-
- field1 = dtuple_get_nth_field(tuple, 1);
- field2 = dtuple_get_nth_field(search_tuple, 1);
-
- dfield_copy(field2, field1);
-
- ut_ad(dtuple_validate(search_tuple));
-
- return(search_tuple);
-}
-
-/*******************************************************************
-Builds an index definition row to insert. */
-static
-ulint
-dict_build_index_def_step(
-/*======================*/
- /* out: DB_SUCCESS or error code */
- que_thr_t* thr, /* in: query thread */
- ind_node_t* node) /* in: index create node */
-{
- dict_table_t* table;
- dict_index_t* index;
- dtuple_t* row;
- trx_t* trx;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- trx = thr_get_trx(thr);
-
- index = node->index;
-
- table = dict_table_get_low(index->table_name);
-
- if (table == NULL) {
- return(DB_TABLE_NOT_FOUND);
- }
-
- trx->table_id = table->id;
-
- node->table = table;
-
- ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
- || (index->type & DICT_CLUSTERED));
-
- index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
-
- /* Inherit the space id from the table; we store all indexes of a
- table in the same tablespace */
-
- index->space = table->space;
- node->page_no = FIL_NULL;
- row = dict_create_sys_indexes_tuple(index, node->heap);
- node->ind_row = row;
-
- ins_node_set_new_row(node->ind_def, row);
-
- return(DB_SUCCESS);
-}
-
-/*******************************************************************
-Builds a field definition row to insert. */
-static
-ulint
-dict_build_field_def_step(
-/*======================*/
- /* out: DB_SUCCESS */
- ind_node_t* node) /* in: index create node */
-{
- dict_index_t* index;
- dtuple_t* row;
-
- index = node->index;
-
- row = dict_create_sys_fields_tuple(index, node->field_no, node->heap);
-
- ins_node_set_new_row(node->field_def, row);
-
- return(DB_SUCCESS);
-}
-
-/*******************************************************************
-Creates an index tree for the index if it is not a member of a cluster. */
-static
-ulint
-dict_create_index_tree_step(
-/*========================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- ind_node_t* node) /* in: index create node */
-{
- dict_index_t* index;
- dict_table_t* sys_indexes;
- dict_table_t* table;
- dtuple_t* search_tuple;
- btr_pcur_t pcur;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- index = node->index;
- table = node->table;
-
- sys_indexes = dict_sys->sys_indexes;
-
- /* Run a mini-transaction in which the index tree is allocated for
- the index and its root address is written to the index entry in
- sys_indexes */
-
- mtr_start(&mtr);
-
- search_tuple = dict_create_search_tuple(node->ind_row, node->heap);
-
- btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes),
- search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
- &pcur, &mtr);
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- node->page_no = btr_create(index->type, index->space, index->id,
- dict_table_is_comp(table), &mtr);
- /* printf("Created a new index tree in space %lu root page %lu\n",
- index->space, index->page_no); */
-
- page_rec_write_index_page_no(btr_pcur_get_rec(&pcur),
- DICT_SYS_INDEXES_PAGE_NO_FIELD,
- node->page_no, &mtr);
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- if (node->page_no == FIL_NULL) {
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************************
-Drops the index tree associated with a row in SYS_INDEXES table. */
-
-void
-dict_drop_index_tree(
-/*=================*/
- rec_t* rec, /* in: record in the clustered index of SYS_INDEXES
- table */
- mtr_t* mtr) /* in: mtr having the latch on the record page */
-{
- ulint root_page_no;
- ulint space;
- byte* ptr;
- ulint len;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
- ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);
-
- ut_ad(len == 4);
-
- root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-
- if (root_page_no == FIL_NULL) {
- /* The tree has already been freed */
-
- return;
- }
-
- ptr = rec_get_nth_field_old(rec,
- DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);
-
- ut_ad(len == 4);
-
- space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-
- if (!fil_tablespace_exists_in_mem(space)) {
- /* It is a single table tablespace and the .ibd file is
- missing: do nothing */
-
- return;
- }
-
- /* We free all the pages but the root page first; this operation
- may span several mini-transactions */
-
- btr_free_but_not_root(space, root_page_no);
-
- /* Then we free the root page in the same mini-transaction where
- we write FIL_NULL to the appropriate field in the SYS_INDEXES
- record: this mini-transaction marks the B-tree totally freed */
-
- /* printf("Dropping index tree in space %lu root page %lu\n", space,
- root_page_no); */
- btr_free_root(space, root_page_no, mtr);
-
- page_rec_write_index_page_no(rec,
- DICT_SYS_INDEXES_PAGE_NO_FIELD,
- FIL_NULL, mtr);
-}
-
-/***********************************************************************
-Truncates the index tree associated with a row in SYS_INDEXES table. */
-
-ulint
-dict_truncate_index_tree(
-/*=====================*/
- /* out: new root page number, or
- FIL_NULL on failure */
- dict_table_t* table, /* in: the table the index belongs to */
- btr_pcur_t* pcur, /* in/out: persistent cursor pointing to
- record in the clustered index of
- SYS_INDEXES table. The cursor may be
- repositioned in this call. */
- mtr_t* mtr) /* in: mtr having the latch
- on the record page. The mtr may be
- committed and restarted in this call. */
-{
- ulint root_page_no;
- ulint space;
- ulint type;
- dulint index_id;
- rec_t* rec;
- byte* ptr;
- ulint len;
- ulint comp;
- dict_index_t* index;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
- rec = btr_pcur_get_rec(pcur);
- ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);
-
- ut_ad(len == 4);
-
- root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-
- if (root_page_no == FIL_NULL) {
- /* The tree has been freed. */
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Trying to TRUNCATE"
- " a missing index of table %s!\n", table->name);
- return(FIL_NULL);
- }
-
- ptr = rec_get_nth_field_old(rec,
- DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);
-
- ut_ad(len == 4);
-
- space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-
- if (!fil_tablespace_exists_in_mem(space)) {
- /* It is a single table tablespace and the .ibd file is
- missing: do nothing */
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Trying to TRUNCATE"
- " a missing .ibd file of table %s!\n", table->name);
- return(FIL_NULL);
- }
-
- ptr = rec_get_nth_field_old(rec,
- DICT_SYS_INDEXES_TYPE_FIELD, &len);
- ut_ad(len == 4);
- type = mach_read_from_4(ptr);
-
- ptr = rec_get_nth_field_old(rec, 1, &len);
- ut_ad(len == 8);
- index_id = mach_read_from_8(ptr);
-
- /* We free all the pages but the root page first; this operation
- may span several mini-transactions */
-
- btr_free_but_not_root(space, root_page_no);
-
- /* Then we free the root page in the same mini-transaction where
- we create the b-tree and write its new root page number to the
- appropriate field in the SYS_INDEXES record: this mini-transaction
- marks the B-tree totally truncated */
-
- comp = page_is_comp(btr_page_get(space, root_page_no, RW_X_LATCH,
- mtr));
-
- btr_free_root(space, root_page_no, mtr);
- /* We will temporarily write FIL_NULL to the PAGE_NO field
- in SYS_INDEXES, so that the database will not get into an
- inconsistent state in case it crashes between the mtr_commit()
- below and the following mtr_commit() call. */
- page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
- FIL_NULL, mtr);
-
- /* We will need to commit the mini-transaction in order to avoid
- deadlocks in the btr_create() call, because otherwise we would
- be freeing and allocating pages in the same mini-transaction. */
- btr_pcur_store_position(pcur, mtr);
- mtr_commit(mtr);
-
- mtr_start(mtr);
- btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
-
- /* Find the index corresponding to this SYS_INDEXES record. */
- for (index = UT_LIST_GET_FIRST(table->indexes);
- index;
- index = UT_LIST_GET_NEXT(indexes, index)) {
- if (!ut_dulint_cmp(index->id, index_id)) {
- break;
- }
- }
-
- root_page_no = btr_create(type, space, index_id, comp, mtr);
- if (index) {
- index->page = (unsigned int) root_page_no;
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Index %lu %lu of table %s is missing\n"
- "InnoDB: from the data dictionary during TRUNCATE!\n",
- ut_dulint_get_high(index_id),
- ut_dulint_get_low(index_id),
- table->name);
- }
-
- return(root_page_no);
-}
-
-/*************************************************************************
-Creates a table create graph. */
-
-tab_node_t*
-tab_create_graph_create(
-/*====================*/
- /* out, own: table create node */
- dict_table_t* table, /* in: table to create, built as a memory data
- structure */
- mem_heap_t* heap) /* in: heap where created */
-{
- tab_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(tab_node_t));
-
- node->common.type = QUE_NODE_CREATE_TABLE;
-
- node->table = table;
-
- node->state = TABLE_BUILD_TABLE_DEF;
- node->heap = mem_heap_create(256);
-
- node->tab_def = ins_node_create(INS_DIRECT, dict_sys->sys_tables,
- heap);
- node->tab_def->common.parent = node;
-
- node->col_def = ins_node_create(INS_DIRECT, dict_sys->sys_columns,
- heap);
- node->col_def->common.parent = node;
-
- node->commit_node = commit_node_create(heap);
- node->commit_node->common.parent = node;
-
- return(node);
-}
-
-/*************************************************************************
-Creates an index create graph. */
-
-ind_node_t*
-ind_create_graph_create(
-/*====================*/
- /* out, own: index create node */
- dict_index_t* index, /* in: index to create, built as a memory data
- structure */
- mem_heap_t* heap) /* in: heap where created */
-{
- ind_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(ind_node_t));
-
- node->common.type = QUE_NODE_CREATE_INDEX;
-
- node->index = index;
-
- node->state = INDEX_BUILD_INDEX_DEF;
- node->page_no = FIL_NULL;
- node->heap = mem_heap_create(256);
-
- node->ind_def = ins_node_create(INS_DIRECT,
- dict_sys->sys_indexes, heap);
- node->ind_def->common.parent = node;
-
- node->field_def = ins_node_create(INS_DIRECT,
- dict_sys->sys_fields, heap);
- node->field_def->common.parent = node;
-
- node->commit_node = commit_node_create(heap);
- node->commit_node->common.parent = node;
-
- return(node);
-}
-
-/***************************************************************
-Creates a table. This is a high-level function used in SQL execution graphs. */
-
-que_thr_t*
-dict_create_table_step(
-/*===================*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- tab_node_t* node;
- ulint err = DB_ERROR;
- trx_t* trx;
-
- ut_ad(thr);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- trx = thr_get_trx(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = TABLE_BUILD_TABLE_DEF;
- }
-
- if (node->state == TABLE_BUILD_TABLE_DEF) {
-
- /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-
- err = dict_build_table_def_step(thr, node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->state = TABLE_BUILD_COL_DEF;
- node->col_no = 0;
-
- thr->run_node = node->tab_def;
-
- return(thr);
- }
-
- if (node->state == TABLE_BUILD_COL_DEF) {
-
- if (node->col_no < (node->table)->n_def) {
-
- err = dict_build_col_def_step(node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->col_no++;
-
- thr->run_node = node->col_def;
-
- return(thr);
- } else {
- node->state = TABLE_COMMIT_WORK;
- }
- }
-
- if (node->state == TABLE_COMMIT_WORK) {
-
- /* Table was correctly defined: do NOT commit the transaction
- (CREATE TABLE does NOT do an implicit commit of the current
- transaction) */
-
- node->state = TABLE_ADD_TO_CACHE;
-
- /* thr->run_node = node->commit_node;
-
- return(thr); */
- }
-
- if (node->state == TABLE_ADD_TO_CACHE) {
-
- dict_table_add_to_cache(node->table, node->heap);
-
- err = DB_SUCCESS;
- }
-
-function_exit:
- trx->error_state = err;
-
- if (err == DB_SUCCESS) {
- /* Ok: do nothing */
-
- } else if (err == DB_LOCK_WAIT) {
-
- return(NULL);
- } else {
- /* SQL error detected */
-
- return(NULL);
- }
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
-
-/***************************************************************
-Creates an index. This is a high-level function used in SQL execution
-graphs. */
-
-que_thr_t*
-dict_create_index_step(
-/*===================*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- ind_node_t* node;
- ulint err = DB_ERROR;
- trx_t* trx;
-
- ut_ad(thr);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- trx = thr_get_trx(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = INDEX_BUILD_INDEX_DEF;
- }
-
- if (node->state == INDEX_BUILD_INDEX_DEF) {
- /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
- err = dict_build_index_def_step(thr, node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->state = INDEX_BUILD_FIELD_DEF;
- node->field_no = 0;
-
- thr->run_node = node->ind_def;
-
- return(thr);
- }
-
- if (node->state == INDEX_BUILD_FIELD_DEF) {
-
- if (node->field_no < (node->index)->n_fields) {
-
- err = dict_build_field_def_step(node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->field_no++;
-
- thr->run_node = node->field_def;
-
- return(thr);
- } else {
- node->state = INDEX_CREATE_INDEX_TREE;
- }
- }
-
- if (node->state == INDEX_CREATE_INDEX_TREE) {
-
- err = dict_create_index_tree_step(node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->state = INDEX_COMMIT_WORK;
- }
-
- if (node->state == INDEX_COMMIT_WORK) {
-
- /* Index was correctly defined: do NOT commit the transaction
- (CREATE INDEX does NOT currently do an implicit commit of
- the current transaction) */
-
- node->state = INDEX_ADD_TO_CACHE;
-
- /* thr->run_node = node->commit_node;
-
- return(thr); */
- }
-
- if (node->state == INDEX_ADD_TO_CACHE) {
-
- dict_index_add_to_cache(node->table, node->index,
- node->page_no);
-
- err = DB_SUCCESS;
- }
-
-function_exit:
- trx->error_state = err;
-
- if (err == DB_SUCCESS) {
- /* Ok: do nothing */
-
- } else if (err == DB_LOCK_WAIT) {
-
- return(NULL);
- } else {
- /* SQL error detected */
-
- return(NULL);
- }
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
-
-/********************************************************************
-Creates the foreign key constraints system tables inside InnoDB
-at database creation or database start if they are not found or are
-not of the right form. */
-
-ulint
-dict_create_or_check_foreign_constraint_tables(void)
-/*================================================*/
- /* out: DB_SUCCESS or error code */
-{
- dict_table_t* table1;
- dict_table_t* table2;
- ulint error;
- trx_t* trx;
-
- mutex_enter(&(dict_sys->mutex));
-
- table1 = dict_table_get_low("SYS_FOREIGN");
- table2 = dict_table_get_low("SYS_FOREIGN_COLS");
-
- if (table1 && table2
- && UT_LIST_GET_LEN(table1->indexes) == 3
- && UT_LIST_GET_LEN(table2->indexes) == 1) {
-
- /* Foreign constraint system tables have already been
- created, and they are ok */
-
- mutex_exit(&(dict_sys->mutex));
-
- return(DB_SUCCESS);
- }
-
- mutex_exit(&(dict_sys->mutex));
-
- trx = trx_allocate_for_mysql();
-
- trx->op_info = "creating foreign key sys tables";
-
- row_mysql_lock_data_dictionary(trx);
-
- if (table1) {
- fprintf(stderr,
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN table\n");
- row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
- }
-
- if (table2) {
- fprintf(stderr,
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN_COLS table\n");
- row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
- }
-
- fprintf(stderr,
- "InnoDB: Creating foreign key constraint system tables\n");
-
- /* NOTE: in dict_load_foreigns we use the fact that
- there are 2 secondary indexes on SYS_FOREIGN, and they
- are defined just like below */
-
- /* NOTE: when designing InnoDB's foreign key support in 2001, we made
- an error and made the table names and the foreign key id of type
- 'CHAR' (internally, really a VARCHAR). We should have made the type
- VARBINARY, like in other InnoDB system tables, to get a clean
- design. */
-
- error = que_eval_sql(NULL,
- "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
- "BEGIN\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
- " REF_NAME CHAR, N_COLS INT);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN (ID);\n"
- "CREATE INDEX FOR_IND"
- " ON SYS_FOREIGN (FOR_NAME);\n"
- "CREATE INDEX REF_IND"
- " ON SYS_FOREIGN (REF_NAME);\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN_COLS(ID CHAR, POS INT,"
- " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN_COLS (ID, POS);\n"
- "COMMIT WORK;\n"
- "END;\n"
- , FALSE, trx);
-
- if (error != DB_SUCCESS) {
- fprintf(stderr, "InnoDB: error %lu in creation\n",
- (ulong) error);
-
- ut_a(error == DB_OUT_OF_FILE_SPACE
- || error == DB_TOO_MANY_CONCURRENT_TRXS);
-
- fprintf(stderr,
- "InnoDB: creation failed\n"
- "InnoDB: tablespace is full\n"
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN tables\n");
-
- row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
- row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
-
- error = DB_MUST_GET_MORE_FILE_SPACE;
- }
-
- trx->op_info = "";
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx_free_for_mysql(trx);
-
- if (error == DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Foreign key constraint system tables"
- " created\n");
- }
-
- return(error);
-}
-
-/********************************************************************
-Evaluate the given foreign key SQL statement. */
-
-ulint
-dict_foreign_eval_sql(
-/*==================*/
- /* out: error code or DB_SUCCESS */
- pars_info_t* info, /* in: info struct, or NULL */
- const char* sql, /* in: SQL string to evaluate */
- dict_table_t* table, /* in: table */
- dict_foreign_t* foreign,/* in: foreign */
- trx_t* trx) /* in: transaction */
-{
- ulint error;
- FILE* ef = dict_foreign_err_file;
-
- error = que_eval_sql(info, sql, FALSE, trx);
-
- if (error == DB_DUPLICATE_KEY) {
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Error in foreign key constraint creation for table ",
- ef);
- ut_print_name(ef, trx, TRUE, table->name);
- fputs(".\nA foreign key constraint of name ", ef);
- ut_print_name(ef, trx, FALSE, foreign->id);
- fputs("\nalready exists."
- " (Note that internally InnoDB adds 'databasename/'\n"
- "in front of the user-defined constraint name).\n",
- ef);
- fputs("Note that InnoDB's FOREIGN KEY system tables store\n"
- "constraint names as case-insensitive, with the\n"
- "MySQL standard latin1_swedish_ci collation. If you\n"
- "create tables or databases whose names differ only in\n"
- "the character case, then collisions in constraint\n"
- "names can occur. Workaround: name your constraints\n"
- "explicitly with unique names.\n",
- ef);
-
- mutex_exit(&dict_foreign_err_mutex);
-
- return(error);
- }
-
- if (error != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Foreign key constraint creation failed:\n"
- "InnoDB: internal error number %lu\n", (ulong) error);
-
- mutex_enter(&dict_foreign_err_mutex);
- ut_print_timestamp(ef);
- fputs(" Internal error in foreign key constraint creation"
- " for table ", ef);
- ut_print_name(ef, trx, TRUE, table->name);
- fputs(".\n"
- "See the MySQL .err log in the datadir"
- " for more information.\n", ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- return(error);
- }
-
- return(DB_SUCCESS);
-}
-
-/************************************************************************
-Add a single foreign key field definition to the data dictionary tables in
-the database. */
-static
-ulint
-dict_create_add_foreign_field_to_dictionary(
-/*========================================*/
- /* out: error code or DB_SUCCESS */
- ulint field_nr, /* in: foreign field number */
- dict_table_t* table, /* in: table */
- dict_foreign_t* foreign, /* in: foreign */
- trx_t* trx) /* in: transaction */
-{
- pars_info_t* info = pars_info_create();
-
- pars_info_add_str_literal(info, "id", foreign->id);
-
- pars_info_add_int4_literal(info, "pos", field_nr);
-
- pars_info_add_str_literal(info, "for_col_name",
- foreign->foreign_col_names[field_nr]);
-
- pars_info_add_str_literal(info, "ref_col_name",
- foreign->referenced_col_names[field_nr]);
-
- return(dict_foreign_eval_sql(
- info,
- "PROCEDURE P () IS\n"
- "BEGIN\n"
- "INSERT INTO SYS_FOREIGN_COLS VALUES"
- "(:id, :pos, :for_col_name, :ref_col_name);\n"
- "END;\n",
- table, foreign, trx));
-}
-
-/************************************************************************
-Add a single foreign key definition to the data dictionary tables in the
-database. We also generate names to constraints that were not named by the
-user. A generated constraint has a name of the format
-databasename/tablename_ibfk_<number>, where the numbers start from 1, and
-are given locally for this table, that is, the number is not global, as in
-the old format constraints < 4.0.18 it used to be. */
-static
-ulint
-dict_create_add_foreign_to_dictionary(
-/*==================================*/
- /* out: error code or DB_SUCCESS */
- ulint* id_nr, /* in/out: number to use in id generation;
- incremented if used */
- dict_table_t* table, /* in: table */
- dict_foreign_t* foreign,/* in: foreign */
- trx_t* trx) /* in: transaction */
-{
- ulint error;
- ulint i;
-
- pars_info_t* info = pars_info_create();
-
- if (foreign->id == NULL) {
- /* Generate a new constraint id */
- ulint namelen = strlen(table->name);
- char* id = mem_heap_alloc(foreign->heap, namelen + 20);
- /* no overflow if number < 1e13 */
- sprintf(id, "%s_ibfk_%lu", table->name, (ulong) (*id_nr)++);
- foreign->id = id;
- }
-
- pars_info_add_str_literal(info, "id", foreign->id);
-
- pars_info_add_str_literal(info, "for_name", table->name);
-
- pars_info_add_str_literal(info, "ref_name",
- foreign->referenced_table_name);
-
- pars_info_add_int4_literal(info, "n_cols",
- foreign->n_fields + (foreign->type << 24));
-
- error = dict_foreign_eval_sql(info,
- "PROCEDURE P () IS\n"
- "BEGIN\n"
- "INSERT INTO SYS_FOREIGN VALUES"
- "(:id, :for_name, :ref_name, :n_cols);\n"
- "END;\n"
- , table, foreign, trx);
-
- if (error != DB_SUCCESS) {
-
- return(error);
- }
-
- for (i = 0; i < foreign->n_fields; i++) {
- error = dict_create_add_foreign_field_to_dictionary(
- i, table, foreign, trx);
-
- if (error != DB_SUCCESS) {
-
- return(error);
- }
- }
-
- error = dict_foreign_eval_sql(NULL,
- "PROCEDURE P () IS\n"
- "BEGIN\n"
- "COMMIT WORK;\n"
- "END;\n"
- , table, foreign, trx);
-
- return(error);
-}
-
-/************************************************************************
-Adds foreign key definitions to data dictionary tables in the database. */
-
-ulint
-dict_create_add_foreigns_to_dictionary(
-/*===================================*/
- /* out: error code or DB_SUCCESS */
- ulint start_id,/* in: if we are actually doing ALTER TABLE
- ADD CONSTRAINT, we want to generate constraint
- numbers which are bigger than in the table so
- far; we number the constraints from
- start_id + 1 up; start_id should be set to 0 if
- we are creating a new table, or if the table
- so far has no constraints for which the name
- was generated here */
- dict_table_t* table, /* in: table */
- trx_t* trx) /* in: transaction */
-{
- dict_foreign_t* foreign;
- ulint number = start_id + 1;
- ulint error;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- if (NULL == dict_table_get_low("SYS_FOREIGN")) {
- fprintf(stderr,
- "InnoDB: table SYS_FOREIGN not found"
- " in internal data dictionary\n");
-
- return(DB_ERROR);
- }
-
- for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
- foreign;
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
-
- error = dict_create_add_foreign_to_dictionary(&number, table,
- foreign, trx);
-
- if (error != DB_SUCCESS) {
-
- return(error);
- }
- }
-
- return(DB_SUCCESS);
-}
diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
deleted file mode 100644
index c7a57d6a2b8..00000000000
--- a/storage/innobase/dict/dict0dict.c
+++ /dev/null
@@ -1,4253 +0,0 @@
-/**********************************************************************
-Data dictionary system
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "dict0dict.h"
-
-#ifdef UNIV_NONINL
-#include "dict0dict.ic"
-#endif
-
-#include "buf0buf.h"
-#include "data0type.h"
-#include "mach0data.h"
-#include "dict0boot.h"
-#include "dict0mem.h"
-#include "dict0crea.h"
-#include "trx0undo.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "pars0pars.h"
-#include "pars0sym.h"
-#include "que0que.h"
-#include "rem0cmp.h"
-#ifndef UNIV_HOTBACKUP
-# include "m_ctype.h" /* my_isspace() */
-#endif /* !UNIV_HOTBACKUP */
-
-#include <ctype.h>
-
-dict_sys_t* dict_sys = NULL; /* the dictionary system */
-
-rw_lock_t dict_operation_lock; /* table create, drop, etc. reserve
- this in X-mode; implicit or backround
- operations purge, rollback, foreign
- key checks reserve this in S-mode; we
- cannot trust that MySQL protects
- implicit or background operations
- a table drop since MySQL does not
- know of them; therefore we need this;
- NOTE: a transaction which reserves
- this must keep book on the mode in
- trx->dict_operation_lock_mode */
-
-#define DICT_HEAP_SIZE 100 /* initial memory heap size when
- creating a table or index object */
-#define DICT_POOL_PER_TABLE_HASH 512 /* buffer pool max size per table
- hash table fixed size in bytes */
-#define DICT_POOL_PER_VARYING 4 /* buffer pool max size per data
- dictionary varying size in bytes */
-
-/* Identifies generated InnoDB foreign key names */
-static char dict_ibfk[] = "_ibfk_";
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Converts an identifier to a table name.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_convert_from_table_id(
-/*===========================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len); /* in: length of 'to', in bytes;
- should be at least 5 * strlen(to) + 1 */
-/**********************************************************************
-Converts an identifier to UTF-8.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_convert_from_id(
-/*=====================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len); /* in: length of 'to', in bytes;
- should be at least 3 * strlen(to) + 1 */
-/**********************************************************************
-Compares NUL-terminated UTF-8 strings case insensitively.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-int
-innobase_strcasecmp(
-/*================*/
- /* out: 0 if a=b, <0 if a<b, >1 if a>b */
- const char* a, /* in: first string to compare */
- const char* b); /* in: second string to compare */
-
-/**********************************************************************
-Makes all characters in a NUL-terminated UTF-8 string lower case.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_casedn_str(
-/*================*/
- char* a); /* in/out: string to put in lower case */
-
-/**************************************************************************
-Determines the connection character set.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-struct charset_info_st*
-innobase_get_charset(
-/*=================*/
- /* out: connection character set */
- void* mysql_thd); /* in: MySQL thread handle */
-#endif /* !UNIV_HOTBACKUP */
-
-/**************************************************************************
-Removes an index from the dictionary cache. */
-static
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /* in: table */
- dict_index_t* index); /* in, own: index */
-/***********************************************************************
-Copies fields contained in index2 to index1. */
-static
-void
-dict_index_copy(
-/*============*/
- dict_index_t* index1, /* in: index to copy to */
- dict_index_t* index2, /* in: index to copy from */
- dict_table_t* table, /* in: table */
- ulint start, /* in: first position to copy */
- ulint end); /* in: last position to copy */
-/***********************************************************************
-Tries to find column names for the index and sets the col field of the
-index. */
-static
-void
-dict_index_find_cols(
-/*=================*/
- dict_table_t* table, /* in: table */
- dict_index_t* index); /* in: index */
-/***********************************************************************
-Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user. */
-static
-dict_index_t*
-dict_index_build_internal_clust(
-/*============================*/
- /* out, own: the internal representation
- of the clustered index */
- dict_table_t* table, /* in: table */
- dict_index_t* index); /* in: user representation of a clustered
- index */
-/***********************************************************************
-Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user. */
-static
-dict_index_t*
-dict_index_build_internal_non_clust(
-/*================================*/
- /* out, own: the internal representation
- of the non-clustered index */
- dict_table_t* table, /* in: table */
- dict_index_t* index); /* in: user representation of a non-clustered
- index */
-/**************************************************************************
-Removes a foreign constraint struct from the dictionary cache. */
-static
-void
-dict_foreign_remove_from_cache(
-/*===========================*/
- dict_foreign_t* foreign); /* in, own: foreign constraint */
-/**************************************************************************
-Prints a column data. */
-static
-void
-dict_col_print_low(
-/*===============*/
- const dict_table_t* table, /* in: table */
- const dict_col_t* col); /* in: column */
-/**************************************************************************
-Prints an index data. */
-static
-void
-dict_index_print_low(
-/*=================*/
- dict_index_t* index); /* in: index */
-/**************************************************************************
-Prints a field data. */
-static
-void
-dict_field_print_low(
-/*=================*/
- dict_field_t* field); /* in: field */
-/*************************************************************************
-Frees a foreign key struct. */
-static
-void
-dict_foreign_free(
-/*==============*/
- dict_foreign_t* foreign); /* in, own: foreign key struct */
-
-/* Stream for storing detailed information about the latest foreign key
-and unique key errors */
-FILE* dict_foreign_err_file = NULL;
-mutex_t dict_foreign_err_mutex; /* mutex protecting the foreign
- and unique error buffers */
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-
-void
-dict_casedn_str(
-/*============*/
- char* a) /* in/out: string to put in lower case */
-{
- innobase_casedn_str(a);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/************************************************************************
-Checks if the database name in two table names is the same. */
-
-ibool
-dict_tables_have_same_db(
-/*=====================*/
- /* out: TRUE if same db name */
- const char* name1, /* in: table name in the form
- dbname '/' tablename */
- const char* name2) /* in: table name in the form
- dbname '/' tablename */
-{
- for (; *name1 == *name2; name1++, name2++) {
- if (*name1 == '/') {
- return(TRUE);
- }
- ut_a(*name1); /* the names must contain '/' */
- }
- return(FALSE);
-}
-
-/************************************************************************
-Return the end of table name where we have removed dbname and '/'. */
-
-const char*
-dict_remove_db_name(
-/*================*/
- /* out: table name */
- const char* name) /* in: table name in the form
- dbname '/' tablename */
-{
- const char* s = strchr(name, '/');
- ut_a(s);
-
- return(s + 1);
-}
-
-/************************************************************************
-Get the database name length in a table name. */
-
-ulint
-dict_get_db_name_len(
-/*=================*/
- /* out: database name length */
- const char* name) /* in: table name in the form
- dbname '/' tablename */
-{
- const char* s;
- s = strchr(name, '/');
- ut_a(s);
- return(s - name);
-}
-
-/************************************************************************
-Reserves the dictionary system mutex for MySQL. */
-
-void
-dict_mutex_enter_for_mysql(void)
-/*============================*/
-{
- mutex_enter(&(dict_sys->mutex));
-}
-
-/************************************************************************
-Releases the dictionary system mutex for MySQL. */
-
-void
-dict_mutex_exit_for_mysql(void)
-/*===========================*/
-{
- mutex_exit(&(dict_sys->mutex));
-}
-
-/************************************************************************
-Decrements the count of open MySQL handles to a table. */
-
-void
-dict_table_decrement_handle_count(
-/*==============================*/
- dict_table_t* table) /* in: table */
-{
- mutex_enter(&(dict_sys->mutex));
-
- ut_a(table->n_mysql_handles_opened > 0);
-
- table->n_mysql_handles_opened--;
-
- mutex_exit(&(dict_sys->mutex));
-}
-
-/*************************************************************************
-Gets the column data type. */
-
-void
-dict_col_copy_type_noninline(
-/*=========================*/
- const dict_col_t* col, /* in: column */
- dtype_t* type) /* out: data type */
-{
- dict_col_copy_type(col, type);
-}
-
-/************************************************************************
-Gets the nth column of a table. */
-
-const dict_col_t*
-dict_table_get_nth_col_noninline(
-/*=============================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint pos) /* in: position of column */
-{
- return(dict_table_get_nth_col(table, pos));
-}
-
-/************************************************************************
-Gets the first index on the table (the clustered index). */
-
-dict_index_t*
-dict_table_get_first_index_noninline(
-/*=================================*/
- /* out: index, NULL if none exists */
- dict_table_t* table) /* in: table */
-{
- return(dict_table_get_first_index(table));
-}
-
-/************************************************************************
-Gets the next index on the table. */
-
-dict_index_t*
-dict_table_get_next_index_noninline(
-/*================================*/
- /* out: index, NULL if none left */
- dict_index_t* index) /* in: index */
-{
- return(dict_table_get_next_index(index));
-}
-
-/**************************************************************************
-Returns an index object. */
-
-dict_index_t*
-dict_table_get_index_noninline(
-/*===========================*/
- /* out: index, NULL if does not exist */
- dict_table_t* table, /* in: table */
- const char* name) /* in: index name */
-{
- return(dict_table_get_index(table, name));
-}
-
-/**************************************************************************
-Returns a column's name. */
-
-const char*
-dict_table_get_col_name(
-/*====================*/
- /* out: column name. NOTE: not
- guaranteed to stay valid if table is
- modified in any way (columns added,
- etc.). */
- const dict_table_t* table, /* in: table */
- ulint col_nr) /* in: column number */
-{
- ulint i;
- const char* s;
-
- ut_ad(table);
- ut_ad(col_nr < table->n_def);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- s = table->col_names;
- if (s) {
- for (i = 0; i < col_nr; i++) {
- s += strlen(s) + 1;
- }
- }
-
- return(s);
-}
-
-
-/************************************************************************
-Acquire the autoinc lock.*/
-
-void
-dict_table_autoinc_lock(
-/*====================*/
- dict_table_t* table)
-{
- mutex_enter(&table->autoinc_mutex);
-}
-
-/************************************************************************
-Unconditionally set the autoinc counter. */
-
-void
-dict_table_autoinc_initialize(
-/*==========================*/
- dict_table_t* table, /* in: table */
- ib_ulonglong value) /* in: next value to assign to a row */
-{
- ut_ad(mutex_own(&table->autoinc_mutex));
-
- table->autoinc = value;
-}
-
-/************************************************************************
-Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized. */
-
-ib_ulonglong
-dict_table_autoinc_read(
-/*====================*/
- /* out: value for a new row, or 0 */
- dict_table_t* table) /* in: table */
-{
- ut_ad(mutex_own(&table->autoinc_mutex));
-
- return(table->autoinc);
-}
-
-/************************************************************************
-Updates the autoinc counter if the value supplied is greater than the
-current value. */
-
-void
-dict_table_autoinc_update_if_greater(
-/*=================================*/
-
- dict_table_t* table, /* in: table */
- ib_ulonglong value) /* in: value which was assigned to a row */
-{
- ut_ad(mutex_own(&table->autoinc_mutex));
-
- if (value > table->autoinc) {
-
- table->autoinc = value;
- }
-}
-
-/************************************************************************
-Release the autoinc lock.*/
-
-void
-dict_table_autoinc_unlock(
-/*======================*/
- dict_table_t* table) /* in: release autoinc lock for this table */
-{
- mutex_exit(&table->autoinc_mutex);
-}
-
-/************************************************************************
-Looks for column n in an index. */
-
-ulint
-dict_index_get_nth_col_pos(
-/*=======================*/
- /* out: position in internal representation
- of the index; if not contained, returns
- ULINT_UNDEFINED */
- dict_index_t* index, /* in: index */
- ulint n) /* in: column number */
-{
- const dict_field_t* field;
- const dict_col_t* col;
- ulint pos;
- ulint n_fields;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- col = dict_table_get_nth_col(index->table, n);
-
- if (index->type & DICT_CLUSTERED) {
-
- return(dict_col_get_clust_pos(col, index));
- }
-
- n_fields = dict_index_get_n_fields(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (col == field->col && field->prefix_len == 0) {
-
- return(pos);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/************************************************************************
-Returns TRUE if the index contains a column or a prefix of that column. */
-
-ibool
-dict_index_contains_col_or_prefix(
-/*==============================*/
- /* out: TRUE if contains the column or its
- prefix */
- dict_index_t* index, /* in: index */
- ulint n) /* in: column number */
-{
- const dict_field_t* field;
- const dict_col_t* col;
- ulint pos;
- ulint n_fields;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- if (index->type & DICT_CLUSTERED) {
-
- return(TRUE);
- }
-
- col = dict_table_get_nth_col(index->table, n);
-
- n_fields = dict_index_get_n_fields(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (col == field->col) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/************************************************************************
-Looks for a matching field in an index. The column has to be the same. The
-column in index must be complete, or must contain a prefix longer than the
-column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index. */
-
-ulint
-dict_index_get_nth_field_pos(
-/*=========================*/
- /* out: position in internal representation
- of the index; if not contained, returns
- ULINT_UNDEFINED */
- dict_index_t* index, /* in: index from which to search */
- dict_index_t* index2, /* in: index */
- ulint n) /* in: field number in index2 */
-{
- dict_field_t* field;
- dict_field_t* field2;
- ulint n_fields;
- ulint pos;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- field2 = dict_index_get_nth_field(index2, n);
-
- n_fields = dict_index_get_n_fields(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (field->col == field2->col
- && (field->prefix_len == 0
- || (field->prefix_len >= field2->prefix_len
- && field2->prefix_len != 0))) {
-
- return(pos);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**************************************************************************
-Returns a table object based on table id. */
-
-dict_table_t*
-dict_table_get_on_id(
-/*=================*/
- /* out: table, NULL if does not exist */
- dulint table_id, /* in: table id */
- trx_t* trx) /* in: transaction handle */
-{
- dict_table_t* table;
-
- if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0
- || trx->dict_operation_lock_mode == RW_X_LATCH) {
- /* It is a system table which will always exist in the table
- cache: we avoid acquiring the dictionary mutex, because
- if we are doing a rollback to handle an error in TABLE
- CREATE, for example, we already have the mutex! */
-
- ut_ad(mutex_own(&(dict_sys->mutex))
- || trx->dict_operation_lock_mode == RW_X_LATCH);
-
- return(dict_table_get_on_id_low(table_id));
- }
-
- mutex_enter(&(dict_sys->mutex));
-
- table = dict_table_get_on_id_low(table_id);
-
- mutex_exit(&(dict_sys->mutex));
-
- return(table);
-}
-
-/************************************************************************
-Looks for column n position in the clustered index. */
-
-ulint
-dict_table_get_nth_col_pos(
-/*=======================*/
- /* out: position in internal representation
- of the clustered index */
- dict_table_t* table, /* in: table */
- ulint n) /* in: column number */
-{
- return(dict_index_get_nth_col_pos(dict_table_get_first_index(table),
- n));
-}
-
-/************************************************************************
-Check whether the table uses the compact page format. */
-
-ibool
-dict_table_is_comp_noninline(
-/*=========================*/
- /* out: TRUE if table uses the
- compact page format */
- const dict_table_t* table) /* in: table */
-{
- return(dict_table_is_comp(table));
-}
-
-/************************************************************************
-Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns. */
-
-ibool
-dict_table_col_in_clustered_key(
-/*============================*/
- /* out: TRUE if the column, or its prefix, is
- in the clustered key */
- dict_table_t* table, /* in: table */
- ulint n) /* in: column number */
-{
- dict_index_t* index;
- const dict_field_t* field;
- const dict_col_t* col;
- ulint pos;
- ulint n_fields;
-
- ut_ad(table);
-
- col = dict_table_get_nth_col(table, n);
-
- index = dict_table_get_first_index(table);
-
- n_fields = dict_index_get_n_unique(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (col == field->col) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/**************************************************************************
-Inits the data dictionary module. */
-
-void
-dict_init(void)
-/*===========*/
-{
- dict_sys = mem_alloc(sizeof(dict_sys_t));
-
- mutex_create(&dict_sys->mutex, SYNC_DICT);
-
- dict_sys->table_hash = hash_create(buf_pool_get_max_size()
- / (DICT_POOL_PER_TABLE_HASH
- * UNIV_WORD_SIZE));
- dict_sys->table_id_hash = hash_create(buf_pool_get_max_size()
- / (DICT_POOL_PER_TABLE_HASH
- * UNIV_WORD_SIZE));
- dict_sys->size = 0;
-
- UT_LIST_INIT(dict_sys->table_LRU);
-
- rw_lock_create(&dict_operation_lock, SYNC_DICT_OPERATION);
-
- dict_foreign_err_file = os_file_create_tmpfile();
- ut_a(dict_foreign_err_file);
-
- mutex_create(&dict_foreign_err_mutex, SYNC_ANY_LATCH);
-}
-
-/**************************************************************************
-Returns a table object and optionally increment its MySQL open handle count.
-NOTE! This is a high-level function to be used mainly from outside the
-'dict' directory. Inside this directory dict_table_get_low is usually the
-appropriate function. */
-
-dict_table_t*
-dict_table_get(
-/*===========*/
- /* out: table, NULL if
- does not exist */
- const char* table_name, /* in: table name */
- ibool inc_mysql_count)
- /* in: whether to increment the open
- handle count on the table */
-{
- dict_table_t* table;
-
- mutex_enter(&(dict_sys->mutex));
-
- table = dict_table_get_low(table_name);
-
- if (inc_mysql_count && table) {
- table->n_mysql_handles_opened++;
- }
-
- mutex_exit(&(dict_sys->mutex));
-
- if (table != NULL) {
- if (!table->stat_initialized) {
- /* If table->ibd_file_missing == TRUE, this will
- print an error message and return without doing
- anything. */
- dict_update_statistics(table);
- }
- }
-
- return(table);
-}
-
-/**************************************************************************
-Adds system columns to a table object. */
-
-void
-dict_table_add_system_columns(
-/*==========================*/
- dict_table_t* table, /* in/out: table */
- mem_heap_t* heap) /* in: temporary heap */
-{
- ut_ad(table);
- ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(!table->cached);
-
- /* NOTE: the system columns MUST be added in the following order
- (so that they can be indexed by the numerical value of DATA_ROW_ID,
- etc.) and as the last columns of the table memory object.
- The clustered index will not always physically contain all
- system columns. */
-
- dict_mem_table_add_col(table, heap, "DB_ROW_ID", DATA_SYS,
- DATA_ROW_ID | DATA_NOT_NULL,
- DATA_ROW_ID_LEN);
-#if DATA_ROW_ID != 0
-#error "DATA_ROW_ID != 0"
-#endif
- dict_mem_table_add_col(table, heap, "DB_TRX_ID", DATA_SYS,
- DATA_TRX_ID | DATA_NOT_NULL,
- DATA_TRX_ID_LEN);
-#if DATA_TRX_ID != 1
-#error "DATA_TRX_ID != 1"
-#endif
- dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS,
- DATA_ROLL_PTR | DATA_NOT_NULL,
- DATA_ROLL_PTR_LEN);
-#if DATA_ROLL_PTR != 2
-#error "DATA_ROLL_PTR != 2"
-#endif
-
- /* This check reminds that if a new system column is added to
- the program, it should be dealt with here */
-#if DATA_N_SYS_COLS != 3
-#error "DATA_N_SYS_COLS != 3"
-#endif
-}
-
-/**************************************************************************
-Adds a table object to the dictionary cache. */
-
-void
-dict_table_add_to_cache(
-/*====================*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: temporary heap */
-{
- ulint fold;
- ulint id_fold;
- ulint i;
- ulint row_len;
-
- /* The lower limit for what we consider a "big" row */
-#define BIG_ROW_SIZE 1024
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- dict_table_add_system_columns(table, heap);
-
- table->cached = TRUE;
-
- fold = ut_fold_string(table->name);
- id_fold = ut_fold_dulint(table->id);
-
- row_len = 0;
- for (i = 0; i < table->n_def; i++) {
- ulint col_len = dict_col_get_max_size(
- dict_table_get_nth_col(table, i));
-
- row_len += col_len;
-
- /* If we have a single unbounded field, or several gigantic
- fields, mark the maximum row size as BIG_ROW_SIZE. */
- if (row_len >= BIG_ROW_SIZE || col_len >= BIG_ROW_SIZE) {
- row_len = BIG_ROW_SIZE;
-
- break;
- }
- }
-
- table->big_rows = row_len >= BIG_ROW_SIZE;
-
- /* Look for a table with the same name: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2,
- (ut_strcmp(table2->name, table->name) == 0));
- ut_a(table2 == NULL);
- }
-
- /* Look for a table with the same id: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold, table2,
- (ut_dulint_cmp(table2->id, table->id) == 0));
- ut_a(table2 == NULL);
- }
-
- /* Add table to hash table of tables */
- HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
- table);
-
- /* Add table to hash table of tables based on table id */
- HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold,
- table);
- /* Add table to LRU list of tables */
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
-
- dict_sys->size += mem_heap_get_size(table->heap);
-}
-
-/**************************************************************************
-Looks for an index with the given id. NOTE that we do not reserve
-the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page! */
-
-dict_index_t*
-dict_index_find_on_id_low(
-/*======================*/
- /* out: index or NULL if not found from cache */
- dulint id) /* in: index id */
-{
- dict_table_t* table;
- dict_index_t* index;
-
- table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
-
- while (table) {
- index = dict_table_get_first_index(table);
-
- while (index) {
- if (0 == ut_dulint_cmp(id, index->id)) {
- /* Found */
-
- return(index);
- }
-
- index = dict_table_get_next_index(index);
- }
-
- table = UT_LIST_GET_NEXT(table_LRU, table);
- }
-
- return(NULL);
-}
-
-/**************************************************************************
-Renames a table object. */
-
-ibool
-dict_table_rename_in_cache(
-/*=======================*/
- /* out: TRUE if success */
- dict_table_t* table, /* in: table */
- const char* new_name, /* in: new name */
- ibool rename_also_foreigns)/* in: in ALTER TABLE we want
- to preserve the original table name
- in constraints which reference it */
-{
- dict_foreign_t* foreign;
- dict_index_t* index;
- ulint fold;
- ulint old_size;
- char* old_name;
- ibool success;
-
- ut_ad(table);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- old_size = mem_heap_get_size(table->heap);
-
- fold = ut_fold_string(new_name);
-
- /* Look for a table with the same name: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2,
- (ut_strcmp(table2->name, new_name) == 0));
- if (table2) {
- fprintf(stderr,
- "InnoDB: Error: dictionary cache"
- " already contains a table of name %s\n",
- new_name);
- return(FALSE);
- }
- }
-
- /* If the table is stored in a single-table tablespace, rename the
- .ibd file */
-
- if (table->space != 0) {
- if (table->dir_path_of_temp_table != NULL) {
- fprintf(stderr,
- "InnoDB: Error: trying to rename a table"
- " %s (%s) created with CREATE\n"
- "InnoDB: TEMPORARY TABLE\n",
- table->name, table->dir_path_of_temp_table);
- success = FALSE;
- } else {
- success = fil_rename_tablespace(
- table->name, table->space, new_name);
- }
-
- if (!success) {
-
- return(FALSE);
- }
- }
-
- /* Remove table from the hash tables of tables */
- HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
- ut_fold_string(table->name), table);
- old_name = mem_heap_strdup(table->heap, table->name);
- table->name = mem_heap_strdup(table->heap, new_name);
-
- /* Add table to hash table of tables */
- HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
- table);
- dict_sys->size += (mem_heap_get_size(table->heap) - old_size);
-
- /* Update the table_name field in indexes */
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- index->table_name = table->name;
-
- index = dict_table_get_next_index(index);
- }
-
- if (!rename_also_foreigns) {
- /* In ALTER TABLE we think of the rename table operation
- in the direction table -> temporary table (#sql...)
- as dropping the table with the old name and creating
- a new with the new name. Thus we kind of drop the
- constraints from the dictionary cache here. The foreign key
- constraints will be inherited to the new table from the
- system tables through a call of dict_load_foreigns. */
-
- /* Remove the foreign constraints from the cache */
- foreign = UT_LIST_GET_LAST(table->foreign_list);
-
- while (foreign != NULL) {
- dict_foreign_remove_from_cache(foreign);
- foreign = UT_LIST_GET_LAST(table->foreign_list);
- }
-
- /* Reset table field in referencing constraints */
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign != NULL) {
- foreign->referenced_table = NULL;
- foreign->referenced_index = NULL;
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- /* Make the list of referencing constraints empty */
-
- UT_LIST_INIT(table->referenced_list);
-
- return(TRUE);
- }
-
- /* Update the table name fields in foreign constraints, and update also
- the constraint id of new format >= 4.0.18 constraints. Note that at
- this point we have already changed table->name to the new name. */
-
- foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
- while (foreign != NULL) {
- if (ut_strlen(foreign->foreign_table_name)
- < ut_strlen(table->name)) {
- /* Allocate a longer name buffer;
- TODO: store buf len to save memory */
-
- foreign->foreign_table_name
- = mem_heap_alloc(foreign->heap,
- ut_strlen(table->name) + 1);
- }
-
- strcpy(foreign->foreign_table_name, table->name);
-
- if (strchr(foreign->id, '/')) {
- ulint db_len;
- char* old_id;
-
- /* This is a >= 4.0.18 format id */
-
- old_id = mem_strdup(foreign->id);
-
- if (ut_strlen(foreign->id) > ut_strlen(old_name)
- + ((sizeof dict_ibfk) - 1)
- && !memcmp(foreign->id, old_name,
- ut_strlen(old_name))
- && !memcmp(foreign->id + ut_strlen(old_name),
- dict_ibfk, (sizeof dict_ibfk) - 1)) {
-
- /* This is a generated >= 4.0.18 format id */
-
- if (strlen(table->name) > strlen(old_name)) {
- foreign->id = mem_heap_alloc(
- foreign->heap,
- strlen(table->name)
- + strlen(old_id) + 1);
- }
-
- /* Replace the prefix 'databasename/tablename'
- with the new names */
- strcpy(foreign->id, table->name);
- strcat(foreign->id,
- old_id + ut_strlen(old_name));
- } else {
- /* This is a >= 4.0.18 format id where the user
- gave the id name */
- db_len = dict_get_db_name_len(table->name) + 1;
-
- if (dict_get_db_name_len(table->name)
- > dict_get_db_name_len(foreign->id)) {
-
- foreign->id = mem_heap_alloc(
- foreign->heap,
- db_len + strlen(old_id) + 1);
- }
-
- /* Replace the database prefix in id with the
- one from table->name */
-
- ut_memcpy(foreign->id, table->name, db_len);
-
- strcpy(foreign->id + db_len,
- dict_remove_db_name(old_id));
- }
-
- mem_free(old_id);
- }
-
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign != NULL) {
- if (ut_strlen(foreign->referenced_table_name)
- < ut_strlen(table->name)) {
- /* Allocate a longer name buffer;
- TODO: store buf len to save memory */
-
- foreign->referenced_table_name = mem_heap_alloc(
- foreign->heap, strlen(table->name) + 1);
- }
-
- strcpy(foreign->referenced_table_name, table->name);
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- return(TRUE);
-}
-
-/**************************************************************************
-Change the id of a table object in the dictionary cache. This is used in
-DISCARD TABLESPACE. */
-
-void
-dict_table_change_id_in_cache(
-/*==========================*/
- dict_table_t* table, /* in: table object already in cache */
- dulint new_id) /* in: new id to set */
-{
- ut_ad(table);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- /* Remove the table from the hash table of id's */
-
- HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
- ut_fold_dulint(table->id), table);
- table->id = new_id;
-
- /* Add the table back to the hash table */
- HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash,
- ut_fold_dulint(table->id), table);
-}
-
-/**************************************************************************
-Removes a table object from the dictionary cache. */
-
-void
-dict_table_remove_from_cache(
-/*=========================*/
- dict_table_t* table) /* in, own: table */
-{
- dict_foreign_t* foreign;
- dict_index_t* index;
- ulint size;
-
- ut_ad(table);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-#if 0
- fputs("Removing table ", stderr);
- ut_print_name(stderr, table->name, ULINT_UNDEFINED);
- fputs(" from dictionary cache\n", stderr);
-#endif
-
- /* Remove the foreign constraints from the cache */
- foreign = UT_LIST_GET_LAST(table->foreign_list);
-
- while (foreign != NULL) {
- dict_foreign_remove_from_cache(foreign);
- foreign = UT_LIST_GET_LAST(table->foreign_list);
- }
-
- /* Reset table field in referencing constraints */
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign != NULL) {
- foreign->referenced_table = NULL;
- foreign->referenced_index = NULL;
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- /* Remove the indexes from the cache */
- index = UT_LIST_GET_LAST(table->indexes);
-
- while (index != NULL) {
- dict_index_remove_from_cache(table, index);
- index = UT_LIST_GET_LAST(table->indexes);
- }
-
- /* Remove table from the hash tables of tables */
- HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
- ut_fold_string(table->name), table);
- HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
- ut_fold_dulint(table->id), table);
-
- /* Remove table from LRU list of tables */
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
-
- size = mem_heap_get_size(table->heap);
-
- ut_ad(dict_sys->size >= size);
-
- dict_sys->size -= size;
-
- dict_mem_table_free(table);
-}
-
-/*************************************************************************
-Gets the column position in the clustered index. */
-
-ulint
-dict_col_get_clust_pos_noninline(
-/*=============================*/
- const dict_col_t* col, /* in: table column */
- const dict_index_t* clust_index) /* in: clustered index */
-{
- return(dict_col_get_clust_pos(col, clust_index));
-}
-
-/********************************************************************
-If the given column name is reserved for InnoDB system columns, return
-TRUE. */
-
-ibool
-dict_col_name_is_reserved(
-/*======================*/
- /* out: TRUE if name is reserved */
- const char* name) /* in: column name */
-{
- /* This check reminds that if a new system column is added to
- the program, it should be dealt with here. */
-#if DATA_N_SYS_COLS != 3
-#error "DATA_N_SYS_COLS != 3"
-#endif
-
- static const char* reserved_names[] = {
- "DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"
- };
-
- ulint i;
-
- for (i = 0; i < UT_ARR_SIZE(reserved_names); i++) {
- if (strcmp(name, reserved_names[i]) == 0) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/**************************************************************************
-Adds an index to the dictionary cache. */
-
-void
-dict_index_add_to_cache(
-/*====================*/
- dict_table_t* table, /* in: table on which the index is */
- dict_index_t* index, /* in, own: index; NOTE! The index memory
- object is freed in this function! */
- ulint page_no)/* in: root page number of the index */
-{
- dict_index_t* new_index;
- ulint n_ord;
- ulint i;
-
- ut_ad(index);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(index->n_def == index->n_fields);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- ut_ad(mem_heap_validate(index->heap));
-
-#ifdef UNIV_DEBUG
- {
- dict_index_t* index2;
- index2 = UT_LIST_GET_FIRST(table->indexes);
-
- while (index2 != NULL) {
- ut_ad(ut_strcmp(index->name, index2->name) != 0);
-
- index2 = UT_LIST_GET_NEXT(indexes, index2);
- }
- }
-#endif /* UNIV_DEBUG */
-
- ut_a(!(index->type & DICT_CLUSTERED)
- || UT_LIST_GET_LEN(table->indexes) == 0);
-
- dict_index_find_cols(table, index);
-
- /* Build the cache internal representation of the index,
- containing also the added system fields */
-
- if (index->type & DICT_CLUSTERED) {
- new_index = dict_index_build_internal_clust(table, index);
- } else {
- new_index = dict_index_build_internal_non_clust(table, index);
- }
-
- new_index->search_info = btr_search_info_create(new_index->heap);
-
- /* Set the n_fields value in new_index to the actual defined
- number of fields in the cache internal representation */
-
- new_index->n_fields = new_index->n_def;
-
- /* Add the new index as the last index for the table */
-
- UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
- new_index->table = table;
- new_index->table_name = table->name;
-
- /* Increment the ord_part counts in columns which are ordering */
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- n_ord = new_index->n_fields;
- } else {
- n_ord = dict_index_get_n_unique(new_index);
- }
-
- for (i = 0; i < n_ord; i++) {
-
- dict_index_get_nth_field(new_index, i)->col->ord_part = 1;
- }
-
- new_index->page = (unsigned int) page_no;
- rw_lock_create(&new_index->lock, SYNC_INDEX_TREE);
-
- if (!UNIV_UNLIKELY(new_index->type & DICT_UNIVERSAL)) {
-
- new_index->stat_n_diff_key_vals = mem_heap_alloc(
- new_index->heap,
- (1 + dict_index_get_n_unique(new_index))
- * sizeof(ib_longlong));
- /* Give some sensible values to stat_n_... in case we do
- not calculate statistics quickly enough */
-
- for (i = 0; i <= dict_index_get_n_unique(new_index); i++) {
-
- new_index->stat_n_diff_key_vals[i] = 100;
- }
- }
-
- dict_sys->size += mem_heap_get_size(new_index->heap);
-
- dict_mem_index_free(index);
-}
-
-/**************************************************************************
-Removes an index from the dictionary cache. */
-static
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /* in: table */
- dict_index_t* index) /* in, own: index */
-{
- ulint size;
- ulint retries = 0;
- btr_search_t* info;
-
- ut_ad(table && index);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* We always create search info whether or not adaptive
- hash index is enabled or not. */
- info = index->search_info;
- ut_ad(info);
-
- /* We are not allowed to free the in-memory index struct
- dict_index_t until all entries in the adaptive hash index
- that point to any of the page belonging to his b-tree index
- are dropped. This is so because dropping of these entries
- require access to dict_index_t struct. To avoid such scenario
- We keep a count of number of such pages in the search_info and
- only free the dict_index_t struct when this count drops to
- zero. */
-
- for (;;) {
- ulint ref_count = btr_search_info_get_ref_count(info);
- if (ref_count == 0) {
- break;
- }
-
- /* Sleep for 10ms before trying again. */
- os_thread_sleep(10000);
- ++retries;
-
- if (retries % 500 == 0) {
- /* No luck after 5 seconds of wait. */
- fprintf(stderr, "InnoDB: Error: Waited for"
- " %lu secs for hash index"
- " ref_count (%lu) to drop"
- " to 0.\n"
- "index: \"%s\""
- " table: \"%s\"\n",
- retries/100,
- ref_count,
- index->name,
- table->name);
- }
-
- /* To avoid a hang here we commit suicide if the
- ref_count doesn't drop to zero in 600 seconds. */
- if (retries >= 60000) {
- ut_error;
- }
- }
-
- rw_lock_free(&index->lock);
-
- /* Remove the index from the list of indexes of the table */
- UT_LIST_REMOVE(indexes, table->indexes, index);
-
- size = mem_heap_get_size(index->heap);
-
- ut_ad(dict_sys->size >= size);
-
- dict_sys->size -= size;
-
- dict_mem_index_free(index);
-}
-
-/***********************************************************************
-Tries to find column names for the index and sets the col field of the
-index. */
-static
-void
-dict_index_find_cols(
-/*=================*/
- dict_table_t* table, /* in: table */
- dict_index_t* index) /* in: index */
-{
- ulint i;
-
- ut_ad(table && index);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- for (i = 0; i < index->n_fields; i++) {
- ulint j;
- dict_field_t* field = dict_index_get_nth_field(index, i);
-
- for (j = 0; j < table->n_cols; j++) {
- if (!strcmp(dict_table_get_col_name(table, j),
- field->name)) {
- field->col = (dict_col_t*)
- dict_table_get_nth_col(table, j);
-
- goto found;
- }
- }
-
- /* It is an error not to find a matching column. */
- ut_error;
-
- found:
- ;
- }
-}
-
-/***********************************************************************
-Adds a column to index. */
-
-void
-dict_index_add_col(
-/*===============*/
- dict_index_t* index, /* in: index */
- dict_table_t* table, /* in: table */
- dict_col_t* col, /* in: column */
- ulint prefix_len) /* in: column prefix length */
-{
- dict_field_t* field;
- const char* col_name;
-
- col_name = dict_table_get_col_name(table, dict_col_get_no(col));
-
- dict_mem_index_add_field(index, col_name, prefix_len);
-
- field = dict_index_get_nth_field(index, index->n_def - 1);
-
- field->col = col;
- field->fixed_len = (unsigned int) dict_col_get_fixed_size(col);
-
- if (prefix_len && field->fixed_len > prefix_len) {
- field->fixed_len = (unsigned int) prefix_len;
- }
-
- /* Long fixed-length fields that need external storage are treated as
- variable-length fields, so that the extern flag can be embedded in
- the length word. */
-
- if (field->fixed_len > DICT_MAX_INDEX_COL_LEN) {
- field->fixed_len = 0;
- }
-#if DICT_MAX_INDEX_COL_LEN != 768
- /* The comparison limit above must be constant. If it were
- changed, the disk format of some fixed-length columns would
- change, which would be a disaster. */
-# error "DICT_MAX_INDEX_COL_LEN != 768"
-#endif
-
- if (!(col->prtype & DATA_NOT_NULL)) {
- index->n_nullable++;
- }
-}
-
-/***********************************************************************
-Copies fields contained in index2 to index1. */
-static
-void
-dict_index_copy(
-/*============*/
- dict_index_t* index1, /* in: index to copy to */
- dict_index_t* index2, /* in: index to copy from */
- dict_table_t* table, /* in: table */
- ulint start, /* in: first position to copy */
- ulint end) /* in: last position to copy */
-{
- dict_field_t* field;
- ulint i;
-
- /* Copy fields contained in index2 */
-
- for (i = start; i < end; i++) {
-
- field = dict_index_get_nth_field(index2, i);
- dict_index_add_col(index1, table, field->col,
- field->prefix_len);
- }
-}
-
-/***********************************************************************
-Copies types of fields contained in index to tuple. */
-
-void
-dict_index_copy_types(
-/*==================*/
- dtuple_t* tuple, /* in: data tuple */
- dict_index_t* index, /* in: index */
- ulint n_fields) /* in: number of field types to copy */
-{
- ulint i;
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- dtuple_set_types_binary(tuple, n_fields);
-
- return;
- }
-
- for (i = 0; i < n_fields; i++) {
- dict_field_t* ifield;
- dtype_t* dfield_type;
-
- ifield = dict_index_get_nth_field(index, i);
- dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
- dict_col_copy_type(dict_field_get_col(ifield), dfield_type);
- }
-}
-
-/***********************************************************************
-Copies types of columns contained in table to tuple. */
-
-void
-dict_table_copy_types(
-/*==================*/
- dtuple_t* tuple, /* in: data tuple */
- dict_table_t* table) /* in: index */
-{
- dtype_t* dfield_type;
- ulint i;
-
- for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
-
- dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
- dict_col_copy_type(dict_table_get_nth_col(table, i),
- dfield_type);
- }
-}
-
-/***********************************************************************
-Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user. */
-static
-dict_index_t*
-dict_index_build_internal_clust(
-/*============================*/
- /* out, own: the internal representation
- of the clustered index */
- dict_table_t* table, /* in: table */
- dict_index_t* index) /* in: user representation of a clustered
- index */
-{
- dict_index_t* new_index;
- dict_field_t* field;
- ulint fixed_size;
- ulint trx_id_pos;
- ulint i;
- ibool* indexed;
-
- ut_ad(table && index);
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- /* Create a new index object with certainly enough fields */
- new_index = dict_mem_index_create(table->name,
- index->name, table->space,
- index->type,
- index->n_fields + table->n_cols);
-
- /* Copy other relevant data from the old index struct to the new
- struct: it inherits the values */
-
- new_index->n_user_defined_cols = index->n_fields;
-
- new_index->id = index->id;
-
- /* Copy the fields of index */
- dict_index_copy(new_index, index, table, 0, index->n_fields);
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- /* No fixed number of fields determines an entry uniquely */
-
- new_index->n_uniq = REC_MAX_N_FIELDS;
-
- } else if (index->type & DICT_UNIQUE) {
- /* Only the fields defined so far are needed to identify
- the index entry uniquely */
-
- new_index->n_uniq = new_index->n_def;
- } else {
- /* Also the row id is needed to identify the entry */
- new_index->n_uniq = 1 + new_index->n_def;
- }
-
- new_index->trx_id_offset = 0;
-
- if (!(index->type & DICT_IBUF)) {
- /* Add system columns, trx id first */
-
- trx_id_pos = new_index->n_def;
-
-#if DATA_ROW_ID != 0
-# error "DATA_ROW_ID != 0"
-#endif
-#if DATA_TRX_ID != 1
-# error "DATA_TRX_ID != 1"
-#endif
-#if DATA_ROLL_PTR != 2
-# error "DATA_ROLL_PTR != 2"
-#endif
-
- if (!(index->type & DICT_UNIQUE)) {
- dict_index_add_col(new_index, table, (dict_col_t*)
- dict_table_get_sys_col(
- table, DATA_ROW_ID),
- 0);
- trx_id_pos++;
- }
-
- dict_index_add_col(new_index, table, (dict_col_t*)
- dict_table_get_sys_col(table, DATA_TRX_ID),
- 0);
-
- dict_index_add_col(new_index, table, (dict_col_t*)
- dict_table_get_sys_col(table,
- DATA_ROLL_PTR),
- 0);
-
- for (i = 0; i < trx_id_pos; i++) {
-
- fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(new_index, i));
-
- if (fixed_size == 0) {
- new_index->trx_id_offset = 0;
-
- break;
- }
-
- if (dict_index_get_nth_field(new_index, i)->prefix_len
- > 0) {
- new_index->trx_id_offset = 0;
-
- break;
- }
-
- new_index->trx_id_offset += (unsigned int) fixed_size;
- }
-
- }
-
- /* Remember the table columns already contained in new_index */
- indexed = mem_alloc(table->n_cols * sizeof *indexed);
- memset(indexed, 0, table->n_cols * sizeof *indexed);
-
- /* Mark with 0 the table columns already contained in new_index */
- for (i = 0; i < new_index->n_def; i++) {
-
- field = dict_index_get_nth_field(new_index, i);
-
- /* If there is only a prefix of the column in the index
- field, do not mark the column as contained in the index */
-
- if (field->prefix_len == 0) {
-
- indexed[field->col->ind] = TRUE;
- }
- }
-
- /* Add to new_index non-system columns of table not yet included
- there */
- for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
-
- dict_col_t* col = (dict_col_t*)
- dict_table_get_nth_col(table, i);
- ut_ad(col->mtype != DATA_SYS);
-
- if (!indexed[col->ind]) {
- dict_index_add_col(new_index, table, col, 0);
- }
- }
-
- mem_free(indexed);
-
- ut_ad((index->type & DICT_IBUF)
- || (UT_LIST_GET_LEN(table->indexes) == 0));
-
- new_index->cached = TRUE;
-
- return(new_index);
-}
-
-/***********************************************************************
-Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user. */
-static
-dict_index_t*
-dict_index_build_internal_non_clust(
-/*================================*/
- /* out, own: the internal representation
- of the non-clustered index */
- dict_table_t* table, /* in: table */
- dict_index_t* index) /* in: user representation of a non-clustered
- index */
-{
- dict_field_t* field;
- dict_index_t* new_index;
- dict_index_t* clust_index;
- ulint i;
- ibool* indexed;
-
- ut_ad(table && index);
- ut_ad(0 == (index->type & DICT_CLUSTERED));
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- /* The clustered index should be the first in the list of indexes */
- clust_index = UT_LIST_GET_FIRST(table->indexes);
-
- ut_ad(clust_index);
- ut_ad(clust_index->type & DICT_CLUSTERED);
- ut_ad(!(clust_index->type & DICT_UNIVERSAL));
-
- /* Create a new index */
- new_index = dict_mem_index_create(
- table->name, index->name, index->space, index->type,
- index->n_fields + 1 + clust_index->n_uniq);
-
- /* Copy other relevant data from the old index
- struct to the new struct: it inherits the values */
-
- new_index->n_user_defined_cols = index->n_fields;
-
- new_index->id = index->id;
-
- /* Copy fields from index to new_index */
- dict_index_copy(new_index, index, table, 0, index->n_fields);
-
- /* Remember the table columns already contained in new_index */
- indexed = mem_alloc(table->n_cols * sizeof *indexed);
- memset(indexed, 0, table->n_cols * sizeof *indexed);
-
- /* Mark with 0 table columns already contained in new_index */
- for (i = 0; i < new_index->n_def; i++) {
-
- field = dict_index_get_nth_field(new_index, i);
-
- /* If there is only a prefix of the column in the index
- field, do not mark the column as contained in the index */
-
- if (field->prefix_len == 0) {
-
- indexed[field->col->ind] = TRUE;
- }
- }
-
- /* Add to new_index the columns necessary to determine the clustered
- index entry uniquely */
-
- for (i = 0; i < clust_index->n_uniq; i++) {
-
- field = dict_index_get_nth_field(clust_index, i);
-
- if (!indexed[field->col->ind]) {
- dict_index_add_col(new_index, table, field->col,
- field->prefix_len);
- }
- }
-
- mem_free(indexed);
-
- if ((index->type) & DICT_UNIQUE) {
- new_index->n_uniq = index->n_fields;
- } else {
- new_index->n_uniq = new_index->n_def;
- }
-
- /* Set the n_fields value in new_index to the actual defined
- number of fields */
-
- new_index->n_fields = new_index->n_def;
-
- new_index->cached = TRUE;
-
- return(new_index);
-}
-
-/*====================== FOREIGN KEY PROCESSING ========================*/
-
-/*************************************************************************
-Checks if a table is referenced by foreign keys. */
-
-ibool
-dict_table_referenced_by_foreign_key(
-/*=================================*/
- /* out: TRUE if table is referenced by a
- foreign key */
- dict_table_t* table) /* in: InnoDB table */
-{
- if (UT_LIST_GET_LEN(table->referenced_list) > 0) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Frees a foreign key struct. */
-static
-void
-dict_foreign_free(
-/*==============*/
- dict_foreign_t* foreign) /* in, own: foreign key struct */
-{
- mem_heap_free(foreign->heap);
-}
-
-/**************************************************************************
-Removes a foreign constraint struct from the dictionary cache. */
-static
-void
-dict_foreign_remove_from_cache(
-/*===========================*/
- dict_foreign_t* foreign) /* in, own: foreign constraint */
-{
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_a(foreign);
-
- if (foreign->referenced_table) {
- UT_LIST_REMOVE(referenced_list,
- foreign->referenced_table->referenced_list,
- foreign);
- }
-
- if (foreign->foreign_table) {
- UT_LIST_REMOVE(foreign_list,
- foreign->foreign_table->foreign_list,
- foreign);
- }
-
- dict_foreign_free(foreign);
-}
-
-/**************************************************************************
-Looks for the foreign constraint from the foreign and referenced lists
-of a table. */
-static
-dict_foreign_t*
-dict_foreign_find(
-/*==============*/
- /* out: foreign constraint */
- dict_table_t* table, /* in: table object */
- const char* id) /* in: foreign constraint id */
-{
- dict_foreign_t* foreign;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
- while (foreign) {
- if (ut_strcmp(id, foreign->id) == 0) {
-
- return(foreign);
- }
-
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign) {
- if (ut_strcmp(id, foreign->id) == 0) {
-
- return(foreign);
- }
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- return(NULL);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Tries to find an index whose first fields are the columns in the array,
-in the same order. */
-static
-dict_index_t*
-dict_foreign_find_index(
-/*====================*/
- /* out: matching index, NULL if not found */
- dict_table_t* table, /* in: table */
- const char** columns,/* in: array of column names */
- ulint n_cols, /* in: number of columns */
- dict_index_t* types_idx, /* in: NULL or an index to whose types the
- column types must match */
- ibool check_charsets,
- /* in: whether to check charsets.
- only has an effect if types_idx != NULL */
- ulint check_null)
- /* in: nonzero if none of the columns must
- be declared NOT NULL */
-{
- dict_index_t* index;
- dict_field_t* field;
- const char* col_name;
- ulint i;
-
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (dict_index_get_n_fields(index) >= n_cols) {
-
- for (i = 0; i < n_cols; i++) {
- field = dict_index_get_nth_field(index, i);
-
- col_name = dict_table_get_col_name(
- table, dict_col_get_no(field->col));
-
- if (field->prefix_len != 0) {
- /* We do not accept column prefix
- indexes here */
-
- break;
- }
-
- if (0 != innobase_strcasecmp(columns[i],
- col_name)) {
- break;
- }
-
- if (check_null
- && (field->col->prtype & DATA_NOT_NULL)) {
-
- return(NULL);
- }
-
- if (types_idx && !cmp_cols_are_equal(
- dict_index_get_nth_col(index, i),
- dict_index_get_nth_col(types_idx,
- i),
- check_charsets)) {
-
- break;
- }
- }
-
- if (i == n_cols) {
- /* We found a matching index */
-
- return(index);
- }
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(NULL);
-}
-
-/**************************************************************************
-Report an error in a foreign key definition. */
-static
-void
-dict_foreign_error_report_low(
-/*==========================*/
- FILE* file, /* in: output stream */
- const char* name) /* in: table name */
-{
- rewind(file);
- ut_print_timestamp(file);
- fprintf(file, " Error in foreign key constraint of table %s:\n",
- name);
-}
-
-/**************************************************************************
-Report an error in a foreign key definition. */
-static
-void
-dict_foreign_error_report(
-/*======================*/
- FILE* file, /* in: output stream */
- dict_foreign_t* fk, /* in: foreign key constraint */
- const char* msg) /* in: the error message */
-{
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(file, fk->foreign_table_name);
- fputs(msg, file);
- fputs(" Constraint:\n", file);
- dict_print_info_on_foreign_key_in_create_format(file, NULL, fk, TRUE);
- putc('\n', file);
- if (fk->foreign_index) {
- fputs("The index in the foreign key in table is ", file);
- ut_print_name(file, NULL, FALSE, fk->foreign_index->name);
- fputs("\n"
- "See http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-foreign-key-constraints.html\n"
- "for correct foreign key definition.\n",
- file);
- }
- mutex_exit(&dict_foreign_err_mutex);
-}
-
-/**************************************************************************
-Adds a foreign key constraint object to the dictionary cache. May free
-the object if there already is an object with the same identifier in.
-At least one of the foreign table and the referenced table must already
-be in the dictionary cache! */
-
-ulint
-dict_foreign_add_to_cache(
-/*======================*/
- /* out: DB_SUCCESS or error code */
- dict_foreign_t* foreign, /* in, own: foreign key constraint */
- ibool check_charsets) /* in: TRUE=check charset
- compatibility */
-{
- dict_table_t* for_table;
- dict_table_t* ref_table;
- dict_foreign_t* for_in_cache = NULL;
- dict_index_t* index;
- ibool added_to_referenced_list= FALSE;
- FILE* ef = dict_foreign_err_file;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- for_table = dict_table_check_if_in_cache_low(
- foreign->foreign_table_name);
-
- ref_table = dict_table_check_if_in_cache_low(
- foreign->referenced_table_name);
- ut_a(for_table || ref_table);
-
- if (for_table) {
- for_in_cache = dict_foreign_find(for_table, foreign->id);
- }
-
- if (!for_in_cache && ref_table) {
- for_in_cache = dict_foreign_find(ref_table, foreign->id);
- }
-
- if (for_in_cache) {
- /* Free the foreign object */
- mem_heap_free(foreign->heap);
- } else {
- for_in_cache = foreign;
- }
-
- if (for_in_cache->referenced_table == NULL && ref_table) {
- index = dict_foreign_find_index(
- ref_table,
- (const char**) for_in_cache->referenced_col_names,
- for_in_cache->n_fields, for_in_cache->foreign_index,
- check_charsets, FALSE);
-
- if (index == NULL) {
- dict_foreign_error_report(
- ef, for_in_cache,
- "there is no index in referenced table"
- " which would contain\n"
- "the columns as the first columns,"
- " or the data types in the\n"
- "referenced table do not match"
- " the ones in table.");
-
- if (for_in_cache == foreign) {
- mem_heap_free(foreign->heap);
- }
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- for_in_cache->referenced_table = ref_table;
- for_in_cache->referenced_index = index;
- UT_LIST_ADD_LAST(referenced_list,
- ref_table->referenced_list,
- for_in_cache);
- added_to_referenced_list = TRUE;
- }
-
- if (for_in_cache->foreign_table == NULL && for_table) {
- index = dict_foreign_find_index(
- for_table,
- (const char**) for_in_cache->foreign_col_names,
- for_in_cache->n_fields,
- for_in_cache->referenced_index, check_charsets,
- for_in_cache->type
- & (DICT_FOREIGN_ON_DELETE_SET_NULL
- | DICT_FOREIGN_ON_UPDATE_SET_NULL));
-
- if (index == NULL) {
- dict_foreign_error_report(
- ef, for_in_cache,
- "there is no index in the table"
- " which would contain\n"
- "the columns as the first columns,"
- " or the data types in the\n"
- "table do not match"
- " the ones in the referenced table\n"
- "or one of the ON ... SET NULL columns"
- " is declared NOT NULL.");
-
- if (for_in_cache == foreign) {
- if (added_to_referenced_list) {
- UT_LIST_REMOVE(
- referenced_list,
- ref_table->referenced_list,
- for_in_cache);
- }
-
- mem_heap_free(foreign->heap);
- }
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- for_in_cache->foreign_table = for_table;
- for_in_cache->foreign_index = index;
- UT_LIST_ADD_LAST(foreign_list,
- for_table->foreign_list,
- for_in_cache);
- }
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************************
-Scans from pointer onwards. Stops if is at the start of a copy of
-'string' where characters are compared without case sensitivity, and
-only outside `` or "" quotes. Stops also at '\0'. */
-
-const char*
-dict_scan_to(
-/*=========*/
- /* out: scanned up to this */
- const char* ptr, /* in: scan from */
- const char* string) /* in: look for this */
-{
- char quote = '\0';
-
- for (; *ptr; ptr++) {
- if (*ptr == quote) {
- /* Closing quote character: do not look for
- starting quote or the keyword. */
- quote = '\0';
- } else if (quote) {
- /* Within quotes: do nothing. */
- } else if (*ptr == '`' || *ptr == '"') {
- /* Starting quote: remember the quote character. */
- quote = *ptr;
- } else {
- /* Outside quotes: look for the keyword. */
- ulint i;
- for (i = 0; string[i]; i++) {
- if (toupper((int)(unsigned char)(ptr[i]))
- != toupper((int)(unsigned char)
- (string[i]))) {
- goto nomatch;
- }
- }
- break;
-nomatch:
- ;
- }
- }
-
- return(ptr);
-}
-
-/*************************************************************************
-Accepts a specified string. Comparisons are case-insensitive. */
-static
-const char*
-dict_accept(
-/*========*/
- /* out: if string was accepted, the pointer
- is moved after that, else ptr is returned */
- struct charset_info_st* cs,/* in: the character set of ptr */
- const char* ptr, /* in: scan from this */
- const char* string, /* in: accept only this string as the next
- non-whitespace string */
- ibool* success)/* out: TRUE if accepted */
-{
- const char* old_ptr = ptr;
- const char* old_ptr2;
-
- *success = FALSE;
-
- while (my_isspace(cs, *ptr)) {
- ptr++;
- }
-
- old_ptr2 = ptr;
-
- ptr = dict_scan_to(ptr, string);
-
- if (*ptr == '\0' || old_ptr2 != ptr) {
- return(old_ptr);
- }
-
- *success = TRUE;
-
- return(ptr + ut_strlen(string));
-}
-
-/*************************************************************************
-Scans an id. For the lexical definition of an 'id', see the code below.
-Strips backquotes or double quotes from around the id. */
-static
-const char*
-dict_scan_id(
-/*=========*/
- /* out: scanned to */
- struct charset_info_st* cs,/* in: the character set of ptr */
- const char* ptr, /* in: scanned to */
- mem_heap_t* heap, /* in: heap where to allocate the id
- (NULL=id will not be allocated, but it
- will point to string near ptr) */
- const char** id, /* out,own: the id; NULL if no id was
- scannable */
- ibool table_id,/* in: TRUE=convert the allocated id
- as a table name; FALSE=convert to UTF-8 */
- ibool accept_also_dot)
- /* in: TRUE if also a dot can appear in a
- non-quoted id; in a quoted id it can appear
- always */
-{
- char quote = '\0';
- ulint len = 0;
- const char* s;
- char* str;
- char* dst;
-
- *id = NULL;
-
- while (my_isspace(cs, *ptr)) {
- ptr++;
- }
-
- if (*ptr == '\0') {
-
- return(ptr);
- }
-
- if (*ptr == '`' || *ptr == '"') {
- quote = *ptr++;
- }
-
- s = ptr;
-
- if (quote) {
- for (;;) {
- if (!*ptr) {
- /* Syntax error */
- return(ptr);
- }
- if (*ptr == quote) {
- ptr++;
- if (*ptr != quote) {
- break;
- }
- }
- ptr++;
- len++;
- }
- } else {
- while (!my_isspace(cs, *ptr) && *ptr != '(' && *ptr != ')'
- && (accept_also_dot || *ptr != '.')
- && *ptr != ',' && *ptr != '\0') {
-
- ptr++;
- }
-
- len = ptr - s;
- }
-
- if (UNIV_UNLIKELY(!heap)) {
- /* no heap given: id will point to source string */
- *id = s;
- return(ptr);
- }
-
- if (quote) {
- char* d;
- str = d = mem_heap_alloc(heap, len + 1);
- while (len--) {
- if ((*d++ = *s++) == quote) {
- s++;
- }
- }
- *d++ = 0;
- len = d - str;
- ut_ad(*s == quote);
- ut_ad(s + 1 == ptr);
- } else {
- str = mem_heap_strdupl(heap, s, len);
- }
-
- if (!table_id) {
-convert_id:
- /* Convert the identifier from connection character set
- to UTF-8. */
- len = 3 * len + 1;
- *id = dst = mem_heap_alloc(heap, len);
-
- innobase_convert_from_id(dst, str, len);
- } else if (!strncmp(str, srv_mysql50_table_name_prefix,
- sizeof srv_mysql50_table_name_prefix)) {
- /* This is a pre-5.1 table name
- containing chars other than [A-Za-z0-9].
- Discard the prefix and use raw UTF-8 encoding. */
- str += sizeof srv_mysql50_table_name_prefix;
- len -= sizeof srv_mysql50_table_name_prefix;
- goto convert_id;
- } else {
- /* Encode using filename-safe characters. */
- len = 5 * len + 1;
- *id = dst = mem_heap_alloc(heap, len);
-
- innobase_convert_from_table_id(dst, str, len);
- }
-
- return(ptr);
-}
-
-/*************************************************************************
-Scans one column identifier from an SQL string. If a table is given, the
-identifier is looked up among the columns of that table using
-innobase_strcasecmp(), and on a match the scanned name is overwritten with
-the column name as it is stored in the table. */
-static
-const char*
-dict_scan_col(
-/*==========*/
-					/* out: scanned to */
-	struct charset_info_st*	cs,	/* in: the character set of ptr */
-	const char*		ptr,	/* in: scanned to */
-	ibool*			success,/* out: TRUE if a name was scanned
-					and, for a non-NULL table, it matched
-					one of the table's columns */
-	dict_table_t*		table,	/* in: table in which the column is,
-					or NULL to skip the lookup */
-	const dict_col_t**	column,	/* out: pointer to column if success;
-					set to NULL when table is NULL */
-	mem_heap_t*		heap,	/* in: heap where to allocate */
-	const char**		name)	/* out,own: the column name;
-					NULL if no name was scannable */
-{
-	ulint	pos;
-
-	*success = FALSE;
-
-	ptr = dict_scan_id(cs, ptr, heap, name, FALSE, TRUE);
-
-	if (*name == NULL) {
-		/* Syntax error */
-		return(ptr);
-	}
-
-	if (table == NULL) {
-		/* There is nothing to resolve the name against:
-		accept it as it was scanned */
-		*column = NULL;
-		*success = TRUE;
-
-		return(ptr);
-	}
-
-	for (pos = 0; pos < dict_table_get_n_cols(table); pos++) {
-
-		const char*	candidate = dict_table_get_col_name(
-			table, pos);
-
-		if (0 != innobase_strcasecmp(candidate, *name)) {
-
-			continue;
-		}
-
-		/* Found: report the column and replace the scanned
-		spelling with the one stored in the table */
-		*success = TRUE;
-		*column = dict_table_get_nth_col(table, pos);
-		strcpy((char*) *name, candidate);
-
-		return(ptr);
-	}
-
-	/* No column matched: *success stays FALSE */
-	return(ptr);
-}
-
-/*************************************************************************
-Scans a table name from an SQL string. The name can be written as
-tablename, as databasename.tablename, or with the dot inside one single
-identifier. The scanned parts are joined to databasename/tablename and that
-name is looked up with dict_table_get_low(). */
-static
-const char*
-dict_scan_table_name(
-/*=================*/
-					/* out: scanned to */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scanned to */
-	dict_table_t**	table,	/* out: table object or NULL */
-	const char*	name,	/* in: foreign key table name; its database
-				part is used if the scanned name has none */
-	ibool*		success,/* out: TRUE if ok name found */
-	mem_heap_t*	heap,	/* in: heap where to allocate the id */
-	const char**	ref_name)/* out,own: the table name;
-				NULL if no name was scannable */
-{
-	const char*	db_name		= NULL;
-	ulint		db_name_len	= 0;
-	const char*	tbl_name	= NULL;
-	ulint		tbl_name_len;
-	const char*	first_id;
-	char*		full_name;
-
-	*success = FALSE;
-	*table = NULL;
-
-	ptr = dict_scan_id(cs, ptr, heap, &first_id, TRUE, FALSE);
-
-	if (first_id == NULL) {
-
-		return(ptr);	/* Syntax error */
-	}
-
-	if (*ptr != '.') {
-		/* To be able to read table dumps made with InnoDB-4.0.17 or
-		earlier, we must allow the dot separator between the database
-		name and the table name also to appear within a quoted
-		identifier! InnoDB used to print a constraint as:
-		... REFERENCES `databasename.tablename` ...
-		starting from 4.0.18 it is
-		... REFERENCES `databasename`.`tablename` ... */
-		const char*	dot = strchr(first_id, '.');
-
-		if (dot != NULL) {
-			/* Only the first dot is taken as the separator.
-			to do: multiple dots? */
-			db_name = first_id;
-			db_name_len = dot - first_id;
-			tbl_name = dot + 1;
-		} else {
-			tbl_name = first_id;
-		}
-	} else {
-		/* We scanned the database name; scan also the table name */
-
-		ptr++;
-
-		db_name = first_id;
-		db_name_len = strlen(db_name);
-
-		ptr = dict_scan_id(cs, ptr, heap, &tbl_name, TRUE, FALSE);
-
-		if (tbl_name == NULL) {
-
-			return(ptr);	/* Syntax error */
-		}
-	}
-
-	if (db_name == NULL) {
-		/* Use the database name of the foreign key table */
-
-		db_name = name;
-		db_name_len = dict_get_db_name_len(name);
-	}
-
-	tbl_name_len = strlen(tbl_name);
-
-	/* Copy db_name, '/', tbl_name, '\0' */
-	full_name = mem_heap_alloc(heap, db_name_len + tbl_name_len + 2);
-	memcpy(full_name, db_name, db_name_len);
-	full_name[db_name_len] = '/';
-	memcpy(full_name + db_name_len + 1, tbl_name, tbl_name_len + 1);
-#ifndef __WIN__
-	if (srv_lower_case_table_names) {
-#endif /* !__WIN__ */
-		/* The table name is always put to lower case on Windows. */
-		innobase_casedn_str(full_name);
-#ifndef __WIN__
-	}
-#endif /* !__WIN__ */
-
-	*success = TRUE;
-	*ref_name = full_name;
-	*table = dict_table_get_low(full_name);
-
-	return(ptr);
-}
-
-/*************************************************************************
-Skips one id. The id is allowed to contain also '.'. The scanning is done
-by dict_scan_id() without a heap, so nothing is allocated. */
-static
-const char*
-dict_skip_word(
-/*===========*/
-					/* out: scanned to */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scanned to */
-	ibool*		success)/* out: TRUE if success, FALSE if just spaces
-				left in string or a syntax error */
-{
-	const char*	word;
-
-	ptr = dict_scan_id(cs, ptr, NULL, &word, FALSE, TRUE);
-
-	/* dict_scan_id() leaves word NULL when no id could be scanned */
-	*success = (word != NULL) ? TRUE : FALSE;
-
-	return(ptr);
-}
-
-/*************************************************************************
-Removes MySQL comments from an SQL string. A comment is either
-(a) '#' to the end of the line,
-(b) '--<space>' to the end of the line, or
-(c) '<slash><asterisk>' till the next '<asterisk><slash>' (like the familiar
-C comment syntax).
-Text inside a pair of double quotes, backquotes or single quotes is copied
-unchanged, so that comment introducers within quoted identifiers and string
-literals are not mistaken for comments. The line end that terminates a
-comment of type (a) or (b) is kept in the output. A comment of type (c)
-that is never closed removes the rest of the string. */
-static
-char*
-dict_strip_comments(
-/*================*/
-					/* out, own: SQL string stripped from
-					comments; the caller must free this
-					with mem_free()! */
-	const char*	sql_string)	/* in: SQL string */
-{
-	char*		str;
-	const char*	sptr;
-	char*		ptr;
-	/* unclosed quote character (0 if none) */
-	char		quote	= 0;
-
-	/* Characters are only copied or dropped, never added, so the
-	result always fits in a buffer of the size of the input */
-	str = mem_alloc(strlen(sql_string) + 1);
-
-	sptr = sql_string;
-	ptr = str;
-
-	for (;;) {
-scan_more:
-		if (*sptr == '\0') {
-			*ptr = '\0';
-
-			ut_a(ptr <= str + strlen(sql_string));
-
-			return(str);
-		}
-
-		if (*sptr == quote) {
-			/* Closing quote character: do not look for
-			starting quote or comments. */
-			quote = 0;
-		} else if (quote) {
-			/* Within quotes: do not look for
-			starting quotes or comments. */
-		} else if (*sptr == '"' || *sptr == '`' || *sptr == '\'') {
-			/* Starting quote: remember the quote character.
-			The single quote is included so that string
-			literals containing '#', '-- ' or a C comment
-			start are not truncated. */
-			quote = *sptr;
-		} else if (*sptr == '#'
-			   || (sptr[0] == '-' && sptr[1] == '-'
-			       && sptr[2] == ' ')) {
-			for (;;) {
-				/* In Unix a newline is 0x0A while in Windows
-				it is 0x0D followed by 0x0A */
-
-				if (*sptr == (char)0x0A
-				    || *sptr == (char)0x0D
-				    || *sptr == '\0') {
-
-					/* The line end itself is not
-					skipped: the outer loop copies it */
-					goto scan_more;
-				}
-
-				sptr++;
-			}
-		} else if (*sptr == '/' && *(sptr + 1) == '*') {
-			for (;;) {
-				if (*sptr == '*' && *(sptr + 1) == '/') {
-
-					sptr += 2;
-
-					goto scan_more;
-				}
-
-				if (*sptr == '\0') {
-
-					/* Unterminated comment */
-					goto scan_more;
-				}
-
-				sptr++;
-			}
-		}
-
-		*ptr = *sptr;
-
-		ptr++;
-		sptr++;
-	}
-}
-
-/*************************************************************************
-Finds the highest <number> for foreign key constraints of the table. Looks
-only at the >= 4.0.18-format id's, which are of the form
-databasename/tablename_ibfk_<number>. An id whose <number> starts with '0'
-or is followed by other characters is ignored. */
-static
-ulint
-dict_table_get_highest_foreign_id(
-/*==============================*/
-				/* out: highest number, 0 if table has no new
-				format foreign key constraints */
-	dict_table_t*	table)	/* in: table in the dictionary memory cache */
-{
-	/* length of dict_ibfk without the terminating NUL */
-	const ulint	suffix_len	= (sizeof dict_ibfk) - 1;
-	dict_foreign_t*	fk;
-	ulint		max_id		= 0;
-	ulint		name_len;
-
-	ut_a(table);
-
-	name_len = ut_strlen(table->name);
-
-	for (fk = UT_LIST_GET_FIRST(table->foreign_list);
-	     fk != NULL;
-	     fk = UT_LIST_GET_NEXT(foreign_list, fk)) {
-
-		const char*	number;
-		char*		endp;
-		ulint		id;
-
-		if (ut_strlen(fk->id) <= suffix_len + name_len
-		    || 0 != ut_memcmp(fk->id, table->name, name_len)
-		    || 0 != ut_memcmp(fk->id + name_len,
-				      dict_ibfk, suffix_len)
-		    || fk->id[name_len + suffix_len] == '0') {
-			/* Not of the >= 4.0.18 format */
-
-			continue;
-		}
-
-		number = fk->id + name_len + suffix_len;
-
-		id = strtoul(number, &endp, 10);
-
-		if (*endp != '\0') {
-			/* Something else than digits follows the prefix */
-
-			continue;
-		}
-
-		ut_a(id != max_id);
-
-		if (id > max_id) {
-			max_id = id;
-		}
-	}
-
-	return(max_id);
-}
-
-/*************************************************************************
-Reports a simple foreign key create clause syntax error. The report is
-written to dict_foreign_err_file while holding dict_foreign_err_mutex. */
-static
-void
-dict_foreign_report_syntax_err(
-/*===========================*/
-	const char*	name,	/* in: table name */
-	const char*	start_of_latest_foreign,
-				/* in: start of the foreign key clause
-				in the SQL string */
-	const char*	ptr)	/* in: place of the syntax error */
-{
-	FILE*	err_file = dict_foreign_err_file;
-
-	mutex_enter(&dict_foreign_err_mutex);
-
-	dict_foreign_error_report_low(err_file, name);
-
-	/* Print the clause first and then the text from the position
-	where the scanning stopped */
-	fprintf(err_file, "%s:\nSyntax error close to:\n%s\n",
-		start_of_latest_foreign, ptr);
-
-	mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*************************************************************************
-Scans a table create SQL string and adds to the data dictionary the foreign
-key constraints declared in the string. This function should be called after
-the indexes for a table have been created. Each foreign key constraint must
-be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint.
-
-The caller must own dict_sys->mutex (checked with ut_ad). Every accepted
-constraint is appended to table->foreign_list and, if the referenced table
-was found, to its referenced_list. When the end of the SQL string is
-reached, the constraints are written to the system tables with
-dict_create_add_foreigns_to_dictionary(). Errors in the foreign key clauses
-are described in dict_foreign_err_file. */
-static
-ulint
-dict_create_foreign_constraints_low(
-/*================================*/
-				/* out: error code or DB_SUCCESS */
-	trx_t*		trx,	/* in: transaction */
-	mem_heap_t*	heap,	/* in: memory heap */
-	struct charset_info_st*	cs,/* in: the character set of sql_string */
-	const char*	sql_string,
-				/* in: CREATE TABLE or ALTER TABLE statement
-				where foreign keys are declared like:
-				FOREIGN KEY (a, b) REFERENCES table2(c, d),
-				table2 can be written also with the database
-				name before it: test.table2; the default
-				database is the database of parameter name */
-	const char*	name,	/* in: table full name in the normalized form
-				database_name/table_name */
-	ibool		reject_fks)
-				/* in: if TRUE, fail with error code
-				DB_CANNOT_ADD_CONSTRAINT if any foreign
-				keys are found. */
-{
-	dict_table_t*	table;
-	dict_table_t*	referenced_table;
-	dict_table_t*	table_to_alter;
-	ulint		highest_id_so_far	= 0;
-	dict_index_t*	index;
-	dict_foreign_t*	foreign;
-	const char*	ptr			= sql_string;
-	const char*	start_of_latest_foreign	= sql_string;
-	FILE*		ef			= dict_foreign_err_file;
-	const char*	constraint_name;
-	ibool		success;
-	ulint		error;
-	const char*	ptr1;
-	const char*	ptr2;
-	ulint		i;
-	ulint		j;
-	ibool		is_on_delete;
-	ulint		n_on_deletes;
-	ulint		n_on_updates;
-	/* The two arrays below are filled first for the FOREIGN KEY column
-	list and then reused for the REFERENCES column list */
-	const dict_col_t*columns[500];
-	const char*	column_names[500];
-	const char*	referenced_table_name;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	table = dict_table_get_low(name);
-
-	if (table == NULL) {
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, name);
-		fprintf(ef,
-			"Cannot find the table in the internal"
-			" data dictionary of InnoDB.\n"
-			"Create table statement:\n%s\n", sql_string);
-		mutex_exit(&dict_foreign_err_mutex);
-
-		return(DB_ERROR);
-	}
-
-	/* First check if we are actually doing an ALTER TABLE, and in that
-	case look for the table being altered */
-
-	ptr = dict_accept(cs, ptr, "ALTER", &success);
-
-	if (!success) {
-
-		goto loop;
-	}
-
-	ptr = dict_accept(cs, ptr, "TABLE", &success);
-
-	if (!success) {
-
-		goto loop;
-	}
-
-	/* We are doing an ALTER TABLE: scan the table name we are altering */
-
-	ptr = dict_scan_table_name(cs, ptr, &table_to_alter, name,
-				   &success, heap, &referenced_table_name);
-	if (!success) {
-		fprintf(stderr,
-			"InnoDB: Error: could not find"
-			" the table being ALTERED in:\n%s\n",
-			sql_string);
-
-		return(DB_ERROR);
-	}
-
-	/* Starting from 4.0.18 and 4.1.2, we generate foreign key id's in the
-	format databasename/tablename_ibfk_<number>, where <number> is local
-	to the table; look for the highest <number> for table_to_alter, so
-	that we can assign to new constraints higher numbers. */
-
-	/* If we are altering a temporary table, the table name after ALTER
-	TABLE does not correspond to the internal table name, and
-	table_to_alter is NULL. TODO: should we fix this somehow? */
-
-	if (table_to_alter == NULL) {
-		highest_id_so_far = 0;
-	} else {
-		highest_id_so_far = dict_table_get_highest_foreign_id(
-			table_to_alter);
-	}
-
-	/* Scan for foreign key declarations in a loop */
-loop:
-	/* Scan either to "CONSTRAINT" or "FOREIGN", whichever is closer */
-
-	ptr1 = dict_scan_to(ptr, "CONSTRAINT");
-	ptr2 = dict_scan_to(ptr, "FOREIGN");
-
-	constraint_name = NULL;
-
-	if (ptr1 < ptr2) {
-		/* The user may have specified a constraint name. Pick it so
-		that we can store 'databasename/constraintname' as the id of
-		of the constraint to system tables. */
-		ptr = ptr1;
-
-		ptr = dict_accept(cs, ptr, "CONSTRAINT", &success);
-
-		ut_a(success);
-
-		if (!my_isspace(cs, *ptr) && *ptr != '"' && *ptr != '`') {
-			goto loop;
-		}
-
-		while (my_isspace(cs, *ptr)) {
-			ptr++;
-		}
-
-		/* read constraint name unless got "CONSTRAINT FOREIGN" */
-		if (ptr != ptr2) {
-			ptr = dict_scan_id(cs, ptr, heap,
-					   &constraint_name, FALSE, FALSE);
-		}
-	} else {
-		ptr = ptr2;
-	}
-
-	if (*ptr == '\0') {
-		/* The proper way to reject foreign keys for temporary
-		tables would be to split the lexing and syntactical
-		analysis of foreign key clauses from the actual adding
-		of them, so that ha_innodb.cc could first parse the SQL
-		command, determine if there are any foreign keys, and
-		if so, immediately reject the command if the table is a
-		temporary one. For now, this kludge will work. */
-		if (reject_fks && (UT_LIST_GET_LEN(table->foreign_list) > 0)) {
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-
-		/**********************************************************/
-		/* The following call adds the foreign key constraints
-		to the data dictionary system tables on disk */
-
-		error = dict_create_add_foreigns_to_dictionary(
-			highest_id_so_far, table, trx);
-		return(error);
-	}
-
-	start_of_latest_foreign = ptr;
-
-	ptr = dict_accept(cs, ptr, "FOREIGN", &success);
-
-	if (!success) {
-		goto loop;
-	}
-
-	if (!my_isspace(cs, *ptr)) {
-		goto loop;
-	}
-
-	ptr = dict_accept(cs, ptr, "KEY", &success);
-
-	if (!success) {
-		goto loop;
-	}
-
-	ptr = dict_accept(cs, ptr, "(", &success);
-
-	if (!success) {
-		/* MySQL allows also an index id before the '('; we
-		skip it */
-		ptr = dict_skip_word(cs, ptr, &success);
-
-		if (!success) {
-			dict_foreign_report_syntax_err(
-				name, start_of_latest_foreign, ptr);
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-
-		ptr = dict_accept(cs, ptr, "(", &success);
-
-		if (!success) {
-			/* We do not flag a syntax error here because in an
-			ALTER TABLE we may also have DROP FOREIGN KEY abc */
-
-			goto loop;
-		}
-	}
-
-	i = 0;
-
-	/* Scan the columns in the first list */
-col_loop1:
-	ut_a(i < (sizeof column_names) / sizeof *column_names);
-	ptr = dict_scan_col(cs, ptr, &success, table, columns + i,
-			    heap, column_names + i);
-	if (!success) {
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, name);
-		fprintf(ef, "%s:\nCannot resolve column name close to:\n%s\n",
-			start_of_latest_foreign, ptr);
-		mutex_exit(&dict_foreign_err_mutex);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	i++;
-
-	ptr = dict_accept(cs, ptr, ",", &success);
-
-	if (success) {
-		goto col_loop1;
-	}
-
-	ptr = dict_accept(cs, ptr, ")", &success);
-
-	if (!success) {
-		dict_foreign_report_syntax_err(
-			name, start_of_latest_foreign, ptr);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	/* Try to find an index which contains the columns
-	as the first fields and in the right order */
-
-	index = dict_foreign_find_index(table, column_names, i,
-					NULL, TRUE, FALSE);
-
-	if (!index) {
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, name);
-		fputs("There is no index in table ", ef);
-		ut_print_name(ef, NULL, TRUE, name);
-		fprintf(ef, " where the columns appear\n"
-			"as the first columns. Constraint:\n%s\n"
-			"See http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-foreign-key-constraints.html\n"
-			"for correct foreign key definition.\n",
-			start_of_latest_foreign);
-		mutex_exit(&dict_foreign_err_mutex);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-	ptr = dict_accept(cs, ptr, "REFERENCES", &success);
-
-	if (!success || !my_isspace(cs, *ptr)) {
-		dict_foreign_report_syntax_err(
-			name, start_of_latest_foreign, ptr);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	/* Let us create a constraint struct. From here on, every error
-	return must release it with dict_foreign_free(). */
-
-	foreign = dict_mem_foreign_create();
-
-	if (constraint_name) {
-		ulint	db_len;
-
-		/* Catenate 'databasename/' to the constraint name specified
-		by the user: we conceive the constraint as belonging to the
-		same MySQL 'database' as the table itself. We store the name
-		to foreign->id. */
-
-		db_len = dict_get_db_name_len(table->name);
-
-		foreign->id = mem_heap_alloc(
-			foreign->heap, db_len + strlen(constraint_name) + 2);
-
-		ut_memcpy(foreign->id, table->name, db_len);
-		foreign->id[db_len] = '/';
-		strcpy(foreign->id + db_len + 1, constraint_name);
-	}
-
-	foreign->foreign_table = table;
-	foreign->foreign_table_name = mem_heap_strdup(foreign->heap,
-						      table->name);
-	foreign->foreign_index = index;
-	foreign->n_fields = (unsigned int) i;
-	foreign->foreign_col_names = mem_heap_alloc(foreign->heap,
-						    i * sizeof(void*));
-	for (i = 0; i < foreign->n_fields; i++) {
-		foreign->foreign_col_names[i] = mem_heap_strdup(
-			foreign->heap,
-			dict_table_get_col_name(table,
-						dict_col_get_no(columns[i])));
-	}
-
-	ptr = dict_scan_table_name(cs, ptr, &referenced_table, name,
-				   &success, heap, &referenced_table_name);
-
-	/* Note that referenced_table can be NULL if the user has suppressed
-	checking of foreign key constraints! */
-
-	if (!success || (!referenced_table && trx->check_foreigns)) {
-		dict_foreign_free(foreign);
-
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, name);
-		fprintf(ef, "%s:\nCannot resolve table name close to:\n"
-			"%s\n",
-			start_of_latest_foreign, ptr);
-		mutex_exit(&dict_foreign_err_mutex);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	ptr = dict_accept(cs, ptr, "(", &success);
-
-	if (!success) {
-		dict_foreign_free(foreign);
-		dict_foreign_report_syntax_err(name, start_of_latest_foreign,
-					       ptr);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	/* Scan the columns in the second list */
-	i = 0;
-
-col_loop2:
-	if (i >= (sizeof column_names) / sizeof *column_names) {
-		/* The first list fitted in the arrays, so a second list
-		that does not fit cannot have the same number of columns.
-		Report the mismatch here instead of writing beyond the end
-		of columns[] and column_names[]. */
-
-		dict_foreign_free(foreign);
-
-		dict_foreign_report_syntax_err(name, start_of_latest_foreign,
-					       ptr);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	ptr = dict_scan_col(cs, ptr, &success, referenced_table, columns + i,
-			    heap, column_names + i);
-	i++;
-
-	if (!success) {
-		dict_foreign_free(foreign);
-
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, name);
-		fprintf(ef, "%s:\nCannot resolve column name close to:\n"
-			"%s\n",
-			start_of_latest_foreign, ptr);
-		mutex_exit(&dict_foreign_err_mutex);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	ptr = dict_accept(cs, ptr, ",", &success);
-
-	if (success) {
-		goto col_loop2;
-	}
-
-	ptr = dict_accept(cs, ptr, ")", &success);
-
-	/* Both column lists must have the same number of columns */
-	if (!success || foreign->n_fields != i) {
-		dict_foreign_free(foreign);
-
-		dict_foreign_report_syntax_err(name, start_of_latest_foreign,
-					       ptr);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	n_on_deletes = 0;
-	n_on_updates = 0;
-
-scan_on_conditions:
-	/* Loop here as long as we can find ON ... conditions */
-
-	ptr = dict_accept(cs, ptr, "ON", &success);
-
-	if (!success) {
-
-		goto try_find_index;
-	}
-
-	ptr = dict_accept(cs, ptr, "DELETE", &success);
-
-	if (!success) {
-		ptr = dict_accept(cs, ptr, "UPDATE", &success);
-
-		if (!success) {
-			dict_foreign_free(foreign);
-
-			dict_foreign_report_syntax_err(
-				name, start_of_latest_foreign, ptr);
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-
-		is_on_delete = FALSE;
-		n_on_updates++;
-	} else {
-		is_on_delete = TRUE;
-		n_on_deletes++;
-	}
-
-	/* RESTRICT sets no bit in foreign->type */
-	ptr = dict_accept(cs, ptr, "RESTRICT", &success);
-
-	if (success) {
-		goto scan_on_conditions;
-	}
-
-	ptr = dict_accept(cs, ptr, "CASCADE", &success);
-
-	if (success) {
-		if (is_on_delete) {
-			foreign->type |= DICT_FOREIGN_ON_DELETE_CASCADE;
-		} else {
-			foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE;
-		}
-
-		goto scan_on_conditions;
-	}
-
-	ptr = dict_accept(cs, ptr, "NO", &success);
-
-	if (success) {
-		ptr = dict_accept(cs, ptr, "ACTION", &success);
-
-		if (!success) {
-			dict_foreign_free(foreign);
-			dict_foreign_report_syntax_err(
-				name, start_of_latest_foreign, ptr);
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-
-		if (is_on_delete) {
-			foreign->type |= DICT_FOREIGN_ON_DELETE_NO_ACTION;
-		} else {
-			foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION;
-		}
-
-		goto scan_on_conditions;
-	}
-
-	/* The only remaining alternative is SET NULL */
-	ptr = dict_accept(cs, ptr, "SET", &success);
-
-	if (!success) {
-		dict_foreign_free(foreign);
-		dict_foreign_report_syntax_err(name, start_of_latest_foreign,
-					       ptr);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	ptr = dict_accept(cs, ptr, "NULL", &success);
-
-	if (!success) {
-		dict_foreign_free(foreign);
-		dict_foreign_report_syntax_err(name, start_of_latest_foreign,
-					       ptr);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	for (j = 0; j < foreign->n_fields; j++) {
-		if ((dict_index_get_nth_col(foreign->foreign_index, j)->prtype)
-		    & DATA_NOT_NULL) {
-
-			/* It is not sensible to define SET NULL
-			if the column is not allowed to be NULL! */
-
-			dict_foreign_free(foreign);
-
-			mutex_enter(&dict_foreign_err_mutex);
-			dict_foreign_error_report_low(ef, name);
-			fprintf(ef, "%s:\n"
-				"You have defined a SET NULL condition"
-				" though some of the\n"
-				"columns are defined as NOT NULL.\n",
-				start_of_latest_foreign);
-			mutex_exit(&dict_foreign_err_mutex);
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-	}
-
-	if (is_on_delete) {
-		foreign->type |= DICT_FOREIGN_ON_DELETE_SET_NULL;
-	} else {
-		foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL;
-	}
-
-	goto scan_on_conditions;
-
-try_find_index:
-	if (n_on_deletes > 1 || n_on_updates > 1) {
-		/* It is an error to define more than 1 action */
-
-		dict_foreign_free(foreign);
-
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, name);
-		fprintf(ef, "%s:\n"
-			"You have twice an ON DELETE clause"
-			" or twice an ON UPDATE clause.\n",
-			start_of_latest_foreign);
-		mutex_exit(&dict_foreign_err_mutex);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	/* Try to find an index which contains the columns as the first fields
-	and in the right order, and the types are the same as in
-	foreign->foreign_index */
-
-	if (referenced_table) {
-		index = dict_foreign_find_index(referenced_table,
-						column_names, i,
-						foreign->foreign_index,
-						TRUE, FALSE);
-		if (!index) {
-			dict_foreign_free(foreign);
-			mutex_enter(&dict_foreign_err_mutex);
-			dict_foreign_error_report_low(ef, name);
-			fprintf(ef, "%s:\n"
-				"Cannot find an index in the"
-				" referenced table where the\n"
-				"referenced columns appear as the"
-				" first columns, or column types\n"
-				"in the table and the referenced table"
-				" do not match for constraint.\n"
-				"Note that the internal storage type of"
-				" ENUM and SET changed in\n"
-				"tables created with >= InnoDB-4.1.12,"
-				" and such columns in old tables\n"
-				"cannot be referenced by such columns"
-				" in new tables.\n"
-				"See http://dev.mysql.com/doc/refman/5.1/en/"
-				"innodb-foreign-key-constraints.html\n"
-				"for correct foreign key definition.\n",
-				start_of_latest_foreign);
-			mutex_exit(&dict_foreign_err_mutex);
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-	} else {
-		ut_a(trx->check_foreigns == FALSE);
-		index = NULL;
-	}
-
-	foreign->referenced_index = index;
-	foreign->referenced_table = referenced_table;
-
-	foreign->referenced_table_name
-		= mem_heap_strdup(foreign->heap, referenced_table_name);
-
-	foreign->referenced_col_names = mem_heap_alloc(foreign->heap,
-						       i * sizeof(void*));
-	for (i = 0; i < foreign->n_fields; i++) {
-		foreign->referenced_col_names[i]
-			= mem_heap_strdup(foreign->heap, column_names[i]);
-	}
-
-	/* We found an ok constraint definition: add to the lists */
-
-	UT_LIST_ADD_LAST(foreign_list, table->foreign_list, foreign);
-
-	if (referenced_table) {
-		UT_LIST_ADD_LAST(referenced_list,
-				 referenced_table->referenced_list,
-				 foreign);
-	}
-
-	goto loop;
-}
-
-/**************************************************************************
-Determines whether a string starts with the specified keyword. The check
-is done with dict_accept() in the character set returned by
-innobase_get_charset() for the given thread handle. */
-
-ibool
-dict_str_starts_with_keyword(
-/*=========================*/
-					/* out: TRUE if str starts
-					with keyword */
-	void*		mysql_thd,	/* in: MySQL thread handle */
-	const char*	str,		/* in: string to scan for keyword */
-	const char*	keyword)	/* in: keyword to look for */
-{
-	ibool	found;
-
-	/* Only the success flag is of interest; the returned scan
-	position is not needed */
-	dict_accept(innobase_get_charset(mysql_thd), str, keyword, &found);
-
-	return(found);
-}
-
-/*************************************************************************
-Scans a table create SQL string and adds to the data dictionary the foreign
-key constraints declared in the string. This function should be called after
-the indexes for a table have been created. Each foreign key constraint must
-be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint.
-This is a wrapper that strips the comments from the SQL string, creates a
-temporary memory heap and calls dict_create_foreign_constraints_low(). */
-
-ulint
-dict_create_foreign_constraints(
-/*============================*/
-					/* out: error code or DB_SUCCESS */
-	trx_t*		trx,		/* in: transaction */
-	const char*	sql_string,	/* in: table create statement where
-					foreign keys are declared like:
-					FOREIGN KEY (a, b) REFERENCES
-					table2(c, d), table2 can be written
-					also with the database
-					name before it: test.table2; the
-					default database id the database of
-					parameter name */
-	const char*	name,		/* in: table full name in the
-					normalized form
-					database_name/table_name */
-	ibool		reject_fks)	/* in: if TRUE, fail with error
-					code DB_CANNOT_ADD_CONSTRAINT if
-					any foreign keys are found. */
-{
-	struct charset_info_st*	cs;
-	mem_heap_t*		tmp_heap;
-	char*			stripped_sql;
-	ulint			err;
-
-	ut_a(trx);
-	ut_a(trx->mysql_thd);
-
-	stripped_sql = dict_strip_comments(sql_string);
-	tmp_heap = mem_heap_create(10000);
-	cs = innobase_get_charset(trx->mysql_thd);
-
-	err = dict_create_foreign_constraints_low(
-		trx, tmp_heap, cs, stripped_sql, name, reject_fks);
-
-	/* Both temporary allocations are released whatever the result */
-	mem_heap_free(tmp_heap);
-	mem_free(stripped_sql);
-
-	return(err);
-}
-
-/**************************************************************************
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. The
-statement is read from *(trx->mysql_query_str); every DROP FOREIGN KEY <id>
-found in it is stored to constraints_to_drop and checked against
-table->foreign_list. Errors are described in dict_foreign_err_file. */
-
-ulint
-dict_foreign_parse_drop_constraints(
-/*================================*/
-						/* out: DB_SUCCESS or
-						DB_CANNOT_DROP_CONSTRAINT if
-						syntax error or the constraint
-						id does not match */
-	mem_heap_t*	heap,			/* in: heap from which we can
-						allocate memory */
-	trx_t*		trx,			/* in: transaction */
-	dict_table_t*	table,			/* in: table */
-	ulint*		n,			/* out: number of constraints
-						to drop */
-	const char***	constraints_to_drop)	/* out: id's of the
-						constraints to drop */
-{
-	struct charset_info_st*	cs;
-	FILE*			err_file = dict_foreign_err_file;
-	dict_foreign_t*		fk;
-	char*			stripped_sql;
-	const char*		ptr;
-	const char*		id;
-	ibool			success;
-
-	ut_a(trx);
-	ut_a(trx->mysql_thd);
-
-	cs = innobase_get_charset(trx->mysql_thd);
-
-	*n = 0;
-
-	/* Room for 1000 ids; the count is checked with ut_a below */
-	*constraints_to_drop = mem_heap_alloc(heap, 1000 * sizeof(char*));
-
-	stripped_sql = dict_strip_comments(*(trx->mysql_query_str));
-	ptr = stripped_sql;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	for (;;) {
-		ptr = dict_scan_to(ptr, "DROP");
-
-		if (*ptr == '\0') {
-			/* No more DROP clauses */
-			mem_free(stripped_sql);
-
-			return(DB_SUCCESS);
-		}
-
-		ptr = dict_accept(cs, ptr, "DROP", &success);
-
-		if (!my_isspace(cs, *ptr)) {
-
-			continue;
-		}
-
-		ptr = dict_accept(cs, ptr, "FOREIGN", &success);
-
-		if (!success || !my_isspace(cs, *ptr)) {
-			/* Some other kind of DROP: keep on scanning */
-
-			continue;
-		}
-
-		ptr = dict_accept(cs, ptr, "KEY", &success);
-
-		if (!success) {
-
-			goto syntax_error;
-		}
-
-		ptr = dict_scan_id(cs, ptr, heap, &id, FALSE, TRUE);
-
-		if (id == NULL) {
-
-			goto syntax_error;
-		}
-
-		ut_a(*n < 1000);
-		(*constraints_to_drop)[*n] = id;
-		(*n)++;
-
-		/* Look for the given constraint id: it may match the
-		whole stored id or the part after the database name */
-
-		for (fk = UT_LIST_GET_FIRST(table->foreign_list);
-		     fk != NULL;
-		     fk = UT_LIST_GET_NEXT(foreign_list, fk)) {
-
-			if (0 == strcmp(fk->id, id)
-			    || (strchr(fk->id, '/')
-				&& 0 == strcmp(id,
-					       dict_remove_db_name(fk->id)))) {
-				/* Found */
-				break;
-			}
-		}
-
-		if (fk != NULL) {
-
-			continue;
-		}
-
-		mutex_enter(&dict_foreign_err_mutex);
-		rewind(err_file);
-		ut_print_timestamp(err_file);
-		fputs(" Error in dropping of a foreign key constraint"
-		      " of table ", err_file);
-		ut_print_name(err_file, NULL, TRUE, table->name);
-		fputs(",\n"
-		      "in SQL command\n", err_file);
-		fputs(stripped_sql, err_file);
-		fputs("\nCannot find a constraint with the given id ",
-		      err_file);
-		ut_print_name(err_file, NULL, FALSE, id);
-		fputs(".\n", err_file);
-		mutex_exit(&dict_foreign_err_mutex);
-
-		mem_free(stripped_sql);
-
-		return(DB_CANNOT_DROP_CONSTRAINT);
-	}
-
-syntax_error:
-	mutex_enter(&dict_foreign_err_mutex);
-	rewind(err_file);
-	ut_print_timestamp(err_file);
-	fputs(" Syntax error in dropping of a"
-	      " foreign key constraint of table ", err_file);
-	ut_print_name(err_file, NULL, TRUE, table->name);
-	fprintf(err_file, ",\n"
-		"close to:\n%s\n in SQL command\n%s\n", ptr, stripped_sql);
-	mutex_exit(&dict_foreign_err_mutex);
-
-	mem_free(stripped_sql);
-
-	return(DB_CANNOT_DROP_CONSTRAINT);
-}
-#endif /* UNIV_HOTBACKUP */
-
-/*==================== END OF FOREIGN KEY PROCESSING ====================*/
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Returns an index object if it is found in the dictionary cache. The lookup
-is done while holding dict_sys->mutex; if dict_sys has not been created,
-NULL is returned without any lookup. */
-
-dict_index_t*
-dict_index_get_if_in_cache(
-/*=======================*/
-				/* out: index, NULL if not found */
-	dulint	index_id)	/* in: index id */
-{
-	dict_index_t*	found	= NULL;
-
-	if (dict_sys != NULL) {
-		mutex_enter(&(dict_sys->mutex));
-
-		found = dict_index_find_on_id_low(index_id);
-
-		mutex_exit(&(dict_sys->mutex));
-	}
-
-	return(found);
-}
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Checks that a tuple has n_fields_cmp value in a sensible range, so that
-no comparison can occur with the page number field in a node pointer.
-A violation fails a ut_a assertion instead of returning FALSE. */
-
-ibool
-dict_index_check_search_tuple(
-/*==========================*/
-				/* out: TRUE if ok */
-	dict_index_t*	index,	/* in: index tree */
-	dtuple_t*	tuple)	/* in: tuple used in a search */
-{
-	ulint	n_cmp;
-	ulint	n_unique;
-
-	ut_a(index);
-
-	n_cmp = dtuple_get_n_fields_cmp(tuple);
-	n_unique = dict_index_get_n_unique_in_tree(index);
-
-	ut_a(n_cmp <= n_unique);
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/**************************************************************************
-Builds a node pointer out of a physical record and a page number. The
-node pointer consists of a prefix of the record followed by one 4-byte
-field that holds the page number. */
-
-dtuple_t*
-dict_index_build_node_ptr(
-/*======================*/
-				/* out, own: node pointer */
-	dict_index_t*	index,	/* in: index tree */
-	rec_t*		rec,	/* in: record for which to build node
-				pointer */
-	ulint		page_no,/* in: page number to put in node pointer */
-	mem_heap_t*	heap,	/* in: memory heap where pointer created */
-	ulint		level)	/* in: level of rec in tree: 0 means leaf
-				level */
-{
-	dtuple_t*	node_ptr;
-	dfield_t*	child_field;
-	byte*		child_buf;
-	ulint		n_prefix;
-
-	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
-		/* In a universal index tree, we take the whole record as
-		the node pointer if the record is on the leaf level,
-		on non-leaf levels we remove the last field, which
-		contains the page number of the child page */
-
-		ut_a(!dict_table_is_comp(index->table));
-		n_prefix = rec_get_n_fields_old(rec);
-
-		if (level > 0) {
-			ut_a(n_prefix > 1);
-			n_prefix--;
-		}
-	} else {
-		n_prefix = dict_index_get_n_unique_in_tree(index);
-	}
-
-	/* One extra field is needed for the child page number */
-	node_ptr = dtuple_create(heap, n_prefix + 1);
-
-	/* When searching in the tree for the node pointer, we must not do
-	comparison on the last field, the page number field, as on upper
-	levels in the tree there may be identical node pointers with a
-	different page number; therefore, we set the n_fields_cmp to one
-	less: */
-
-	dtuple_set_n_fields_cmp(node_ptr, n_prefix);
-
-	dict_index_copy_types(node_ptr, index, n_prefix);
-
-	/* Store the page number to the last field */
-	child_buf = mem_heap_alloc(heap, 4);
-
-	mach_write_to_4(child_buf, page_no);
-
-	child_field = dtuple_get_nth_field(node_ptr, n_prefix);
-	dfield_set_data(child_field, child_buf, 4);
-
-	dtype_set(dfield_get_type(child_field),
-		  DATA_SYS_CHILD, DATA_NOT_NULL, 4);
-
-	/* Fill in the prefix fields from the record */
-	rec_copy_prefix_to_dtuple(node_ptr, rec, index, n_prefix, heap);
-	dtuple_set_info_bits(node_ptr, dtuple_get_info_bits(node_ptr)
-			     | REC_STATUS_NODE_PTR);
-
-	ut_ad(dtuple_check_typed(node_ptr));
-
-	return(node_ptr);
-}
-
-/**************************************************************************
-Copies an initial segment of a physical record, long enough to specify an
-index entry uniquely. For a DICT_UNIVERSAL index all the fields of the
-record are copied. */
-
-rec_t*
-dict_index_copy_rec_order_prefix(
-/*=============================*/
-				/* out: pointer to the prefix record */
-	dict_index_t*	index,	/* in: index tree */
-	rec_t*		rec,	/* in: record for which to copy prefix */
-	ulint*		n_fields,/* out: number of fields copied */
-	byte**		buf,	/* in/out: memory buffer for the copied prefix,
-				or NULL */
-	ulint*		buf_size)/* in/out: buffer size */
-{
-	ulint	n_prefix;
-
-	UNIV_PREFETCH_R(rec);
-
-	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
-		ut_a(!dict_table_is_comp(index->table));
-
-		n_prefix = rec_get_n_fields_old(rec);
-	} else {
-		n_prefix = dict_index_get_n_unique_in_tree(index);
-	}
-
-	/* Tell the caller how many fields the returned prefix has */
-	*n_fields = n_prefix;
-
-	return(rec_copy_prefix_to_buf(rec, index, n_prefix, buf, buf_size));
-}
-
-/**************************************************************************
-Builds a typed data tuple out of a physical record. The field types are
-copied from the index and the field values from the first n_fields fields
-of the record. */
-
-dtuple_t*
-dict_index_build_data_tuple(
-/*========================*/
-				/* out, own: data tuple */
-	dict_index_t*	index,	/* in: index tree */
-	rec_t*		rec,	/* in: record for which to build data tuple */
-	ulint		n_fields,/* in: number of data fields */
-	mem_heap_t*	heap)	/* in: memory heap where tuple created */
-{
-	dtuple_t*	row;
-
-	/* The field count can be checked against the record only in
-	the old record format */
-	ut_ad(dict_table_is_comp(index->table)
-	      || n_fields <= rec_get_n_fields_old(rec));
-
-	row = dtuple_create(heap, n_fields);
-
-	dict_index_copy_types(row, index, n_fields);
-
-	rec_copy_prefix_to_dtuple(row, rec, index, n_fields, heap);
-
-	ut_ad(dtuple_check_typed(row));
-
-	return(row);
-}
-
-/*************************************************************************
-Calculates the minimum record length in an index. */
-
-ulint
-dict_index_calc_min_rec_len(
-/*========================*/
- dict_index_t* index) /* in: index */
-{
- ulint sum = 0;
- ulint i;
-
- if (dict_table_is_comp(index->table)) {
- ulint nullable = 0;
- sum = REC_N_NEW_EXTRA_BYTES;
- for (i = 0; i < dict_index_get_n_fields(index); i++) {
- const dict_col_t* col
- = dict_index_get_nth_col(index, i);
- ulint size = dict_col_get_fixed_size(col);
- sum += size;
- if (!size) {
- size = col->len;
- sum += size < 128 ? 1 : 2;
- }
- if (!(col->prtype & DATA_NOT_NULL)) {
- nullable++;
- }
- }
-
- /* round the NULL flags up to full bytes */
- sum += UT_BITS_IN_BYTES(nullable);
-
- return(sum);
- }
-
- for (i = 0; i < dict_index_get_n_fields(index); i++) {
- sum += dict_col_get_fixed_size(
- dict_index_get_nth_col(index, i));
- }
-
- if (sum > 127) {
- sum += 2 * dict_index_get_n_fields(index);
- } else {
- sum += dict_index_get_n_fields(index);
- }
-
- sum += REC_N_OLD_EXTRA_BYTES;
-
- return(sum);
-}
-
-/*************************************************************************
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
-
-void
-dict_update_statistics_low(
-/*=======================*/
- dict_table_t* table, /* in: table */
- ibool has_dict_mutex __attribute__((unused)))
- /* in: TRUE if the caller has the
- dictionary mutex */
-{
- dict_index_t* index;
- ulint size;
- ulint sum_of_index_sizes = 0;
-
- if (table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: cannot calculate statistics for table %s\n"
- "InnoDB: because the .ibd file is missing. For help,"
- " please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
- table->name);
-
- return;
- }
-
- /* If we have set a high innodb_force_recovery level, do not calculate
- statistics, as a badly corrupted index can cause a crash in it. */
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-
- return;
- }
-
- /* Find out the sizes of the indexes and how many different values
- for the key they approximately have */
-
- index = dict_table_get_first_index(table);
-
- if (index == NULL) {
- /* Table definition is corrupt */
-
- return;
- }
-
- while (index) {
- size = btr_get_size(index, BTR_TOTAL_SIZE);
-
- index->stat_index_size = size;
-
- sum_of_index_sizes += size;
-
- size = btr_get_size(index, BTR_N_LEAF_PAGES);
-
- if (size == 0) {
- /* The root node of the tree is a leaf */
- size = 1;
- }
-
- index->stat_n_leaf_pages = size;
-
- btr_estimate_number_of_different_key_vals(index);
-
- index = dict_table_get_next_index(index);
- }
-
- index = dict_table_get_first_index(table);
-
- table->stat_n_rows = index->stat_n_diff_key_vals[
- dict_index_get_n_unique(index)];
-
- table->stat_clustered_index_size = index->stat_index_size;
-
- table->stat_sum_of_other_index_sizes = sum_of_index_sizes
- - index->stat_index_size;
-
- table->stat_initialized = TRUE;
-
- table->stat_modified_counter = 0;
-}
-
-/*************************************************************************
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
-
-void
-dict_update_statistics(
-/*===================*/
- dict_table_t* table) /* in: table */
-{
- dict_update_statistics_low(table, FALSE);
-}
-
-/**************************************************************************
-A noninlined version of dict_table_get_low. */
-
-dict_table_t*
-dict_table_get_low_noninlined(
-/*==========================*/
- /* out: table, NULL if not found */
- const char* table_name) /* in: table name */
-{
- return(dict_table_get_low(table_name));
-}
-
-/**************************************************************************
-Prints info of a foreign key constraint. */
-static
-void
-dict_foreign_print_low(
-/*===================*/
- dict_foreign_t* foreign) /* in: foreign key constraint */
-{
- ulint i;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- fprintf(stderr, " FOREIGN KEY CONSTRAINT %s: %s (",
- foreign->id, foreign->foreign_table_name);
-
- for (i = 0; i < foreign->n_fields; i++) {
- fprintf(stderr, " %s", foreign->foreign_col_names[i]);
- }
-
- fprintf(stderr, " )\n"
- " REFERENCES %s (",
- foreign->referenced_table_name);
-
- for (i = 0; i < foreign->n_fields; i++) {
- fprintf(stderr, " %s", foreign->referenced_col_names[i]);
- }
-
- fputs(" )\n", stderr);
-}
-
-/**************************************************************************
-Prints a table data. */
-
-void
-dict_table_print(
-/*=============*/
- dict_table_t* table) /* in: table */
-{
- mutex_enter(&(dict_sys->mutex));
- dict_table_print_low(table);
- mutex_exit(&(dict_sys->mutex));
-}
-
-/**************************************************************************
-Prints a table data when we know the table name. */
-
-void
-dict_table_print_by_name(
-/*=====================*/
- const char* name)
-{
- dict_table_t* table;
-
- mutex_enter(&(dict_sys->mutex));
-
- table = dict_table_get_low(name);
-
- ut_a(table);
-
- dict_table_print_low(table);
- mutex_exit(&(dict_sys->mutex));
-}
-
-/**************************************************************************
-Prints a table data. */
-
-void
-dict_table_print_low(
-/*=================*/
- dict_table_t* table) /* in: table */
-{
- dict_index_t* index;
- dict_foreign_t* foreign;
- ulint i;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- dict_update_statistics_low(table, TRUE);
-
- fprintf(stderr,
- "--------------------------------------\n"
- "TABLE: name %s, id %lu %lu, columns %lu, indexes %lu,"
- " appr.rows %lu\n"
- " COLUMNS: ",
- table->name,
- (ulong) ut_dulint_get_high(table->id),
- (ulong) ut_dulint_get_low(table->id),
- (ulong) table->n_cols,
- (ulong) UT_LIST_GET_LEN(table->indexes),
- (ulong) table->stat_n_rows);
-
- for (i = 0; i + 1 < (ulint) table->n_cols; i++) {
- dict_col_print_low(table, dict_table_get_nth_col(table, i));
- fputs("; ", stderr);
- }
-
- putc('\n', stderr);
-
- index = UT_LIST_GET_FIRST(table->indexes);
-
- while (index != NULL) {
- dict_index_print_low(index);
- index = UT_LIST_GET_NEXT(indexes, index);
- }
-
- foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
- while (foreign != NULL) {
- dict_foreign_print_low(foreign);
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign != NULL) {
- dict_foreign_print_low(foreign);
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-}
-
-/**************************************************************************
-Prints a column data. */
-static
-void
-dict_col_print_low(
-/*===============*/
- const dict_table_t* table, /* in: table */
- const dict_col_t* col) /* in: column */
-{
- dtype_t type;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- dict_col_copy_type(col, &type);
- fprintf(stderr, "%s: ", dict_table_get_col_name(table,
- dict_col_get_no(col)));
-
- dtype_print(&type);
-}
-
-/**************************************************************************
-Prints an index data. */
-static
-void
-dict_index_print_low(
-/*=================*/
- dict_index_t* index) /* in: index */
-{
- ib_longlong n_vals;
- ulint i;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- if (index->n_user_defined_cols > 0) {
- n_vals = index->stat_n_diff_key_vals[
- index->n_user_defined_cols];
- } else {
- n_vals = index->stat_n_diff_key_vals[1];
- }
-
- fprintf(stderr,
- " INDEX: name %s, id %lu %lu, fields %lu/%lu,"
- " uniq %lu, type %lu\n"
- " root page %lu, appr.key vals %lu,"
- " leaf pages %lu, size pages %lu\n"
- " FIELDS: ",
- index->name,
- (ulong) ut_dulint_get_high(index->id),
- (ulong) ut_dulint_get_low(index->id),
- (ulong) index->n_user_defined_cols,
- (ulong) index->n_fields,
- (ulong) index->n_uniq,
- (ulong) index->type,
- (ulong) index->page,
- (ulong) n_vals,
- (ulong) index->stat_n_leaf_pages,
- (ulong) index->stat_index_size);
-
- for (i = 0; i < index->n_fields; i++) {
- dict_field_print_low(dict_index_get_nth_field(index, i));
- }
-
- putc('\n', stderr);
-
-#ifdef UNIV_BTR_PRINT
- btr_print_size(index);
-
- btr_print_index(index, 7);
-#endif /* UNIV_BTR_PRINT */
-}
-
-/**************************************************************************
-Prints a field data. */
-static
-void
-dict_field_print_low(
-/*=================*/
- dict_field_t* field) /* in: field */
-{
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- fprintf(stderr, " %s", field->name);
-
- if (field->prefix_len != 0) {
- fprintf(stderr, "(%lu)", (ulong) field->prefix_len);
- }
-}
-
-/**************************************************************************
-Outputs info on a foreign key of a table in a format suitable for
-CREATE TABLE. */
-
-void
-dict_print_info_on_foreign_key_in_create_format(
-/*============================================*/
- FILE* file, /* in: file where to print */
- trx_t* trx, /* in: transaction */
- dict_foreign_t* foreign, /* in: foreign key constraint */
- ibool add_newline) /* in: whether to add a newline */
-{
- const char* stripped_id;
- ulint i;
-
- if (strchr(foreign->id, '/')) {
- /* Strip the preceding database name from the constraint id */
- stripped_id = foreign->id + 1
- + dict_get_db_name_len(foreign->id);
- } else {
- stripped_id = foreign->id;
- }
-
- putc(',', file);
-
- if (add_newline) {
- /* SHOW CREATE TABLE wants constraints each printed nicely
- on its own line, while error messages want no newlines
- inserted. */
- fputs("\n ", file);
- }
-
- fputs(" CONSTRAINT ", file);
- ut_print_name(file, trx, FALSE, stripped_id);
- fputs(" FOREIGN KEY (", file);
-
- for (i = 0;;) {
- ut_print_name(file, trx, FALSE, foreign->foreign_col_names[i]);
- if (++i < foreign->n_fields) {
- fputs(", ", file);
- } else {
- break;
- }
- }
-
- fputs(") REFERENCES ", file);
-
- if (dict_tables_have_same_db(foreign->foreign_table_name,
- foreign->referenced_table_name)) {
- /* Do not print the database name of the referenced table */
- ut_print_name(file, trx, TRUE,
- dict_remove_db_name(
- foreign->referenced_table_name));
- } else {
- ut_print_name(file, trx, TRUE,
- foreign->referenced_table_name);
- }
-
- putc(' ', file);
- putc('(', file);
-
- for (i = 0;;) {
- ut_print_name(file, trx, FALSE,
- foreign->referenced_col_names[i]);
- if (++i < foreign->n_fields) {
- fputs(", ", file);
- } else {
- break;
- }
- }
-
- putc(')', file);
-
- if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
- fputs(" ON DELETE CASCADE", file);
- }
-
- if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) {
- fputs(" ON DELETE SET NULL", file);
- }
-
- if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
- fputs(" ON DELETE NO ACTION", file);
- }
-
- if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
- fputs(" ON UPDATE CASCADE", file);
- }
-
- if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
- fputs(" ON UPDATE SET NULL", file);
- }
-
- if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
- fputs(" ON UPDATE NO ACTION", file);
- }
-}
-
-/**************************************************************************
-Outputs info on foreign keys of a table. */
-
-void
-dict_print_info_on_foreign_keys(
-/*============================*/
- ibool create_table_format, /* in: if TRUE then print in
- a format suitable to be inserted into
- a CREATE TABLE, otherwise in the format
- of SHOW TABLE STATUS */
- FILE* file, /* in: file where to print */
- trx_t* trx, /* in: transaction */
- dict_table_t* table) /* in: table */
-{
- dict_foreign_t* foreign;
-
- mutex_enter(&(dict_sys->mutex));
-
- foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
- if (foreign == NULL) {
- mutex_exit(&(dict_sys->mutex));
-
- return;
- }
-
- while (foreign != NULL) {
- if (create_table_format) {
- dict_print_info_on_foreign_key_in_create_format(
- file, trx, foreign, TRUE);
- } else {
- ulint i;
- fputs("; (", file);
-
- for (i = 0; i < foreign->n_fields; i++) {
- if (i) {
- putc(' ', file);
- }
-
- ut_print_name(file, trx, FALSE,
- foreign->foreign_col_names[i]);
- }
-
- fputs(") REFER ", file);
- ut_print_name(file, trx, TRUE,
- foreign->referenced_table_name);
- putc('(', file);
-
- for (i = 0; i < foreign->n_fields; i++) {
- if (i) {
- putc(' ', file);
- }
- ut_print_name(
- file, trx, FALSE,
- foreign->referenced_col_names[i]);
- }
-
- putc(')', file);
-
- if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE) {
- fputs(" ON DELETE CASCADE", file);
- }
-
- if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL) {
- fputs(" ON DELETE SET NULL", file);
- }
-
- if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
- fputs(" ON DELETE NO ACTION", file);
- }
-
- if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
- fputs(" ON UPDATE CASCADE", file);
- }
-
- if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
- fputs(" ON UPDATE SET NULL", file);
- }
-
- if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
- fputs(" ON UPDATE NO ACTION", file);
- }
- }
-
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
-
- mutex_exit(&(dict_sys->mutex));
-}
-
-/************************************************************************
-Displays the names of the index and the table. */
-void
-dict_index_name_print(
-/*==================*/
- FILE* file, /* in: output stream */
- trx_t* trx, /* in: transaction */
- const dict_index_t* index) /* in: index to print */
-{
- fputs("index ", file);
- ut_print_name(file, trx, FALSE, index->name);
- fputs(" of table ", file);
- ut_print_name(file, trx, TRUE, index->table_name);
-}
diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
deleted file mode 100644
index 65f1c9536bd..00000000000
--- a/storage/innobase/dict/dict0load.c
+++ /dev/null
@@ -1,1360 +0,0 @@
-/******************************************************
-Loads to the memory cache database object definitions
-from dictionary tables
-
-(c) 1996 Innobase Oy
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0load.h"
-#ifndef UNIV_HOTBACKUP
-#include "mysql_version.h"
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_NONINL
-#include "dict0load.ic"
-#endif
-
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "page0page.h"
-#include "mach0data.h"
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "rem0cmp.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-
-/********************************************************************
-Returns TRUE if index's i'th column's name is 'name' .*/
-static
-ibool
-name_of_col_is(
-/*===========*/
- /* out: */
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index */
- ulint i, /* in: */
- const char* name) /* in: name to compare to */
-{
- ulint tmp = dict_col_get_no(dict_field_get_col(
- dict_index_get_nth_field(
- index, i)));
-
- return(strcmp(name, dict_table_get_col_name(table, tmp)) == 0);
-}
-
-/************************************************************************
-Finds the first table name in the given database. */
-
-char*
-dict_get_first_table_name_in_db(
-/*============================*/
- /* out, own: table name, NULL if
- does not exist; the caller must
- free the memory in the string! */
- const char* name) /* in: database name which ends in '/' */
-{
- dict_table_t* sys_tables;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap;
- dfield_t* dfield;
- rec_t* rec;
- byte* field;
- ulint len;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap = mem_heap_create(1000);
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_a(!dict_table_is_comp(sys_tables));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, name, ut_strlen(name));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-loop:
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- if (len < strlen(name)
- || ut_memcmp(name, field, strlen(name)) != 0) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- if (!rec_get_deleted_flag(rec, 0)) {
-
- /* We found one */
-
- char* table_name = mem_strdupl((char*) field, len);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(table_name);
- }
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- goto loop;
-}
-
-/************************************************************************
-Prints to the standard output information on all tables found in the data
-dictionary system table. */
-
-void
-dict_print(void)
-/*============*/
-{
- dict_table_t* sys_tables;
- dict_index_t* sys_index;
- dict_table_t* table;
- btr_pcur_t pcur;
- rec_t* rec;
- byte* field;
- ulint len;
- mtr_t mtr;
-
- /* Enlarge the fatal semaphore wait timeout during the InnoDB table
- monitor printout */
-
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- mutex_enter(&(dict_sys->mutex));
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
-
- btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
- TRUE, &mtr);
-loop:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
- /* end of index */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- mutex_exit(&(dict_sys->mutex));
-
- /* Restore the fatal semaphore wait timeout */
-
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- if (!rec_get_deleted_flag(rec, 0)) {
-
- /* We found one */
-
- char* table_name = mem_strdupl((char*) field, len);
-
- btr_pcur_store_position(&pcur, &mtr);
-
- mtr_commit(&mtr);
-
- table = dict_table_get_low(table_name);
- mem_free(table_name);
-
- if (table == NULL) {
- fputs("InnoDB: Failed to load table ", stderr);
- ut_print_namel(stderr, NULL, TRUE, (char*) field, len);
- putc('\n', stderr);
- } else {
- /* The table definition was corrupt if there
- is no index */
-
- if (dict_table_get_first_index(table)) {
- dict_update_statistics_low(table, TRUE);
- }
-
- dict_table_print_low(table);
- }
-
- mtr_start(&mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
- }
-
- goto loop;
-}
-
-/************************************************************************
-In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary
-to what we already read with fil_load_single_table_tablespaces().
-
-In a normal startup, we create the tablespace objects for every table in
-InnoDB's data dictionary, if the corresponding .ibd file exists.
-We also scan the biggest space id, and store it to fil_system. */
-
-void
-dict_check_tablespaces_and_store_max_id(
-/*====================================*/
- ibool in_crash_recovery) /* in: are we doing a crash recovery */
-{
- dict_table_t* sys_tables;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- rec_t* rec;
- byte* field;
- ulint len;
- ulint space_id;
- ulint max_space_id = 0;
- mtr_t mtr;
-
- mutex_enter(&(dict_sys->mutex));
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_a(!dict_table_is_comp(sys_tables));
-
- btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
- TRUE, &mtr);
-loop:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
- /* end of index */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- /* We must make the tablespace cache aware of the biggest
- known space id */
-
- /* printf("Biggest space id in data dictionary %lu\n",
- max_space_id); */
- fil_set_max_space_id_if_bigger(max_space_id);
-
- mutex_exit(&(dict_sys->mutex));
-
- return;
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- if (!rec_get_deleted_flag(rec, 0)) {
-
- /* We found one */
-
- char* name = mem_strdupl((char*) field, len);
-
- field = rec_get_nth_field_old(rec, 9, &len);
- ut_a(len == 4);
-
- space_id = mach_read_from_4(field);
-
- btr_pcur_store_position(&pcur, &mtr);
-
- mtr_commit(&mtr);
-
- if (space_id != 0 && in_crash_recovery) {
- /* Check that the tablespace (the .ibd file) really
- exists; print a warning to the .err log if not */
-
- fil_space_for_table_exists_in_mem(space_id, name,
- FALSE, TRUE, TRUE);
- }
-
- if (space_id != 0 && !in_crash_recovery) {
- /* It is a normal database startup: create the space
- object and check that the .ibd file exists. */
-
- fil_open_single_table_tablespace(FALSE, space_id,
- name);
- }
-
- mem_free(name);
-
- if (space_id > max_space_id) {
- max_space_id = space_id;
- }
-
- mtr_start(&mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
- }
-
- goto loop;
-}
-
-/************************************************************************
-Loads definitions for table columns. */
-static
-void
-dict_load_columns(
-/*==============*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
-{
- dict_table_t* sys_columns;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- rec_t* rec;
- byte* field;
- ulint len;
- byte* buf;
- char* name;
- ulint mtype;
- ulint prtype;
- ulint col_len;
- ulint i;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- mtr_start(&mtr);
-
- sys_columns = dict_table_get_low("SYS_COLUMNS");
- sys_index = UT_LIST_GET_FIRST(sys_columns->indexes);
- ut_a(!dict_table_is_comp(sys_columns));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, table->id);
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
-
- rec = btr_pcur_get_rec(&pcur);
-
- ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
-
- ut_a(!rec_get_deleted_flag(rec, 0));
-
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
- ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0);
-
- field = rec_get_nth_field_old(rec, 1, &len);
- ut_ad(len == 4);
- ut_a(i == mach_read_from_4(field));
-
- ut_a(name_of_col_is(sys_columns, sys_index, 4, "NAME"));
-
- field = rec_get_nth_field_old(rec, 4, &len);
- name = mem_heap_strdupl(heap, (char*) field, len);
-
- field = rec_get_nth_field_old(rec, 5, &len);
- mtype = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(rec, 6, &len);
- prtype = mach_read_from_4(field);
-
- if (dtype_get_charset_coll(prtype) == 0
- && dtype_is_string_type(mtype)) {
- /* The table was created with < 4.1.2. */
-
- if (dtype_is_binary_string_type(mtype, prtype)) {
- /* Use the binary collation for
- string columns of binary type. */
-
- prtype = dtype_form_prtype(
- prtype,
- DATA_MYSQL_BINARY_CHARSET_COLL);
- } else {
- /* Use the default charset for
- other than binary columns. */
-
- prtype = dtype_form_prtype(
- prtype,
- data_mysql_default_charset_coll);
- }
- }
-
- field = rec_get_nth_field_old(rec, 7, &len);
- col_len = mach_read_from_4(field);
-
- ut_a(name_of_col_is(sys_columns, sys_index, 8, "PREC"));
-
- dict_mem_table_add_col(table, heap, name,
- mtype, prtype, col_len);
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/************************************************************************
-Report that an index field or index for a table has been delete marked. */
-static
-void
-dict_load_report_deleted_index(
-/*===========================*/
- const char* name, /* in: table name */
- ulint field) /* in: index field, or ULINT_UNDEFINED */
-{
- fprintf(stderr, "InnoDB: Error: data dictionary entry"
- " for table %s is corrupt!\n", name);
- if (field != ULINT_UNDEFINED) {
- fprintf(stderr,
- "InnoDB: Index field %lu is delete marked.\n", field);
- } else {
- fputs("InnoDB: An index is delete marked.\n", stderr);
- }
-}
-
-/************************************************************************
-Loads definitions for index fields. */
-static
-void
-dict_load_fields(
-/*=============*/
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index whose fields to load */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
-{
- dict_table_t* sys_fields;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- ulint pos_and_prefix_len;
- ulint prefix_len;
- rec_t* rec;
- byte* field;
- ulint len;
- byte* buf;
- ulint i;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- mtr_start(&mtr);
-
- sys_fields = dict_table_get_low("SYS_FIELDS");
- sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);
- ut_a(!dict_table_is_comp(sys_fields));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, index->id);
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i < index->n_fields; i++) {
-
- rec = btr_pcur_get_rec(&pcur);
-
- ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
- if (rec_get_deleted_flag(rec, 0)) {
- dict_load_report_deleted_index(table->name, i);
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
- ut_a(ut_memcmp(buf, field, len) == 0);
-
- field = rec_get_nth_field_old(rec, 1, &len);
- ut_a(len == 4);
-
- /* The next field stores the field position in the index
- and a possible column prefix length if the index field
- does not contain the whole column. The storage format is
- like this: if there is at least one prefix field in the index,
- then the HIGH 2 bytes contain the field number (== i) and the
- low 2 bytes the prefix length for the field. Otherwise the
- field number (== i) is contained in the 2 LOW bytes. */
-
- pos_and_prefix_len = mach_read_from_4(field);
-
- ut_a((pos_and_prefix_len & 0xFFFFUL) == i
- || (pos_and_prefix_len & 0xFFFF0000UL) == (i << 16));
-
- if ((i == 0 && pos_and_prefix_len > 0)
- || (pos_and_prefix_len & 0xFFFF0000UL) > 0) {
-
- prefix_len = pos_and_prefix_len & 0xFFFFUL;
- } else {
- prefix_len = 0;
- }
-
- ut_a(name_of_col_is(sys_fields, sys_index, 4, "COL_NAME"));
-
- field = rec_get_nth_field_old(rec, 4, &len);
-
- dict_mem_index_add_field(index,
- mem_heap_strdupl(heap,
- (char*) field, len),
- prefix_len);
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/************************************************************************
-Loads definitions for table indexes. Adds them to the data dictionary
-cache. */
-static
-ulint
-dict_load_indexes(
-/*==============*/
- /* out: DB_SUCCESS if ok, DB_CORRUPTION
- if corruption of dictionary table or
- DB_UNSUPPORTED if table has unknown index
- type */
- dict_table_t* table, /* in: table */
- mem_heap_t* heap) /* in: memory heap for temporary storage */
-{
- dict_table_t* sys_indexes;
- dict_index_t* sys_index;
- dict_index_t* index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- rec_t* rec;
- byte* field;
- ulint len;
- ulint name_len;
- char* name_buf;
- ulint type;
- ulint space;
- ulint page_no;
- ulint n_fields;
- byte* buf;
- ibool is_sys_table;
- dulint id;
- mtr_t mtr;
- ulint error = DB_SUCCESS;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- if ((ut_dulint_get_high(table->id) == 0)
- && (ut_dulint_get_low(table->id) < DICT_HDR_FIRST_ID)) {
- is_sys_table = TRUE;
- } else {
- is_sys_table = FALSE;
- }
-
- mtr_start(&mtr);
-
- sys_indexes = dict_table_get_low("SYS_INDEXES");
- sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes);
- ut_a(!dict_table_is_comp(sys_indexes));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, table->id);
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (;;) {
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
-
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
-
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
-
- if (ut_memcmp(buf, field, len) != 0) {
- break;
- }
-
- if (rec_get_deleted_flag(rec, 0)) {
- dict_load_report_deleted_index(table->name,
- ULINT_UNDEFINED);
-
- error = DB_CORRUPTION;
- goto func_exit;
- }
-
- field = rec_get_nth_field_old(rec, 1, &len);
- ut_ad(len == 8);
- id = mach_read_from_8(field);
-
- ut_a(name_of_col_is(sys_indexes, sys_index, 4, "NAME"));
-
- field = rec_get_nth_field_old(rec, 4, &name_len);
- name_buf = mem_heap_strdupl(heap, (char*) field, name_len);
-
- field = rec_get_nth_field_old(rec, 5, &len);
- n_fields = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(rec, 6, &len);
- type = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(rec, 7, &len);
- space = mach_read_from_4(field);
-
- ut_a(name_of_col_is(sys_indexes, sys_index, 8, "PAGE_NO"));
-
- field = rec_get_nth_field_old(rec, 8, &len);
- page_no = mach_read_from_4(field);
-
- /* We check for unsupported types first, so that the
- subsequent checks are relevant for the supported types. */
- if (type & ~(DICT_CLUSTERED | DICT_UNIQUE)) {
-
- fprintf(stderr,
- "InnoDB: Error: unknown type %lu"
- " of index %s of table %s\n",
- (ulong) type, name_buf, table->name);
-
- error = DB_UNSUPPORTED;
- goto func_exit;
- } else if (page_no == FIL_NULL) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to load index %s"
- " for table %s\n"
- "InnoDB: but the index tree has been freed!\n",
- name_buf, table->name);
-
- error = DB_CORRUPTION;
- goto func_exit;
- } else if ((type & DICT_CLUSTERED) == 0
- && NULL == dict_table_get_first_index(table)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to load index %s"
- " for table %s\n"
- "InnoDB: but the first index"
- " is not clustered!\n",
- name_buf, table->name);
-
- error = DB_CORRUPTION;
- goto func_exit;
- } else if (is_sys_table
- && ((type & DICT_CLUSTERED)
- || ((table == dict_sys->sys_tables)
- && (name_len == (sizeof "ID_IND") - 1)
- && (0 == ut_memcmp(name_buf,
- "ID_IND", name_len))))) {
-
- /* The index was created in memory already at booting
- of the database server */
- } else {
- index = dict_mem_index_create(table->name, name_buf,
- space, type, n_fields);
- index->id = id;
-
- dict_load_fields(table, index, heap);
- dict_index_add_to_cache(table, index, page_no);
- }
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
-func_exit:
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(error);
-}
-
-/************************************************************************
-Loads a table definition and also all its index definitions, and also
-the cluster definition if the table is a member in a cluster. Also loads
-all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table. Adds all these to the data
-dictionary cache.
-
-The table is looked up by name in the first index of SYS_TABLES. A table
-object is built from the SPACE, N_COLS, ID and type fields of the record
-found; after that the columns, the indexes and finally the foreign key
-constraints are loaded. If loading the indexes fails and
-srv_force_recovery is not set, the table is removed from the cache again
-and NULL is returned. The caller must own dict_sys->mutex. */
-
-dict_table_t*
-dict_load_table(
-/*============*/
- /* out: table, NULL if does not exist;
- if the table is stored in an .ibd file,
- but the file does not exist,
- then we set the ibd_file_missing flag TRUE
- in the table object we return */
- const char* name) /* in: table name in the
- databasename/tablename format */
-{
- ibool ibd_file_missing = FALSE;
- dict_table_t* table;
- dict_table_t* sys_tables;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap;
- dfield_t* dfield;
- rec_t* rec;
- byte* field;
- ulint len;
- ulint space;
- ulint n_cols;
- ulint flags;
- ulint err;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap = mem_heap_create(32000);
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_a(!dict_table_is_comp(sys_tables));
-
- /* Build a one-field search tuple holding the table name */
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, name, ut_strlen(name));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
- || rec_get_deleted_flag(rec, 0)) {
- /* Not found */
-err_exit:
- /* Common failure exit: releases the cursor, the
- mini-transaction and the temporary heap only. A table
- object that was already created must be freed by the
- code that jumps here. */
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- /* Check if the table name in record is the searched one */
- if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) {
-
- goto err_exit;
- }
-
- ut_a(name_of_col_is(sys_tables, sys_index, 9, "SPACE"));
-
- field = rec_get_nth_field_old(rec, 9, &len);
- space = mach_read_from_4(field);
-
- /* Check if the tablespace exists and has the right name */
- if (space != 0) {
- if (fil_space_for_table_exists_in_mem(space, name, FALSE,
- FALSE, FALSE)) {
- /* Ok; (if we did a crash recovery then the tablespace
- can already be in the memory cache) */
- } else {
- /* In >= 4.1.9, InnoDB scans the data dictionary also
- at a normal mysqld startup. It is an error if the
- space object does not exist in memory. */
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: error: space object of table %s,\n"
- "InnoDB: space id %lu did not exist in memory."
- " Retrying an open.\n",
- name, (ulong)space);
- /* Try to open the tablespace */
- if (!fil_open_single_table_tablespace(TRUE,
- space, name)) {
- /* We failed to find a sensible tablespace
- file */
-
- ibd_file_missing = TRUE;
- }
- }
- }
-
- ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS"));
-
- field = rec_get_nth_field_old(rec, 4, &len);
- n_cols = mach_read_from_4(field);
-
- flags = 0;
-
- /* The high-order bit of N_COLS is the "compact format" flag. */
- if (n_cols & 0x80000000UL) {
- flags |= DICT_TF_COMPACT;
- }
-
- table = dict_mem_table_create(name, space, n_cols & ~0x80000000UL,
- flags);
-
- table->ibd_file_missing = (unsigned int) ibd_file_missing;
-
- ut_a(name_of_col_is(sys_tables, sys_index, 3, "ID"));
-
- field = rec_get_nth_field_old(rec, 3, &len);
- table->id = mach_read_from_8(field);
-
- field = rec_get_nth_field_old(rec, 5, &len);
- if (UNIV_UNLIKELY(mach_read_from_4(field) != DICT_TABLE_ORDINARY)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: table %s: unknown table type %lu\n",
- name, (ulong) mach_read_from_4(field));
-
- /* The table object has not been added to the dictionary
- cache yet, so nothing else owns it: free it here, because
- err_exit only releases the cursor, the mtr and the
- temporary heap. */
- dict_mem_table_free(table);
-
- goto err_exit;
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- dict_load_columns(table, heap);
-
- dict_table_add_to_cache(table, heap);
-
- mem_heap_empty(heap);
-
- err = dict_load_indexes(table, heap);
-
- /* If the force recovery flag is set, we open the table irrespective
- of the error condition, since the user may want to dump data from the
- clustered index. However we load the foreign key information only if
- all indexes were loaded. */
- if (err == DB_SUCCESS) {
- err = dict_load_foreigns(table->name, TRUE);
- } else if (!srv_force_recovery) {
- dict_table_remove_from_cache(table);
- table = NULL;
- }
-#if 0
- if (err != DB_SUCCESS && table != NULL) {
-
- mutex_enter(&dict_foreign_err_mutex);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: could not make a foreign key"
- " definition to match\n"
- "InnoDB: the foreign key table"
- " or the referenced table!\n"
- "InnoDB: The data dictionary of InnoDB is corrupt."
- " You may need to drop\n"
- "InnoDB: and recreate the foreign key table"
- " or the referenced table.\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Latest foreign key error printout:\n%s\n",
- dict_foreign_err_buf);
-
- mutex_exit(&dict_foreign_err_mutex);
- }
-#endif /* 0 */
- mem_heap_free(heap);
-
- return(table);
-}
-
-/***************************************************************************
-Loads a table object based on the table id. The id is searched for in the
-second index of SYS_TABLES; if a matching record that is not delete-marked
-is found, the table name stored in its second field is passed on to
-dict_load_table(). The caller must own dict_sys->mutex (checked with
-ut_ad). */
-
-dict_table_t*
-dict_load_table_on_id(
-/*==================*/
- /* out: table; NULL if table does not exist */
- dulint table_id) /* in: table id */
-{
- byte id_buf[8]; /* table_id in its 8-byte stored form */
- btr_pcur_t pcur;
- mem_heap_t* heap;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sys_table_ids;
- dict_table_t* sys_tables;
- rec_t* rec;
- byte* field;
- ulint len;
- dict_table_t* table;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* NOTE that the operation of this function is protected by
- the dictionary mutex, and therefore no deadlocks can occur
- with other dictionary operations. */
-
- mtr_start(&mtr);
- /*---------------------------------------------------*/
- /* Get the secondary index based on ID for table SYS_TABLES */
- sys_tables = dict_sys->sys_tables;
- sys_table_ids = dict_table_get_next_index(
- dict_table_get_first_index(sys_tables));
- ut_a(!dict_table_is_comp(sys_tables));
- heap = mem_heap_create(256);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- /* Write the table id in byte format to id_buf */
- mach_write_to_8(id_buf, table_id);
-
- dfield_set_data(dfield, id_buf, 8);
- dict_index_copy_types(tuple, sys_table_ids, 1);
-
- btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr); /* presumably a ">=" search:
- the id of the record found is compared
- again below */
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
- || rec_get_deleted_flag(rec, 0)) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- /*---------------------------------------------------*/
- /* Now we have the record in the secondary index containing the
- table ID and NAME */
-
- rec = btr_pcur_get_rec(&pcur);
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
-
- /* Check if the table id in record is the one searched for */
- if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) {
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- /* Now we get the table name from the record */
- field = rec_get_nth_field_old(rec, 1, &len);
- /* Load the table definition to memory; the name is first copied
- to heap because dict_load_table() takes its length with
- ut_strlen() (the copy is presumably null-terminated). Note that
- pcur and mtr are still open during this call. */
- table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len));
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(table);
-}
-
-/************************************************************************
-Boot-time loader for the index definitions of a system table. The
-clustered index is not loaded here: it is added to the dictionary cache
-at booting, before this function is called. */
-
-void
-dict_load_sys_table(
-/*================*/
-	dict_table_t*	table)	/* in: system table */
-{
-	mem_heap_t*	tmp_heap;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	/* Scratch heap that lives only for the duration of the load */
-	tmp_heap = mem_heap_create(1000);
-	dict_load_indexes(table, tmp_heap);
-	mem_heap_free(tmp_heap);
-}
-
-/************************************************************************
-Loads foreign key constraint col names (also for the referenced table).
-Reads foreign->n_fields consecutive records from the first index of
-SYS_FOREIGN_COLS, starting at the position found for id, and stores copies
-of record fields 4 and 5 in foreign->foreign_col_names[] and
-foreign->referenced_col_names[]. Both arrays and the name copies are
-allocated from foreign->heap. Every record is required (ut_a) to carry
-the given id and the expected position number. */
-static
-void
-dict_load_foreign_cols(
-/*===================*/
- const char* id, /* in: foreign constraint id as a
- null-terminated string */
- dict_foreign_t* foreign)/* in: foreign constraint object */
-{
- dict_table_t* sys_foreign_cols;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- rec_t* rec;
- byte* field;
- ulint len;
- ulint i;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- foreign->foreign_col_names = mem_heap_alloc(
- foreign->heap, foreign->n_fields * sizeof(void*));
-
- foreign->referenced_col_names = mem_heap_alloc(
- foreign->heap, foreign->n_fields * sizeof(void*));
- mtr_start(&mtr);
-
- sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS");
- sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes);
- ut_a(!dict_table_is_comp(sys_foreign_cols));
-
- tuple = dtuple_create(foreign->heap, 1); /* search tuple: the id only */
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, id, ut_strlen(id));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i < foreign->n_fields; i++) {
-
- rec = btr_pcur_get_rec(&pcur);
-
- ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
- ut_a(!rec_get_deleted_flag(rec, 0));
-
- field = rec_get_nth_field_old(rec, 0, &len); /* constraint id */
- ut_a(len == ut_strlen(id));
- ut_a(ut_memcmp(id, field, len) == 0);
-
- field = rec_get_nth_field_old(rec, 1, &len); /* position, must be i */
- ut_a(len == 4);
- ut_a(i == mach_read_from_4(field));
-
- field = rec_get_nth_field_old(rec, 4, &len);
- foreign->foreign_col_names[i] = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- field = rec_get_nth_field_old(rec, 5, &len);
- foreign->referenced_col_names[i] = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/***************************************************************************
-Loads a foreign key constraint to the dictionary cache. The constraint is
-looked up by id in the first index of SYS_FOREIGN. If no matching record
-that is not delete-marked is found, an error is printed to stderr and
-DB_ERROR is returned. Otherwise a new dict_foreign_t is created, filled
-from the record and from dict_load_foreign_cols(), and handed over to
-dict_foreign_add_to_cache(), whose return value is returned. The caller
-must own dict_sys->mutex (checked with ut_ad). */
-static
-ulint
-dict_load_foreign(
-/*==============*/
- /* out: DB_SUCCESS or error code */
- const char* id, /* in: foreign constraint id as a
- null-terminated string */
- ibool check_charsets)
- /* in: TRUE=check charset compatibility */
-{
- dict_foreign_t* foreign;
- dict_table_t* sys_foreign;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap2;
- dfield_t* dfield;
- rec_t* rec;
- byte* field;
- ulint len;
- ulint n_fields_and_type;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap2 = mem_heap_create(1000); /* holds only the search tuple */
-
- mtr_start(&mtr);
-
- sys_foreign = dict_table_get_low("SYS_FOREIGN");
- sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes);
- ut_a(!dict_table_is_comp(sys_foreign));
-
- tuple = dtuple_create(heap2, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, id, ut_strlen(id));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
- || rec_get_deleted_flag(rec, 0)) {
- /* Not found */
-
- fprintf(stderr,
- "InnoDB: Error A: cannot load foreign constraint %s\n",
- id);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap2);
-
- return(DB_ERROR);
- }
-
- field = rec_get_nth_field_old(rec, 0, &len);
-
- /* Check if the id in record is the searched one */
- if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) {
-
- fprintf(stderr,
- "InnoDB: Error B: cannot load foreign constraint %s\n",
- id);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap2);
-
- return(DB_ERROR);
- }
-
- /* Read the table names and the number of columns associated
- with the constraint */
-
- mem_heap_free(heap2); /* the search tuple is not referenced after
- this point; rec is still read below while
- pcur and mtr stay open */
-
- foreign = dict_mem_foreign_create();
-
- n_fields_and_type = mach_read_from_4(
- rec_get_nth_field_old(rec, 5, &len));
-
- ut_a(len == 4);
-
- /* We store the type in the bits 24..29 of n_fields_and_type.
- NOTE(review): the shift below keeps every bit from 24 upwards, and
- the field count is taken from the low 10 bits (mask 0x3FF). */
-
- foreign->type = (unsigned int) (n_fields_and_type >> 24);
- foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL);
-
- foreign->id = mem_heap_strdup(foreign->heap, id);
-
- field = rec_get_nth_field_old(rec, 3, &len);
- foreign->foreign_table_name = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- field = rec_get_nth_field_old(rec, 4, &len);
- foreign->referenced_table_name = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- dict_load_foreign_cols(id, foreign);
-
- /* If the foreign table is not yet in the dictionary cache, we
- have to load it so that we are able to make type comparisons
- in the next function call. */
-
- dict_table_get_low(foreign->foreign_table_name);
-
- /* Note that there may already be a foreign constraint object in
- the dictionary cache for this constraint: then the following
- call only sets the pointers in it to point to the appropriate table
- and index objects and frees the newly created object foreign.
- Adding to the cache should always succeed since we are not creating
- a new foreign key constraint but loading one from the data
- dictionary. */
-
- return(dict_foreign_add_to_cache(foreign, check_charsets));
-}
-
-/***************************************************************************
-Loads foreign key constraints where the table is either the foreign key
-holder or where the table is referenced by a foreign key. Adds these
-constraints to the data dictionary. Note that we know that the dictionary
-cache already contains all constraints where the other relevant table is
-already in the dictionary cache.
-
-The scan starts from the second index of SYS_FOREIGN and is repeated for
-every following index of that table until dict_table_get_next_index()
-returns NULL. For each record whose first field equals table_name both in
-the collation comparison and bytewise, and which is not delete-marked, the
-constraint id in the second field is passed to dict_load_foreign(). The
-first error returned by dict_load_foreign() ends the scan and is returned
-to the caller. The caller must own dict_sys->mutex (checked with ut_ad). */
-
-ulint
-dict_load_foreigns(
-/*===============*/
- /* out: DB_SUCCESS or error code */
- const char* table_name, /* in: table name */
- ibool check_charsets) /* in: TRUE=check charset
- compatibility */
-{
- btr_pcur_t pcur;
- mem_heap_t* heap;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sec_index;
- dict_table_t* sys_foreign;
- rec_t* rec;
- byte* field;
- ulint len;
- char* id ;
- ulint err;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- sys_foreign = dict_table_get_low("SYS_FOREIGN");
-
- if (sys_foreign == NULL) {
- /* No foreign keys defined yet in this database */
-
- fprintf(stderr,
- "InnoDB: Error: no foreign key system tables"
- " in the database\n");
-
- return(DB_ERROR);
- }
-
- ut_a(!dict_table_is_comp(sys_foreign));
- mtr_start(&mtr);
-
- /* Get the secondary index based on FOR_NAME from table
- SYS_FOREIGN */
-
- sec_index = dict_table_get_next_index(
- dict_table_get_first_index(sys_foreign));
-start_load:
- /* One pass per index: a fresh heap and search tuple are created for
- each pass and released again at load_next_index */
- heap = mem_heap_create(256);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, table_name, ut_strlen(table_name));
- dict_index_copy_types(tuple, sec_index, 1);
-
- btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-loop:
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
- /* End of index */
-
- goto load_next_index;
- }
-
- /* Now we have the record in the secondary index containing a table
- name and a foreign constraint ID */
-
- rec = btr_pcur_get_rec(&pcur);
- field = rec_get_nth_field_old(rec, 0, &len);
-
- /* Check if the table name in the record is the one searched for; the
- following call does the comparison in the latin1_swedish_ci
- charset-collation, in a case-insensitive way. */
-
- if (0 != cmp_data_data(dfield_get_type(dfield)->mtype,
- dfield_get_type(dfield)->prtype,
- dfield_get_data(dfield), dfield_get_len(dfield),
- field, len)) {
-
- goto load_next_index;
- }
-
- /* Since table names in SYS_FOREIGN are stored in a case-insensitive
- order, we have to check that the table name matches also in a binary
- string comparison. On Unix, MySQL allows table names that only differ
- in character case. */
-
- if (0 != ut_memcmp(field, table_name, len)) {
-
- goto next_rec;
- }
-
- if (rec_get_deleted_flag(rec, 0)) {
-
- goto next_rec;
- }
-
- /* Now we get a foreign key constraint id */
- field = rec_get_nth_field_old(rec, 1, &len);
- id = mem_heap_strdupl(heap, (char*) field, len);
-
- btr_pcur_store_position(&pcur, &mtr); /* the position is restored
- after dict_load_foreign() */
-
- mtr_commit(&mtr); /* dict_load_foreign() starts an mtr of its own */
-
- /* Load the foreign constraint definition to the dictionary cache */
-
- err = dict_load_foreign(id, check_charsets);
-
- if (err != DB_SUCCESS) {
- btr_pcur_close(&pcur); /* mtr was already committed above */
- mem_heap_free(heap);
-
- return(err);
- }
-
- mtr_start(&mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- goto loop;
-
-load_next_index:
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- sec_index = dict_table_get_next_index(sec_index);
-
- if (sec_index != NULL) {
-
- mtr_start(&mtr); /* start_load expects an active mtr */
-
- goto start_load;
- }
-
- return(DB_SUCCESS);
-}
diff --git a/storage/innobase/dict/dict0mem.c b/storage/innobase/dict/dict0mem.c
deleted file mode 100644
index 168771ca307..00000000000
--- a/storage/innobase/dict/dict0mem.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/**********************************************************************
-Data dictionary memory object creation
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "dict0mem.h"
-
-#ifdef UNIV_NONINL
-#include "dict0mem.ic"
-#endif
-
-#include "rem0rec.h"
-#include "data0type.h"
-#include "mach0data.h"
-#include "dict0dict.h"
-#include "que0que.h"
-#include "pars0pars.h"
-#include "lock0lock.h"
-
-#define DICT_HEAP_SIZE 100 /* initial memory heap size when
- creating a table or index object */
-
-/**************************************************************************
-Allocates a table memory object from a memory heap of its own and sets
-its fields to their initial values. The column array is sized for n_cols
-user columns plus DATA_N_SYS_COLS more. */
-
-dict_table_t*
-dict_mem_table_create(
-/*==================*/
-				/* out, own: table object */
-	const char*	name,	/* in: table name */
-	ulint		space,	/* in: space where the clustered index of
-				the table is placed; this parameter is
-				ignored if the table is made a member of
-				a cluster */
-	ulint		n_cols,	/* in: number of columns */
-	ulint		flags)	/* in: table flags */
-{
-	mem_heap_t*	table_heap;
-	dict_table_t*	new_table;
-	ulint		n_total_cols;
-
-	ut_ad(name);
-	ut_ad(!(flags & ~DICT_TF_COMPACT));
-
-	n_total_cols = n_cols + DATA_N_SYS_COLS;
-
-	/* Everything that belongs to the table object is carved out of
-	the same heap, which the object itself remembers */
-	table_heap = mem_heap_create(DICT_HEAP_SIZE);
-	new_table = mem_heap_alloc(table_heap, sizeof(dict_table_t));
-	new_table->heap = table_heap;
-
-	new_table->name = mem_heap_strdup(table_heap, name);
-	new_table->cols = mem_heap_alloc(table_heap,
-					 n_total_cols * sizeof(dict_col_t));
-	new_table->auto_inc_lock = mem_heap_alloc(table_heap,
-						  lock_get_size());
-
-	/* Identity and layout */
-	new_table->flags = (unsigned int) flags;
-	new_table->space = (unsigned int) space;
-	new_table->dir_path_of_temp_table = NULL;
-	new_table->ibd_file_missing = FALSE;
-	new_table->tablespace_discarded = FALSE;
-	new_table->cached = FALSE;
-
-	/* Column bookkeeping: no column has been defined yet */
-	new_table->n_def = 0;
-	new_table->n_cols = (unsigned int) n_total_cols;
-	new_table->col_names = NULL;
-
-	/* Usage counters and statistics */
-	new_table->n_mysql_handles_opened = 0;
-	new_table->n_foreign_key_checks_running = 0;
-	new_table->query_cache_inv_trx_id = ut_dulint_zero;
-	new_table->stat_initialized = FALSE;
-	new_table->stat_modified_counter = 0;
-	new_table->big_rows = 0;
-
-	/* All lists start out empty */
-	UT_LIST_INIT(new_table->indexes);
-	UT_LIST_INIT(new_table->locks);
-	UT_LIST_INIT(new_table->foreign_list);
-	UT_LIST_INIT(new_table->referenced_list);
-
-	/* Auto-increment state */
-	mutex_create(&new_table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
-	new_table->autoinc = 0;
-
-	/* The number of transactions that are either waiting on the
-	AUTOINC lock or have been granted the lock. */
-	new_table->n_waiting_or_granted_auto_inc_locks = 0;
-
-#ifdef UNIV_DEBUG
-	new_table->does_not_fit_in_memory = FALSE;
-	new_table->magic_n = DICT_TABLE_MAGIC_N;
-#endif /* UNIV_DEBUG */
-
-	return(new_table);
-}
-
-/********************************************************************
-Releases a table memory object: frees its autoinc mutex and then the
-memory heap of the table. */
-
-void
-dict_mem_table_free(
-/*================*/
-	dict_table_t*	table)	/* in: table */
-{
-	mem_heap_t*	table_heap;
-
-	ut_ad(table);
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-	/* The mutex is released before the heap of the table is freed */
-	mutex_free(&(table->autoinc_mutex));
-
-	table_heap = table->heap;
-	mem_heap_free(table_heap);
-}
-
-/********************************************************************
-Builds a new column names array in 'heap' that consists of the 'cols'
-existing null-terminated names in 'col_names' followed by a copy of
-'name' (@see dict_table_t::col_names). The old array is left untouched. */
-static
-const char*
-dict_add_col_name(
-/*==============*/
-					/* out: new column names array */
-	const char*	col_names,	/* in: existing column names, or
-					NULL */
-	ulint		cols,		/* in: number of existing columns */
-	const char*	name,		/* in: new column name */
-	mem_heap_t*	heap)		/* in: heap */
-{
-	ulint	prefix_len = 0;
-	ulint	name_size;
-	char*	buf;
-
-	ut_ad(!cols == !col_names);
-
-	/* Measure the existing array by skipping over 'cols' names,
-	each together with its terminating null byte. */
-	if (col_names != NULL) {
-		const char*	end = col_names;
-		ulint		n_left;
-
-		for (n_left = cols; n_left > 0; n_left--) {
-			end += strlen(end) + 1;
-		}
-
-		prefix_len = end - col_names;
-	}
-
-	/* The new name is copied with its terminating null byte */
-	name_size = strlen(name) + 1;
-
-	buf = mem_heap_alloc(heap, prefix_len + name_size);
-
-	if (prefix_len != 0) {
-		memcpy(buf, col_names, prefix_len);
-	}
-
-	memcpy(buf + prefix_len, name, name_size);
-
-	return(buf);
-}
-
-/**************************************************************************
-Appends a column definition to a table: takes the next free slot in the
-column array, records the column name (if one is given) and fills in the
-type information of the column. */
-
-void
-dict_mem_table_add_col(
-/*===================*/
-	dict_table_t*	table,	/* in: table */
-	mem_heap_t*	heap,	/* in: temporary memory heap, or NULL */
-	const char*	name,	/* in: column name, or NULL */
-	ulint		mtype,	/* in: main datatype */
-	ulint		prtype,	/* in: precise type */
-	ulint		len)	/* in: precision */
-{
-	dict_col_t*	new_col;
-	ulint		pos;
-	ulint		min_mb_len;
-	ulint		max_mb_len;
-
-	ut_ad(table);
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-	ut_ad(!heap == !name);
-
-	/* Claim the next column slot */
-	pos = table->n_def;
-	table->n_def++;
-
-	if (name != NULL) {
-		/* When the last column gets defined, the names array is
-		built in the heap of the table instead of the temporary
-		heap passed in by the caller. */
-		if (UNIV_UNLIKELY(table->n_def == table->n_cols)) {
-			heap = table->heap;
-		}
-
-		if (UNIV_LIKELY(pos != 0)
-		    && UNIV_UNLIKELY(table->col_names == NULL)) {
-			/* All preceding column names are empty. */
-			char*	empty_names = mem_heap_alloc(heap,
-							     table->n_def);
-
-			memset(empty_names, 0, table->n_def);
-			table->col_names = empty_names;
-		}
-
-		table->col_names = dict_add_col_name(table->col_names,
-						     pos, name, heap);
-	}
-
-	new_col = (dict_col_t*) dict_table_get_nth_col(table, pos);
-
-	new_col->ind = (unsigned int) pos;
-	new_col->ord_part = 0;
-
-	new_col->mtype = (unsigned int) mtype;
-	new_col->prtype = (unsigned int) prtype;
-	new_col->len = (unsigned int) len;
-
-	/* Minimum and maximum character lengths follow from the type */
-	dtype_get_mblen(mtype, prtype, &min_mb_len, &max_mb_len);
-
-	new_col->mbminlen = (unsigned int) min_mb_len;
-	new_col->mbmaxlen = (unsigned int) max_mb_len;
-}
-
-/**************************************************************************
-Allocates an index memory object from a memory heap of its own and sets
-its fields to their initial values. The table name pointer is stored as
-such, while the index name is copied to the heap of the index. */
-
-dict_index_t*
-dict_mem_index_create(
-/*==================*/
-					/* out, own: index object */
-	const char*	table_name,	/* in: table name */
-	const char*	index_name,	/* in: index name */
-	ulint		space,		/* in: space where the index tree is
-					placed, ignored if the index is of
-					the clustered type */
-	ulint		type,		/* in: DICT_UNIQUE,
-					DICT_CLUSTERED, ... ORed */
-	ulint		n_fields)	/* in: number of fields */
-{
-	mem_heap_t*	index_heap;
-	dict_index_t*	new_index;
-
-	ut_ad(table_name && index_name);
-
-	index_heap = mem_heap_create(DICT_HEAP_SIZE);
-	new_index = mem_heap_alloc(index_heap, sizeof(dict_index_t));
-	new_index->heap = index_heap;
-
-	new_index->name = mem_heap_strdup(index_heap, index_name);
-
-	/* The '1 +' below prevents allocation
-	of an empty mem block */
-	new_index->fields = mem_heap_alloc(index_heap,
-					   1 + n_fields
-					   * sizeof(dict_field_t));
-
-	/* Identity of the index */
-	new_index->type = type;
-	new_index->space = (unsigned int) space;
-	new_index->page = 0;
-	new_index->table_name = table_name;
-	new_index->table = NULL;
-
-	/* No field has been defined yet */
-	new_index->n_nullable = 0;
-	new_index->n_def = 0;
-	new_index->n_fields = (unsigned int) n_fields;
-
-	new_index->stat_n_diff_key_vals = NULL;
-	new_index->cached = FALSE;
-
-	memset(&new_index->lock, 0, sizeof new_index->lock);
-
-#ifdef UNIV_DEBUG
-	new_index->magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
-
-	return(new_index);
-}
-
-/**************************************************************************
-Allocates a foreign constraint memory object from a memory heap of its
-own. All pointer fields are set to NULL, and the type and the number of
-fields are set to 0. */
-
-dict_foreign_t*
-dict_mem_foreign_create(void)
-/*=========================*/
-				/* out, own: foreign constraint struct */
-{
-	mem_heap_t*	foreign_heap;
-	dict_foreign_t*	new_foreign;
-
-	foreign_heap = mem_heap_create(100);
-	new_foreign = mem_heap_alloc(foreign_heap, sizeof(dict_foreign_t));
-
-	new_foreign->heap = foreign_heap;
-
-	/* Constraint as a whole */
-	new_foreign->id = NULL;
-	new_foreign->type = 0;
-	new_foreign->n_fields = 0;
-
-	/* Foreign (child) side */
-	new_foreign->foreign_table_name = NULL;
-	new_foreign->foreign_table = NULL;
-	new_foreign->foreign_col_names = NULL;
-	new_foreign->foreign_index = NULL;
-
-	/* Referenced (parent) side */
-	new_foreign->referenced_table_name = NULL;
-	new_foreign->referenced_table = NULL;
-	new_foreign->referenced_col_names = NULL;
-	new_foreign->referenced_index = NULL;
-
-	return(new_foreign);
-}
-
-/**************************************************************************
-Appends a field definition to an index. NOTE: the column name is not
-copied if the field is a column; only the pointer is stored. The memory
-occupied by the column name may be released only after publishing the
-index. */
-
-void
-dict_mem_index_add_field(
-/*=====================*/
-	dict_index_t*	index,		/* in: index */
-	const char*	name,		/* in: column name */
-	ulint		prefix_len)	/* in: 0 or the column prefix length
-					in a MySQL index like
-					INDEX (textcol(25)) */
-{
-	dict_field_t*	new_field;
-
-	ut_ad(index);
-	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
-	/* Grow the defined field count first; the new field is then the
-	last defined one */
-	index->n_def++;
-	new_field = dict_index_get_nth_field(index, index->n_def - 1);
-
-	new_field->prefix_len = (unsigned int) prefix_len;
-	new_field->name = name;
-}
-
-/**************************************************************************
-Releases an index memory object by freeing the memory heap of the
-index. */
-
-void
-dict_mem_index_free(
-/*================*/
-	dict_index_t*	index)	/* in: index */
-{
-	mem_heap_t*	index_heap;
-
-	ut_ad(index);
-	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
-	index_heap = index->heap;
-	mem_heap_free(index_heap);
-}
diff --git a/storage/innobase/dyn/dyn0dyn.c b/storage/innobase/dyn/dyn0dyn.c
deleted file mode 100644
index bcf2fda2b08..00000000000
--- a/storage/innobase/dyn/dyn0dyn.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/******************************************************
-The dynamically allocated array
-
-(c) 1996 Innobase Oy
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dyn0dyn.h"
-#ifdef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-/****************************************************************
-Appends a new, empty block to a dyn array. The heap of the array is
-created on the first call, and the block that was last until now is
-flagged as full. */
-
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
-				/* out: created block */
-	dyn_array_t*	arr)	/* in: dyn array */
-{
-	dyn_block_t*	last_block;
-	dyn_block_t*	new_block;
-
-	ut_ad(arr);
-	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
-	if (!arr->heap) {
-		/* First extension: the array itself becomes the first
-		element of the block list, and the heap for the further
-		blocks is created. */
-		UT_LIST_INIT(arr->base);
-		UT_LIST_ADD_FIRST(list, arr->base, arr);
-
-		arr->heap = mem_heap_create(sizeof(dyn_block_t));
-	}
-
-	/* Mark the block that has been the last one so far as full */
-	last_block = dyn_array_get_last_block(arr);
-	last_block->used |= DYN_BLOCK_FULL_FLAG;
-
-	new_block = mem_heap_alloc(arr->heap, sizeof(dyn_block_t));
-	new_block->used = 0;
-
-	UT_LIST_ADD_LAST(list, arr->base, new_block);
-
-	return(new_block);
-}
diff --git a/storage/innobase/eval/eval0eval.c b/storage/innobase/eval/eval0eval.c
deleted file mode 100644
index cbc47ec508f..00000000000
--- a/storage/innobase/eval/eval0eval.c
+++ /dev/null
@@ -1,836 +0,0 @@
-/******************************************************
-SQL evaluator: evaluates simple data structures, like expressions, in
-a query graph
-
-(c) 1997 Innobase Oy
-
-Created 12/29/1997 Heikki Tuuri
-*******************************************************/
-
-#include "eval0eval.h"
-
-#ifdef UNIV_NONINL
-#include "eval0eval.ic"
-#endif
-
-#include "data0data.h"
-#include "row0sel.h"
-
-/* The RND function seed */
-ulint eval_rnd = 128367121;
-
-/* Dummy address used when we should allocate a buffer of size 0 in
-the function below */
-
-byte eval_dummy;
-
-/*********************************************************************
-Allocate a buffer from global dynamic memory for a value of a que_node.
-NOTE that this memory must be explicitly freed when the query graph is
-freed. If the node already has an allocated buffer, that buffer is freed
-here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field. A request of size 0 allocates
-nothing: the data pointer is set to the address of eval_dummy instead. */
-
-byte*
-eval_node_alloc_val_buf(
-/*====================*/
- /* out: pointer to allocated buffer */
- que_node_t* node, /* in: query graph node; sets the val field
- data field to point to the new buffer, and
- len field equal to size */
- ulint size) /* in: buffer size */
-{
- dfield_t* dfield;
- byte* data;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
- || que_node_get_type(node) == QUE_NODE_FUNC);
-
- dfield = que_node_get_val(node);
-
- data = dfield_get_data(dfield);
-
- if (data && data != &eval_dummy) { /* eval_dummy is a static byte, never freed */
- mem_free(data);
- }
-
- if (size == 0) {
- data = &eval_dummy; /* non-NULL placeholder for an empty buffer */
- } else {
- data = mem_alloc(size);
- }
-
- que_node_set_val_buf_size(node, size); /* recorded so the free function knows what to do */
-
- dfield_set_data(dfield, data, size);
-
- return(data);
-}
-
-/*********************************************************************
-Free the buffer from global dynamic memory for a value of a que_node,
-if it has been allocated in the above function. The freeing for pushed
-column values is done in sel_col_prefetch_buf_free. Nothing is freed when
-the buffer size recorded in the node is 0. */
-
-void
-eval_node_free_val_buf(
-/*===================*/
- que_node_t* node) /* in: query graph node */
-{
- dfield_t* dfield;
- byte* data;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
- || que_node_get_type(node) == QUE_NODE_FUNC);
-
- dfield = que_node_get_val(node);
-
- data = dfield_get_data(dfield);
-
- if (que_node_get_val_buf_size(node) > 0) { /* a nonzero size means a buffer was allocated */
- ut_a(data);
-
- mem_free(data);
- }
-}
-
-/*********************************************************************
-Evaluates a comparison node. The values of the two arguments are compared
-with cmp_dfield_dfield, and the result is interpreted according to the
-comparison operator stored in cmp_node->func ('=', '<', '>', PARS_LE_TOKEN,
-PARS_NE_TOKEN or PARS_GE_TOKEN). The boolean outcome is stored in the node
-and also returned. */
-
-ibool
-eval_cmp(
-/*=====*/
- /* out: the result of the comparison */
- func_node_t* cmp_node) /* in: comparison node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- int res;
- ibool val;
- int func;
-
- ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);
-
- arg1 = cmp_node->args;
- arg2 = que_node_get_next(arg1);
-
- res = cmp_dfield_dfield(que_node_get_val(arg1),
- que_node_get_val(arg2));
- val = TRUE; /* each branch below only clears val when the relation fails */
-
- func = cmp_node->func;
-
- /* NOTE(review): the tests below rely on res being exactly -1, 0
- or 1 - confirm against cmp_dfield_dfield */
- if (func == '=') {
- if (res != 0) {
- val = FALSE;
- }
- } else if (func == '<') {
- if (res != -1) {
- val = FALSE;
- }
- } else if (func == PARS_LE_TOKEN) {
- if (res == 1) {
- val = FALSE;
- }
- } else if (func == PARS_NE_TOKEN) {
- if (res == 0) {
- val = FALSE;
- }
- } else if (func == PARS_GE_TOKEN) {
- if (res == -1) {
- val = FALSE;
- }
- } else {
- ut_ad(func == '>'); /* the only operator left; checked with ut_ad only */
-
- if (res != 1) {
- val = FALSE;
- }
- }
-
- eval_node_set_ibool_val(cmp_node, val);
-
- return(val);
-}
-
-/*********************************************************************
-Evaluates a logical operation node. Handles PARS_AND_TOKEN, PARS_OR_TOKEN
-and PARS_NOT_TOKEN; any other function token ends in ut_error. The result
-is stored in the node as an ibool value. */
-UNIV_INLINE
-void
-eval_logical(
-/*=========*/
- func_node_t* logical_node) /* in: logical operation node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- ibool val1;
- ibool val2 = 0; /* remove warning */
- ibool val = 0; /* remove warning */
- int func;
-
- ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC);
-
- arg1 = logical_node->args;
- arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is 'NOT' */
-
- val1 = eval_node_get_ibool_val(arg1);
-
- if (arg2) {
- val2 = eval_node_get_ibool_val(arg2);
- }
-
- func = logical_node->func;
-
- /* The bitwise and arithmetic forms below presumably assume that
- ibool values are only ever 0 or 1 - TODO confirm */
- if (func == PARS_AND_TOKEN) {
- val = val1 & val2;
- } else if (func == PARS_OR_TOKEN) {
- val = val1 | val2;
- } else if (func == PARS_NOT_TOKEN) {
- val = TRUE - val1;
- } else {
- ut_error;
- }
-
- eval_node_set_ibool_val(logical_node, val);
-}
-
-/*********************************************************************
-Evaluates an arithmetic operation node. Handles binary '+', '-', '*' and
-'/' as well as unary '-' (recognized by a missing second argument) on
-integer values of type lint, and stores the result in the node. */
-UNIV_INLINE
-void
-eval_arith(
-/*=======*/
- func_node_t* arith_node) /* in: arithmetic operation node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- lint val1;
- lint val2 = 0; /* remove warning */
- lint val;
- int func;
-
- ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC);
-
- arg1 = arith_node->args;
- arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is unary '-' */
-
- val1 = eval_node_get_int_val(arg1);
-
- if (arg2) {
- val2 = eval_node_get_int_val(arg2);
- }
-
- func = arith_node->func;
-
- if (func == '+') {
- val = val1 + val2;
- } else if ((func == '-') && arg2) {
- val = val1 - val2;
- } else if (func == '-') {
- val = -val1;
- } else if (func == '*') {
- val = val1 * val2;
- } else {
- ut_ad(func == '/'); /* the only operator left; checked with ut_ad only */
- val = val1 / val2; /* NOTE(review): val2 == 0 is not checked here */
- }
-
- eval_node_set_int_val(arith_node, val);
-}
-
-/*********************************************************************
-Evaluates an aggregate operation node. The node's own integer value acts
-as the running total: PARS_COUNT_TOKEN adds 1 to it and PARS_SUM_TOKEN
-adds the integer value of the argument. */
-UNIV_INLINE
-void
-eval_aggregate(
-/*===========*/
- func_node_t* node) /* in: aggregate operation node */
-{
- que_node_t* arg;
- lint val;
- lint arg_val;
- int func;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
-
- val = eval_node_get_int_val(node); /* running total accumulated so far */
-
- func = node->func;
-
- if (func == PARS_COUNT_TOKEN) {
-
- val = val + 1;
- } else {
- ut_ad(func == PARS_SUM_TOKEN);
-
- arg = node->args;
- arg_val = eval_node_get_int_val(arg);
-
- val = val + arg_val;
- }
-
- eval_node_set_int_val(node, val); /* store the updated total back in the node */
-}
-
-/*********************************************************************
-Evaluates a predefined function node where the function is not relevant
-in benchmarks. Handles PARS_PRINTF_TOKEN, PARS_ASSERT_TOKEN,
-PARS_RND_TOKEN and PARS_RND_STR_TOKEN; any other function token ends in
-ut_error. The RND functions read and advance the global seed eval_rnd. */
-static
-void
-eval_predefined_2(
-/*==============*/
- func_node_t* func_node) /* in: predefined function node */
-{
- que_node_t* arg;
- que_node_t* arg1;
- que_node_t* arg2 = 0; /* remove warning (??? bug ???) */
- lint int_val;
- byte* data;
- ulint len1;
- ulint len2;
- int func;
- ulint i;
-
- ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
-
- arg1 = func_node->args;
-
- if (arg1) {
- arg2 = que_node_get_next(arg1);
- }
-
- func = func_node->func;
-
- if (func == PARS_PRINTF_TOKEN) {
-
- arg = arg1;
-
- while (arg) { /* print every argument in list order */
- dfield_print(que_node_get_val(arg));
-
- arg = que_node_get_next(arg);
- }
-
- putc('\n', stderr);
-
- } else if (func == PARS_ASSERT_TOKEN) {
-
- if (!eval_node_get_ibool_val(arg1)) {
- fputs("SQL assertion fails in a stored procedure!\n",
- stderr);
- }
-
- ut_a(eval_node_get_ibool_val(arg1)); /* message above is printed before this fires */
-
- /* This function, or more precisely, a debug procedure,
- returns no value */
-
- } else if (func == PARS_RND_TOKEN) {
-
- len1 = (ulint)eval_node_get_int_val(arg1);
- len2 = (ulint)eval_node_get_int_val(arg2);
-
- ut_ad(len2 >= len1);
-
- if (len2 > len1) { /* result lies in the closed range [len1, len2] */
- int_val = (lint) (len1
- + (eval_rnd % (len2 - len1 + 1)));
- } else {
- int_val = (lint) len1;
- }
-
- eval_rnd = ut_rnd_gen_next_ulint(eval_rnd); /* advance the global seed */
-
- eval_node_set_int_val(func_node, int_val);
-
- } else if (func == PARS_RND_STR_TOKEN) {
-
- len1 = (ulint)eval_node_get_int_val(arg1);
-
- data = eval_node_ensure_val_buf(func_node, len1);
-
- for (i = 0; i < len1; i++) {
- data[i] = (byte)(97 + (eval_rnd % 3)); /* 97 is ASCII 'a': yields 'a', 'b' or 'c' */
-
- eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
- }
- } else {
- ut_error;
- }
-}
-
-/*********************************************************************
-Evaluates a notfound-function node. The result stored in the node is TRUE
-when the select node associated with the cursor argument is in state
-SEL_NODE_NO_MORE_ROWS, and FALSE otherwise. For a literal argument the
-select node is the query graph's last_sel_node; otherwise it is the
-cursor_def of the symbol's alias. */
-UNIV_INLINE
-void
-eval_notfound(
-/*==========*/
- func_node_t* func_node) /* in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- sym_node_t* cursor;
- sel_node_t* sel_node;
- ibool ibool_val;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1); /* fetched but not used below */
-
- ut_ad(func_node->func == PARS_NOTFOUND_TOKEN);
-
- cursor = arg1;
-
- ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL);
-
- if (cursor->token_type == SYM_LIT) {
-
- ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)),
- "SQL", 3) == 0); /* the literal must start with "SQL" */
-
- sel_node = cursor->sym_table->query_graph->last_sel_node;
- } else {
- sel_node = cursor->alias->cursor_def;
- }
-
- if (sel_node->state == SEL_NODE_NO_MORE_ROWS) {
- ibool_val = TRUE;
- } else {
- ibool_val = FALSE;
- }
-
- eval_node_set_ibool_val(func_node, ibool_val);
-}
-
-/*********************************************************************
-Evaluates a substr-function node. The arguments are (string, offset,
-length). No bytes are copied here: the node's value is set to point
-directly into the first argument's data at the given offset, with the
-given length. */
-UNIV_INLINE
-void
-eval_substr(
-/*========*/
- func_node_t* func_node) /* in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- que_node_t* arg3;
- dfield_t* dfield;
- byte* str1;
- ulint len1;
- ulint len2;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- ut_ad(func_node->func == PARS_SUBSTR_TOKEN);
-
- arg3 = que_node_get_next(arg2);
-
- str1 = dfield_get_data(que_node_get_val(arg1));
-
- len1 = (ulint)eval_node_get_int_val(arg2); /* start offset into str1 */
- len2 = (ulint)eval_node_get_int_val(arg3); /* length of the substring */
-
- dfield = que_node_get_val(func_node);
-
- dfield_set_data(dfield, str1 + len1, len2); /* NOTE(review): no bounds check against the length of arg1 is visible here */
-}
-
-/*********************************************************************
-Evaluates a replstr-procedure node. The arguments are (destination,
-source, offset, length): length bytes from the start of the source data
-are copied over the destination data starting at the given offset. If
-either buffer is too short for that, ut_error is raised. */
-static
-void
-eval_replstr(
-/*=========*/
- func_node_t* func_node) /* in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- que_node_t* arg3;
- que_node_t* arg4;
- byte* str1;
- byte* str2;
- ulint len1;
- ulint len2;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- ut_ad(que_node_get_type(arg1) == QUE_NODE_SYMBOL);
-
- arg3 = que_node_get_next(arg2);
- arg4 = que_node_get_next(arg3);
-
- str1 = dfield_get_data(que_node_get_val(arg1));
- str2 = dfield_get_data(que_node_get_val(arg2));
-
- len1 = (ulint)eval_node_get_int_val(arg3); /* offset into the destination */
- len2 = (ulint)eval_node_get_int_val(arg4); /* number of bytes to copy */
-
- if ((dfield_get_len(que_node_get_val(arg1)) < len1 + len2)
- || (dfield_get_len(que_node_get_val(arg2)) < len2)) {
-
- ut_error;
- }
-
- ut_memcpy(str1 + len1, str2, len2); /* overwrites the destination data in place */
-}
-
-/*********************************************************************
-Evaluates an instr-function node. Searches the first argument's data for
-the first occurrence of the second argument's data and stores in the node
-the 1-based position of the match, or 0 if there is no match. An empty
-second argument ends in ut_error. */
-static
-void
-eval_instr(
-/*=======*/
- func_node_t* func_node) /* in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- dfield_t* dfield1;
- dfield_t* dfield2;
- lint int_val;
- byte* str1;
- byte* str2;
- byte match_char;
- ulint len1;
- ulint len2;
- ulint i;
- ulint j;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- dfield1 = que_node_get_val(arg1);
- dfield2 = que_node_get_val(arg2);
-
- str1 = dfield_get_data(dfield1);
- str2 = dfield_get_data(dfield2);
-
- len1 = dfield_get_len(dfield1);
- len2 = dfield_get_len(dfield2);
-
- if (len2 == 0) {
- ut_error;
- }
-
- match_char = str2[0]; /* first byte of the pattern, used as a quick filter */
-
- for (i = 0; i < len1; i++) {
- /* In this outer loop, the number of matched characters is 0 */
-
- if (str1[i] == match_char) {
-
- if (i + len2 > len1) { /* too few bytes left for a full match */
-
- break;
- }
-
- for (j = 1;; j++) {
- /* We have already matched j characters */
-
- if (j == len2) {
- int_val = i + 1; /* positions are reported 1-based */
-
- goto match_found;
- }
-
- if (str1[i + j] != str2[j]) {
-
- break;
- }
- }
- }
- }
-
- int_val = 0; /* no occurrence found */
-
-match_found:
- eval_node_set_int_val(func_node, int_val);
-}
-
-/*********************************************************************
-Evaluates a predefined function node (binary_to_number). The argument's
-data may be at most 4 bytes long, otherwise ut_error is raised. Shorter
-data is copied to the end of a zeroed 4-byte area, i.e. it is preceded by
-zero bytes, and the resulting 4 bytes are copied into the node's value. */
-UNIV_INLINE
-void
-eval_binary_to_number(
-/*==================*/
- func_node_t* func_node) /* in: function node */
-{
- que_node_t* arg1;
- dfield_t* dfield;
- byte* str1;
- byte* str2;
- ulint len1;
- ulint int_val;
-
- arg1 = func_node->args;
-
- dfield = que_node_get_val(arg1);
-
- str1 = dfield_get_data(dfield);
- len1 = dfield_get_len(dfield);
-
- if (len1 > 4) {
- ut_error;
- }
-
- if (len1 == 4) {
- str2 = str1; /* already 4 bytes: use the argument data directly */
- } else {
- int_val = 0;
- str2 = (byte*)&int_val; /* the zeroed local serves as a scratch buffer */
-
- ut_memcpy(str2 + (4 - len1), str1, len1);
- }
-
- eval_node_copy_and_alloc_val(func_node, str2, 4);
-}
-
-/*********************************************************************
-Evaluates a predefined function node (concat). Makes two passes over the
-argument list: the first sums the lengths of all arguments, the second
-copies each argument's data back to back into a value buffer of that total
-size obtained with eval_node_ensure_val_buf. */
-static
-void
-eval_concat(
-/*========*/
- func_node_t* func_node) /* in: function node */
-{
- que_node_t* arg;
- dfield_t* dfield;
- byte* data;
- ulint len;
- ulint len1;
-
- arg = func_node->args;
- len = 0;
-
- while (arg) { /* pass 1: compute the total length */
- len1 = dfield_get_len(que_node_get_val(arg));
-
- len += len1;
-
- arg = que_node_get_next(arg);
- }
-
- data = eval_node_ensure_val_buf(func_node, len);
-
- arg = func_node->args;
- len = 0; /* reused as the write offset into data */
-
- while (arg) { /* pass 2: copy the pieces */
- dfield = que_node_get_val(arg);
- len1 = dfield_get_len(dfield);
-
- ut_memcpy(data + len, dfield_get_data(dfield), len1);
-
- len += len1;
-
- arg = que_node_get_next(arg);
- }
-}
-
-/*********************************************************************
-Evaluates a predefined function node. If the first argument is an integer,
-this function looks at the second argument which is the integer length in
-bytes, and converts the integer to a VARCHAR.
-If the first argument is of some other type, this function converts it to
-BINARY. In both cases no bytes are copied: the node's value is set to point
-into the first argument's data. A length above 4 ends in ut_error. */
-UNIV_INLINE
-void
-eval_to_binary(
-/*===========*/
- func_node_t* func_node) /* in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- dfield_t* dfield;
- byte* str1;
- ulint len;
- ulint len1;
-
- arg1 = func_node->args;
-
- str1 = dfield_get_data(que_node_get_val(arg1));
-
- if (dtype_get_mtype(que_node_get_data_type(arg1)) != DATA_INT) {
-
- len = dfield_get_len(que_node_get_val(arg1));
-
- dfield = que_node_get_val(func_node);
-
- dfield_set_data(dfield, str1, len); /* same data and length as the argument */
-
- return;
- }
-
- arg2 = que_node_get_next(arg1);
-
- len1 = (ulint)eval_node_get_int_val(arg2);
-
- if (len1 > 4) {
-
- ut_error;
- }
-
- dfield = que_node_get_val(func_node);
-
- dfield_set_data(dfield, str1 + (4 - len1), len1); /* the last len1 bytes of what is presumably a 4-byte integer */
-}
-
-/*********************************************************************
-Evaluates a predefined function node. PARS_LENGTH_TOKEN,
-PARS_TO_CHAR_TOKEN, PARS_TO_NUMBER_TOKEN and PARS_SYSDATE_TOKEN are handled
-here; every other function is passed on to eval_predefined_2. TO_CHAR
-writes a signed decimal string into the node's value buffer and returns
-early; the other functions handled here store an integer result. */
-UNIV_INLINE
-void
-eval_predefined(
-/*============*/
- func_node_t* func_node) /* in: function node */
-{
- que_node_t* arg1;
- lint int_val;
- byte* data;
- int func;
-
- func = func_node->func;
-
- arg1 = func_node->args;
-
- if (func == PARS_LENGTH_TOKEN) {
-
- int_val = (lint)dfield_get_len(que_node_get_val(arg1));
-
- } else if (func == PARS_TO_CHAR_TOKEN) {
-
- /* Convert number to character string as a
- signed decimal integer. */
-
- ulint uint_val;
- int int_len;
-
- int_val = eval_node_get_int_val(arg1);
-
- /* Determine the length of the string. */
-
- if (int_val == 0) {
- int_len = 1; /* the number 0 occupies 1 byte */
- } else {
- int_len = 0;
- if (int_val < 0) {
- uint_val = ((ulint) -int_val - 1) + 1; /* magnitude of the negative value */
- int_len++; /* reserve space for minus sign */
- } else {
- uint_val = (ulint) int_val;
- }
- for (; uint_val > 0; int_len++) { /* one byte per decimal digit */
- uint_val /= 10;
- }
- }
-
- /* allocate the string */
- data = eval_node_ensure_val_buf(func_node, int_len + 1);
-
- /* add terminating NUL character */
- data[int_len] = 0;
-
- /* convert the number */
-
- if (int_val == 0) {
- data[0] = '0';
- } else {
- int tmp;
- if (int_val < 0) {
- data[0] = '-'; /* preceding minus sign */
- uint_val = ((ulint) -int_val - 1) + 1;
- } else {
- uint_val = (ulint) int_val;
- }
- for (tmp = int_len; uint_val > 0; uint_val /= 10) { /* digits are written from the end backwards */
- data[--tmp] = (byte)
- ('0' + (byte)(uint_val % 10));
- }
- }
-
- dfield_set_len((dfield_t*) que_node_get_val(func_node),
- int_len); /* the length excludes the terminating NUL */
-
- return;
-
- } else if (func == PARS_TO_NUMBER_TOKEN) {
-
- int_val = atoi((char*)
- dfield_get_data(que_node_get_val(arg1)));
-
- } else if (func == PARS_SYSDATE_TOKEN) {
- int_val = (lint)ut_time();
- } else {
- eval_predefined_2(func_node);
-
- return;
- }
-
- eval_node_set_int_val(func_node, int_val);
-}
-
-/*********************************************************************
-Evaluates a function node. Every argument is first evaluated with
-eval_exp; then the node is dispatched on its function class (comparison,
-arithmetic, aggregate, predefined or logical) and, for predefined
-functions, on the function token. */
-
-void
-eval_func(
-/*======*/
- func_node_t* func_node) /* in: function node */
-{
- que_node_t* arg;
- ulint class;
- ulint func;
-
- ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
-
- class = func_node->class;
- func = func_node->func;
-
- arg = func_node->args;
-
- /* Evaluate first the argument list */
- while (arg) {
- eval_exp(arg);
-
- /* The functions are not defined for SQL null argument
- values, except for eval_cmp, notfound and printf */
-
- if ((dfield_get_len(que_node_get_val(arg)) == UNIV_SQL_NULL)
- && (class != PARS_FUNC_CMP)
- && (func != PARS_NOTFOUND_TOKEN)
- && (func != PARS_PRINTF_TOKEN)) {
- ut_error;
- }
-
- arg = que_node_get_next(arg);
- }
-
- if (class == PARS_FUNC_CMP) {
- eval_cmp(func_node);
- } else if (class == PARS_FUNC_ARITH) {
- eval_arith(func_node);
- } else if (class == PARS_FUNC_AGGREGATE) {
- eval_aggregate(func_node);
- } else if (class == PARS_FUNC_PREDEFINED) {
-
- if (func == PARS_NOTFOUND_TOKEN) {
- eval_notfound(func_node);
- } else if (func == PARS_SUBSTR_TOKEN) {
- eval_substr(func_node);
- } else if (func == PARS_REPLSTR_TOKEN) {
- eval_replstr(func_node);
- } else if (func == PARS_INSTR_TOKEN) {
- eval_instr(func_node);
- } else if (func == PARS_BINARY_TO_NUMBER_TOKEN) {
- eval_binary_to_number(func_node);
- } else if (func == PARS_CONCAT_TOKEN) {
- eval_concat(func_node);
- } else if (func == PARS_TO_BINARY_TOKEN) {
- eval_to_binary(func_node);
- } else {
- eval_predefined(func_node); /* all remaining predefined functions */
- }
- } else {
- ut_ad(class == PARS_FUNC_LOGICAL); /* the only class left; checked with ut_ad only */
-
- eval_logical(func_node);
- }
-}
diff --git a/storage/innobase/eval/eval0proc.c b/storage/innobase/eval/eval0proc.c
deleted file mode 100644
index a513e8e4024..00000000000
--- a/storage/innobase/eval/eval0proc.c
+++ /dev/null
@@ -1,278 +0,0 @@
-/******************************************************
-Executes SQL stored procedures and their control structures
-
-(c) 1998 Innobase Oy
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#include "eval0proc.h"
-
-#ifdef UNIV_NONINL
-#include "eval0proc.ic"
-#endif
-
-/**************************************************************************
-Performs an execution step of an if-statement node. When control arrives
-from the parent node, the condition is evaluated and thr->run_node is set
-to the statement list of the branch to execute (the if branch, the else
-part, or the first elsif whose condition is TRUE). When control arrives
-from elsewhere, or when no branch applies, thr->run_node is set to the
-parent node. */
-
-que_thr_t*
-if_step(
-/*====*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- if_node_t* node;
- elsif_node_t* elsif_node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_IF);
-
- if (thr->prev_node == que_node_get_parent(node)) { /* control came from the parent node */
-
- /* Evaluate the condition */
-
- eval_exp(node->cond);
-
- if (eval_node_get_ibool_val(node->cond)) {
-
- /* The condition evaluated to TRUE: start execution
- from the first statement in the statement list */
-
- thr->run_node = node->stat_list;
-
- } else if (node->else_part) { /* tested before elsif_list; presumably the two are never both set - TODO confirm */
- thr->run_node = node->else_part;
-
- } else if (node->elsif_list) {
- elsif_node = node->elsif_list;
-
- for (;;) { /* try each elsif condition in list order */
- eval_exp(elsif_node->cond);
-
- if (eval_node_get_ibool_val(
- elsif_node->cond)) {
-
- /* The condition evaluated to TRUE:
- start execution from the first
- statement in the statement list */
-
- thr->run_node = elsif_node->stat_list;
-
- break;
- }
-
- elsif_node = que_node_get_next(elsif_node);
-
- if (elsif_node == NULL) { /* no elsif condition was TRUE */
- thr->run_node = NULL;
-
- break;
- }
- }
- } else {
- thr->run_node = NULL;
- }
- } else {
- /* Move to the next statement */
- ut_ad(que_node_get_next(thr->prev_node) == NULL);
-
- thr->run_node = NULL;
- }
-
- if (thr->run_node == NULL) { /* nothing (more) to run here: go back to the parent */
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
-
-/**************************************************************************
-Performs an execution step of a while-statement node. The condition is
-evaluated on every call: if it is TRUE, thr->run_node is set to the
-statement list of the loop, otherwise to the parent node. */
-
-que_thr_t*
-while_step(
-/*=======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- while_node_t* node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_WHILE);
-
- ut_ad((thr->prev_node == que_node_get_parent(node))
- || (que_node_get_next(thr->prev_node) == NULL)); /* came from the parent, or from a node with no next node */
-
- /* Evaluate the condition */
-
- eval_exp(node->cond);
-
- if (eval_node_get_ibool_val(node->cond)) {
-
- /* The condition evaluated to TRUE: start execution
- from the first statement in the statement list */
-
- thr->run_node = node->stat_list;
- } else {
- thr->run_node = que_node_get_parent(node); /* condition FALSE: leave the loop */
- }
-
- return(thr);
-}
-
-/**************************************************************************
-Performs an execution step of an assignment statement node. Evaluates
-node->val, copies the value to node->var->alias with eval_node_copy_val,
-and sets thr->run_node to the parent node. */
-
-que_thr_t*
-assign_step(
-/*========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- assign_node_t* node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_ASSIGNMENT);
-
- /* Evaluate the value to assign */
-
- eval_exp(node->val);
-
- eval_node_copy_val(node->var->alias, node->val); /* the target is the alias of the variable symbol */
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
-
-/**************************************************************************
-Performs an execution step of a for-loop node. When control arrives from
-the parent node the loop is initialized: both limits are evaluated, the
-loop variable starts at the start limit, and the end limit is saved in
-node->loop_end_value. When control arrives from elsewhere, execution
-continues with the node after thr->prev_node if there is one; otherwise
-the loop variable is incremented by 1. The loop body runs as long as the
-loop variable does not exceed the saved end value. */
-
-que_thr_t*
-for_step(
-/*=====*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- for_node_t* node;
- que_node_t* parent;
- lint loop_var_value;
-
- ut_ad(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_FOR);
-
- parent = que_node_get_parent(node);
-
- if (thr->prev_node != parent) { /* control did not come from the parent node */
-
- /* Move to the next statement */
- thr->run_node = que_node_get_next(thr->prev_node);
-
- if (thr->run_node != NULL) {
-
- return(thr);
- }
-
- /* Increment the value of loop_var */
-
- loop_var_value = 1 + eval_node_get_int_val(node->loop_var);
- } else {
- /* Initialize the loop */
-
- eval_exp(node->loop_start_limit);
- eval_exp(node->loop_end_limit);
-
- loop_var_value = eval_node_get_int_val(node->loop_start_limit);
-
- node->loop_end_value
- = (int) eval_node_get_int_val(node->loop_end_limit); /* the end limit is read only here, at initialization */
- }
-
- /* Check if we should do another loop */
-
- if (loop_var_value > node->loop_end_value) { /* the end value itself is still executed */
-
- /* Enough loops done */
-
- thr->run_node = parent;
- } else {
- eval_node_set_int_val(node->loop_var, loop_var_value);
-
- thr->run_node = node->stat_list;
- }
-
- return(thr);
-}
-
-/**************************************************************************
-Performs an execution step of an exit statement node. Finds the loop node
-containing the exit statement and sets thr->run_node to that loop node's
-parent. If there is no containing loop node, the ut_a assertion fails. */
-
-que_thr_t*
-exit_step(
-/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- exit_node_t* node;
- que_node_t* loop_node;
-
- ut_ad(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_EXIT);
-
- /* Loops exit by setting thr->run_node as the loop node's parent, so
- find our containing loop node and get its parent. */
-
- loop_node = que_node_get_containing_loop_node(node);
-
- /* If someone uses an EXIT statement outside of a loop, this will
- trigger. */
- ut_a(loop_node);
-
- thr->run_node = que_node_get_parent(loop_node);
-
- return(thr);
-}
-
-/**************************************************************************
-Performs an execution step of a return-statement node. Walks up the parent
-chain from the return node until a node of type QUE_NODE_PROC is found,
-and sets thr->run_node to the parent of that procedure node. */
-
-que_thr_t*
-return_step(
-/*========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- return_node_t* node;
- que_node_t* parent;
-
- ut_ad(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_RETURN);
-
- parent = node;
-
- while (que_node_get_type(parent) != QUE_NODE_PROC) { /* climb until the enclosing procedure node */
-
- parent = que_node_get_parent(parent);
- }
-
- ut_a(parent); /* NOTE(review): runs only after the loop above has already passed parent to que_node_get_type */
-
- thr->run_node = que_node_get_parent(parent);
-
- return(thr);
-}
diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
deleted file mode 100644
index c63d67cae60..00000000000
--- a/storage/innobase/fil/fil0fil.c
+++ /dev/null
@@ -1,4566 +0,0 @@
-/******************************************************
-The tablespace memory cache
-
-(c) 1995 Innobase Oy
-
-Created 10/25/1995 Heikki Tuuri
-*******************************************************/
-
-#include "fil0fil.h"
-
-#include "mem0mem.h"
-#include "sync0sync.h"
-#include "hash0hash.h"
-#include "os0file.h"
-#include "os0sync.h"
-#include "mach0data.h"
-#include "ibuf0ibuf.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "log0log.h"
-#include "log0recv.h"
-#include "fsp0fsp.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "dict0dict.h"
-
-
-/*
- IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
- =============================================
-
-The tablespace cache is responsible for providing fast read/write access to
-tablespaces and logs of the database. File creation and deletion is done
-in other modules which know more of the logic of the operation, however.
-
-A tablespace consists of a chain of files. The size of the files does not
-have to be divisible by the database block size, because we may just leave
-the last incomplete block unused. When a new file is appended to the
-tablespace, the maximum size of the file is also specified. At the moment,
-we think that it is best to extend the file to its maximum size already at
-the creation of the file, because then we can avoid dynamically extending
-the file when more space is needed for the tablespace.
-
-A block's position in the tablespace is specified with a 32-bit unsigned
-integer. The files in the chain are thought to be catenated, and the block
-corresponding to an address n is the nth block in the catenated file (where
-the first block is named the 0th block, and the incomplete block fragments
-at the end of files are not taken into account). A tablespace can be extended
-by appending a new file at the end of the chain.
-
-Our tablespace concept is similar to the one of Oracle.
-
-To acquire more speed in disk transfers, a technique called disk striping is
-sometimes used. This means that logical block addresses are divided in a
-round-robin fashion across several disks. Windows NT supports disk striping,
-so there we do not need to support it in the database. Disk striping is
-implemented in hardware in RAID disks. We conclude that it is not necessary
-to implement it in the database. Oracle 7 does not support disk striping,
-either.
-
-Another trick used at some database sites is replacing tablespace files by
-raw disks, that is, the whole physical disk drive, or a partition of it, is
-opened as a single file, and it is accessed through byte offsets calculated
-from the start of the disk or the partition. This is recommended in some
-books on database tuning to achieve more speed in i/o. Using raw disk
-certainly prevents the OS from fragmenting disk space, but it is not clear
-if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
-system + EIDE Conner disk only a negligible difference in speed when reading
-from a file, versus reading from a raw disk.
-
-To have fast access to a tablespace or a log file, we put the data structures
-to a hash table. Each tablespace and log file is given an unique 32-bit
-identifier.
-
-Some operating systems do not support many open files at the same time,
-though NT seems to tolerate at least 900 open files. Therefore, we put the
-open files in an LRU-list. If we need to open another file, we may close the
-file at the end of the LRU-list. When an i/o-operation is pending on a file,
-the file cannot be closed. We take the file nodes with pending i/o-operations
-out of the LRU-list and keep a count of pending operations. When an operation
-completes, we decrement the count and return the file node to the LRU-list if
-the count drops to zero. */
-
-/* When mysqld is run, the default directory "." is the mysqld datadir,
-but in the MySQL Embedded Server Library and ibbackup it is not the default
-directory, and we must set the base file path explicitly */
-const char* fil_path_to_mysql_datadir = ".";
-
-/* The number of fsyncs done to the log */
-ulint fil_n_log_flushes = 0;
-
-ulint fil_n_pending_log_flushes = 0;
-ulint fil_n_pending_tablespace_flushes = 0;
-
-/* Null file address */
-fil_addr_t fil_addr_null = {FIL_NULL, 0};
-
-/* File node of a tablespace or the log data space */
-struct fil_node_struct {
- fil_space_t* space; /* backpointer to the space where this node
- belongs */
- char* name; /* path to the file */
- ibool open; /* TRUE if file open */
- os_file_t handle; /* OS handle to the file, if file open */
- ibool is_raw_disk;/* TRUE if the 'file' is actually a raw
- device or a raw disk partition */
- ulint size; /* size of the file in database pages, 0 if
- not known yet; the possible last incomplete
- megabyte may be ignored if space == 0 */
- ulint n_pending;
- /* count of pending i/o's on this file;
- closing of the file is not allowed if
- this is > 0 */
- ulint n_pending_flushes;
- /* count of pending flushes on this file;
- closing of the file is not allowed if
- this is > 0 */
- ib_longlong modification_counter;/* when we write to the file we
- increment this by one */
- ib_longlong flush_counter;/* up to what modification_counter value
- we have flushed the modifications to disk */
- UT_LIST_NODE_T(fil_node_t) chain;
- /* link field for the file chain */
- UT_LIST_NODE_T(fil_node_t) LRU;
- /* link field for the LRU list */
- ulint magic_n;
-};
-
-#define FIL_NODE_MAGIC_N 89389
-
-/* Tablespace or log data space: let us call them by a common name space */
-struct fil_space_struct {
- char* name; /* space name = the path to the first file in
- it */
- ulint id; /* space id */
- ib_longlong tablespace_version;
- /* in DISCARD/IMPORT this timestamp is used to
- check if we should ignore an insert buffer
- merge request for a page because it actually
- was for the previous incarnation of the
- space */
- ibool mark; /* this is set to TRUE at database startup if
- the space corresponds to a table in the InnoDB
- data dictionary; so we can print a warning of
- orphaned tablespaces */
- ibool stop_ios;/* TRUE if we want to rename the .ibd file of
- tablespace and want to stop temporarily
- posting of new i/o requests on the file */
- ibool stop_ibuf_merges;
- /* we set this TRUE when we start deleting a
- single-table tablespace */
- ibool is_being_deleted;
- /* this is set to TRUE when we start
- deleting a single-table tablespace and its
- file; when this flag is set no further i/o
- or flush requests can be placed on this space,
- though there may be such requests still being
- processed on this space */
- ulint purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
- UT_LIST_BASE_NODE_T(fil_node_t) chain;
- /* base node for the file chain */
- ulint size; /* space size in pages; 0 if a single-table
- tablespace whose size we do not know yet;
- last incomplete megabytes in data files may be
- ignored if space == 0 */
- ulint n_reserved_extents;
- /* number of reserved free extents for
- ongoing operations like B-tree page split */
- ulint n_pending_flushes; /* this is > 0 when flushing
- the tablespace to disk; dropping of the
- tablespace is forbidden if this is > 0 */
- ulint n_pending_ibuf_merges;/* this is > 0 when merging
- insert buffer entries to a page so that we
- may need to access the ibuf bitmap page in the
- tablespade: dropping of the tablespace is
- forbidden if this is > 0 */
- hash_node_t hash; /* hash chain node */
- hash_node_t name_hash;/* hash chain the name_hash table */
- rw_lock_t latch; /* latch protecting the file space storage
- allocation */
- UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
- /* list of spaces with at least one unflushed
- file we have written to */
- ibool is_in_unflushed_spaces; /* TRUE if this space is
- currently in the list above */
- UT_LIST_NODE_T(fil_space_t) space_list;
- /* list of all spaces */
- ibuf_data_t* ibuf_data;
- /* insert buffer data */
- ulint magic_n;
-};
-
-#define FIL_SPACE_MAGIC_N 89472
-
-/* The tablespace memory cache; also the totality of logs = the log data space,
-is stored here; below we talk about tablespaces, but also the ib_logfiles
-form a 'space' and it is handled here */
-
-typedef struct fil_system_struct fil_system_t;
-struct fil_system_struct {
- mutex_t mutex; /* The mutex protecting the cache */
- hash_table_t* spaces; /* The hash table of spaces in the
- system; they are hashed on the space
- id */
- hash_table_t* name_hash; /* hash table based on the space
- name */
- UT_LIST_BASE_NODE_T(fil_node_t) LRU;
- /* base node for the LRU list of the
- most recently used open files with no
- pending i/o's; if we start an i/o on
- the file, we first remove it from this
- list, and return it to the start of
- the list when the i/o ends;
- log files and the system tablespace are
- not put to this list: they are opened
- after the startup, and kept open until
- shutdown */
- UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
- /* base node for the list of those
- tablespaces whose files contain
- unflushed writes; those spaces have
- at least one file node where
- modification_counter > flush_counter */
- ulint n_open; /* number of files currently open */
- ulint max_n_open; /* n_open is not allowed to exceed
- this */
- ib_longlong modification_counter;/* when we write to a file we
- increment this by one */
- ulint max_assigned_id;/* maximum space id in the existing
- tables, or assigned during the time
- mysqld has been up; at an InnoDB
- startup we scan the data dictionary
- and set here the maximum of the
- space id's of the tables there */
- ib_longlong tablespace_version;
- /* a counter which is incremented for
- every space object memory creation;
- every space mem object gets a
- 'timestamp' from this; in DISCARD/
- IMPORT this is used to check if we
- should ignore an insert buffer merge
- request */
- UT_LIST_BASE_NODE_T(fil_space_t) space_list;
- /* list of all file spaces */
-};
-
-/* The tablespace memory cache. This variable is NULL before the module is
-initialized. */
-fil_system_t* fil_system = NULL;
-
-
-/************************************************************************
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Prepares a file node for i/o. Opens the file if it is closed. Updates the
-pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex. */
-static
-void
-fil_node_prepare_for_io(
-/*====================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- fil_space_t* space); /* in: space */
-/************************************************************************
-Updates the data structures when an i/o operation finishes. Updates the
-pending i/o's field in the node appropriately. */
-static
-void
-fil_node_complete_io(
-/*=================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- ulint type); /* in: OS_FILE_WRITE or OS_FILE_READ; marks
- the node as modified if
- type == OS_FILE_WRITE */
-/***********************************************************************
-Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache. */
-static
-ulint
-fil_get_space_id_for_table(
-/*=======================*/
- /* out: space id, ULINT_UNDEFINED if not
- found */
- const char* name); /* in: table name in the standard
- 'databasename/tablename' format */
-
-
-/***********************************************************************
-Returns the version number of a tablespace, -1 if not found. */
-
-ib_longlong
-fil_space_get_version(
-/*==================*/
- /* out: version number, -1 if the tablespace does not
- exist in the memory cache */
- ulint id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- ib_longlong version = -1;
-
- ut_ad(system);
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- if (space) {
- version = space->tablespace_version;
- }
-
- mutex_exit(&(system->mutex));
-
- return(version);
-}
-
-/***********************************************************************
-Returns the latch of a file space. */
-
-rw_lock_t*
-fil_space_get_latch(
-/*================*/
- /* out: latch protecting storage allocation */
- ulint id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
-
- ut_ad(system);
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- ut_a(space);
-
- mutex_exit(&(system->mutex));
-
- return(&(space->latch));
-}
-
-/***********************************************************************
-Returns the type of a file space. */
-
-ulint
-fil_space_get_type(
-/*===============*/
- /* out: FIL_TABLESPACE or FIL_LOG */
- ulint id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
-
- ut_ad(system);
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- ut_a(space);
-
- mutex_exit(&(system->mutex));
-
- return(space->purpose);
-}
-
-/***********************************************************************
-Returns the ibuf data of a file space. */
-
-ibuf_data_t*
-fil_space_get_ibuf_data(
-/*====================*/
- /* out: ibuf data for this space */
- ulint id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
-
- ut_ad(system);
-
- ut_a(id == 0);
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- mutex_exit(&(system->mutex));
-
- ut_a(space);
-
- return(space->ibuf_data);
-}
-
-/**************************************************************************
-Checks if all the file nodes in a space are flushed. The caller must hold
-the fil_system mutex. */
-static
-ibool
-fil_space_is_flushed(
-/*=================*/
- /* out: TRUE if all are flushed */
- fil_space_t* space) /* in: space */
-{
- fil_node_t* node;
-
- ut_ad(mutex_own(&(fil_system->mutex)));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- while (node) {
- if (node->modification_counter > node->flush_counter) {
-
- return(FALSE);
- }
-
- node = UT_LIST_GET_NEXT(chain, node);
- }
-
- return(TRUE);
-}
-
-/***********************************************************************
-Appends a new file to the chain of files of a space. File must be closed. */
-
-void
-fil_node_create(
-/*============*/
- const char* name, /* in: file name (file must be closed) */
- ulint size, /* in: file size in database blocks, rounded
- downwards to an integer */
- ulint id, /* in: space id where to append */
- ibool is_raw) /* in: TRUE if a raw device or
- a raw disk partition */
-{
- fil_system_t* system = fil_system;
- fil_node_t* node;
- fil_space_t* space;
-
- ut_a(system);
- ut_a(name);
-
- mutex_enter(&(system->mutex));
-
- node = mem_alloc(sizeof(fil_node_t));
-
- node->name = mem_strdup(name);
- node->open = FALSE;
-
- ut_a(!is_raw || srv_start_raw_disk_in_use);
-
- node->is_raw_disk = is_raw;
- node->size = size;
- node->magic_n = FIL_NODE_MAGIC_N;
- node->n_pending = 0;
- node->n_pending_flushes = 0;
-
- node->modification_counter = 0;
- node->flush_counter = 0;
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- if (!space) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Could not find tablespace %lu for\n"
- "InnoDB: file ", (ulong) id);
- ut_print_filename(stderr, name);
- fputs(" in the tablespace memory cache.\n", stderr);
- mem_free(node->name);
-
- mem_free(node);
-
- mutex_exit(&(system->mutex));
-
- return;
- }
-
- space->size += size;
-
- node->space = space;
-
- UT_LIST_ADD_LAST(chain, space->chain, node);
-
- mutex_exit(&(system->mutex));
-}
-
-/************************************************************************
-Opens a the file of a node of a tablespace. The caller must own the fil_system
-mutex. */
-static
-void
-fil_node_open_file(
-/*===============*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- fil_space_t* space) /* in: space */
-{
- ib_longlong size_bytes;
- ulint size_low;
- ulint size_high;
- ibool ret;
- ibool success;
-#ifndef UNIV_HOTBACKUP
- byte* buf2;
- byte* page;
- ulint space_id;
-#endif /* !UNIV_HOTBACKUP */
-
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->n_pending == 0);
- ut_a(node->open == FALSE);
-
- if (node->size == 0) {
- /* It must be a single-table tablespace and we do not know the
- size of the file yet. First we open the file in the normal
- mode, no async I/O here, for simplicity. Then do some checks,
- and close the file again.
- NOTE that we could not use the simple file read function
- os_file_read() in Windows to read from a file opened for
- async I/O! */
-
- node->handle = os_file_create_simple_no_error_handling(
- node->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Fatal error: cannot open %s\n."
- "InnoDB: Have you deleted .ibd files"
- " under a running mysqld server?\n",
- node->name);
- ut_a(0);
- }
-
- os_file_get_size(node->handle, &size_low, &size_high);
-
- size_bytes = (((ib_longlong)size_high) << 32)
- + (ib_longlong)size_low;
-#ifdef UNIV_HOTBACKUP
- node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
-
-#else
- ut_a(space->purpose != FIL_LOG);
- ut_a(space->id != 0);
-
- if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: the size of single-table"
- " tablespace file %s\n"
- "InnoDB: is only %lu %lu,"
- " should be at least %lu!\n",
- node->name,
- (ulong) size_high,
- (ulong) size_low,
- (ulong) (FIL_IBD_FILE_INITIAL_SIZE
- * UNIV_PAGE_SIZE));
-
- ut_a(0);
- }
-
- /* Read the first page of the tablespace */
-
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
- /* Align the memory for file i/o if we might have O_DIRECT
- set */
- page = ut_align(buf2, UNIV_PAGE_SIZE);
-
- success = os_file_read(node->handle, page, 0, 0,
- UNIV_PAGE_SIZE);
- space_id = fsp_header_get_space_id(page);
-
- ut_free(buf2);
-
- /* Close the file now that we have read the space id from it */
-
- os_file_close(node->handle);
-
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
- fprintf(stderr,
- "InnoDB: Error: tablespace id %lu"
- " in file %s is not sensible\n",
- (ulong) space_id, node->name);
-
- ut_a(0);
- }
-
- if (space_id != space->id) {
- fprintf(stderr,
- "InnoDB: Error: tablespace id is %lu"
- " in the data dictionary\n"
- "InnoDB: but in file %s it is %lu!\n",
- space->id, node->name, space_id);
-
- ut_a(0);
- }
-
- if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
- node->size = (ulint)
- ((size_bytes / (1024 * 1024))
- * ((1024 * 1024) / UNIV_PAGE_SIZE));
- } else {
- node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
- }
-#endif
- space->size += node->size;
- }
-
- /* printf("Opening file %s\n", node->name); */
-
- /* Open the file for reading and writing, in Windows normally in the
- unbuffered async I/O mode, though global variables may make
- os_file_create() to fall back to the normal file I/O mode. */
-
- if (space->purpose == FIL_LOG) {
- node->handle = os_file_create(node->name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_LOG_FILE, &ret);
- } else if (node->is_raw_disk) {
- node->handle = os_file_create(node->name,
- OS_FILE_OPEN_RAW,
- OS_FILE_AIO, OS_DATA_FILE, &ret);
- } else {
- node->handle = os_file_create(node->name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_DATA_FILE, &ret);
- }
-
- ut_a(ret);
-
- node->open = TRUE;
-
- system->n_open++;
-
- if (space->purpose == FIL_TABLESPACE && space->id != 0) {
- /* Put the node to the LRU list */
- UT_LIST_ADD_FIRST(LRU, system->LRU, node);
- }
-}
-
-/**************************************************************************
-Closes a file. */
-static
-void
-fil_node_close_file(
-/*================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system) /* in: tablespace memory cache */
-{
- ibool ret;
-
- ut_ad(node && system);
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->open);
- ut_a(node->n_pending == 0);
- ut_a(node->n_pending_flushes == 0);
- ut_a(node->modification_counter == node->flush_counter);
-
- ret = os_file_close(node->handle);
- ut_a(ret);
-
- /* printf("Closing file %s\n", node->name); */
-
- node->open = FALSE;
- ut_a(system->n_open > 0);
- system->n_open--;
-
- if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
- ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
-
- /* The node is in the LRU list, remove it */
- UT_LIST_REMOVE(LRU, system->LRU, node);
- }
-}
-
-/************************************************************************
-Tries to close a file in the LRU list. The caller must hold the fil_sys
-mutex. */
-static
-ibool
-fil_try_to_close_file_in_LRU(
-/*=========================*/
- /* out: TRUE if success, FALSE if should retry
- later; since i/o's generally complete in <
- 100 ms, and as InnoDB writes at most 128 pages
- from the buffer pool in a batch, and then
- immediately flushes the files, there is a good
- chance that the next time we find a suitable
- node from the LRU list */
- ibool print_info) /* in: if TRUE, prints information why it
- cannot close a file */
-{
- fil_system_t* system = fil_system;
- fil_node_t* node;
-
- ut_ad(mutex_own(&(system->mutex)));
-
- node = UT_LIST_GET_LAST(system->LRU);
-
- if (print_info) {
- fprintf(stderr,
- "InnoDB: fil_sys open file LRU len %lu\n",
- (ulong) UT_LIST_GET_LEN(system->LRU));
- }
-
- while (node != NULL) {
- if (node->modification_counter == node->flush_counter
- && node->n_pending_flushes == 0) {
-
- fil_node_close_file(node, system);
-
- return(TRUE);
- }
-
- if (print_info && node->n_pending_flushes > 0) {
- fputs("InnoDB: cannot close file ", stderr);
- ut_print_filename(stderr, node->name);
- fprintf(stderr, ", because n_pending_flushes %lu\n",
- (ulong) node->n_pending_flushes);
- }
-
- if (print_info
- && node->modification_counter != node->flush_counter) {
- fputs("InnoDB: cannot close file ", stderr);
- ut_print_filename(stderr, node->name);
- fprintf(stderr,
- ", because mod_count %ld != fl_count %ld\n",
- (long) node->modification_counter,
- (long) node->flush_counter);
- }
-
- node = UT_LIST_GET_PREV(LRU, node);
- }
-
- return(FALSE);
-}
-
-/***********************************************************************
-Reserves the fil_system mutex and tries to make sure we can open at least one
-file while holding it. This should be called before calling
-fil_node_prepare_for_io(), because that function may need to open a file. */
-static
-void
-fil_mutex_enter_and_prepare_for_io(
-/*===============================*/
- ulint space_id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- ibool success;
- ibool print_info = FALSE;
- ulint count = 0;
- ulint count2 = 0;
-
- ut_ad(!mutex_own(&(system->mutex)));
-retry:
- mutex_enter(&(system->mutex));
-
- if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
- /* We keep log files and system tablespace files always open;
- this is important in preventing deadlocks in this module, as
- a page read completion often performs another read from the
- insert buffer. The insert buffer is in tablespace 0, and we
- cannot end up waiting in this function. */
-
- return;
- }
-
- if (system->n_open < system->max_n_open) {
-
- return;
- }
-
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
- if (space != NULL && space->stop_ios) {
- /* We are going to do a rename file and want to stop new i/o's
- for a while */
-
- if (count2 > 20000) {
- fputs("InnoDB: Warning: tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr,
- " has i/o ops stopped for a long time %lu\n",
- (ulong) count2);
- }
-
- mutex_exit(&(system->mutex));
-
- os_thread_sleep(20000);
-
- count2++;
-
- goto retry;
- }
-
- /* If the file is already open, no need to do anything; if the space
- does not exist, we handle the situation in the function which called
- this function */
-
- if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
-
- return;
- }
-
- if (count > 1) {
- print_info = TRUE;
- }
-
- /* Too many files are open, try to close some */
-close_more:
- success = fil_try_to_close_file_in_LRU(print_info);
-
- if (success && system->n_open >= system->max_n_open) {
-
- goto close_more;
- }
-
- if (system->n_open < system->max_n_open) {
- /* Ok */
-
- return;
- }
-
- if (count >= 2) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: too many (%lu) files stay open"
- " while the maximum\n"
- "InnoDB: allowed value would be %lu.\n"
- "InnoDB: You may need to raise the value of"
- " innodb_max_files_open in\n"
- "InnoDB: my.cnf.\n",
- (ulong) system->n_open, (ulong) system->max_n_open);
-
- return;
- }
-
- mutex_exit(&(system->mutex));
-
-#ifndef UNIV_HOTBACKUP
- /* Wake the i/o-handler threads to make sure pending i/o's are
- performed */
- os_aio_simulated_wake_handler_threads();
-
- os_thread_sleep(20000);
-#endif
- /* Flush tablespaces so that we can close modified files in the LRU
- list */
-
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- count++;
-
- goto retry;
-}
-
-/***********************************************************************
-Frees a file node object from a tablespace memory cache. */
-static
-void
-fil_node_free(
-/*==========*/
- fil_node_t* node, /* in, own: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- fil_space_t* space) /* in: space where the file node is chained */
-{
- ut_ad(node && system && space);
- ut_ad(mutex_own(&(system->mutex)));
- ut_a(node->magic_n == FIL_NODE_MAGIC_N);
- ut_a(node->n_pending == 0);
-
- if (node->open) {
- /* We fool the assertion in fil_node_close_file() to think
- there are no unflushed modifications in the file */
-
- node->modification_counter = node->flush_counter;
-
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
-
- space->is_in_unflushed_spaces = FALSE;
-
- UT_LIST_REMOVE(unflushed_spaces,
- system->unflushed_spaces,
- space);
- }
-
- fil_node_close_file(node, system);
- }
-
- space->size -= node->size;
-
- UT_LIST_REMOVE(chain, space->chain, node);
-
- mem_free(node->name);
- mem_free(node);
-}
-
-/********************************************************************
-Drops files from the start of a file space, so that its size is cut by
-the amount given. */
-
-void
-fil_space_truncate_start(
-/*=====================*/
- ulint id, /* in: space id */
- ulint trunc_len) /* in: truncate by this much; it is an error
- if this does not equal to the combined size of
- some initial files in the space */
-{
- fil_system_t* system = fil_system;
- fil_node_t* node;
- fil_space_t* space;
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- ut_a(space);
-
- while (trunc_len > 0) {
- node = UT_LIST_GET_FIRST(space->chain);
-
- ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len);
-
- trunc_len -= node->size * UNIV_PAGE_SIZE;
-
- fil_node_free(node, system, space);
- }
-
- mutex_exit(&(system->mutex));
-}
-
-/***********************************************************************
-Creates a space memory object and puts it to the tablespace memory cache. If
-there is an error, prints an error message to the .err log. */
-
-ibool
-fil_space_create(
-/*=============*/
- /* out: TRUE if success */
- const char* name, /* in: space name */
- ulint id, /* in: space id */
- ulint purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- ulint namesake_id;
-try_again:
- /*printf(
- "InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
- purpose);*/
-
- ut_a(system);
- ut_a(name);
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(name), space,
- 0 == strcmp(name, space->name));
- if (space != NULL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: trying to init to the"
- " tablespace memory cache\n"
- "InnoDB: a tablespace %lu of name ", (ulong) id);
- ut_print_filename(stderr, name);
- fprintf(stderr, ",\n"
- "InnoDB: but a tablespace %lu of the same name\n"
- "InnoDB: already exists in the"
- " tablespace memory cache!\n",
- (ulong) space->id);
-
- if (id == 0 || purpose != FIL_TABLESPACE) {
-
- mutex_exit(&(system->mutex));
-
- return(FALSE);
- }
-
- fprintf(stderr,
- "InnoDB: We assume that InnoDB did a crash recovery,"
- " and you had\n"
- "InnoDB: an .ibd file for which the table"
- " did not exist in the\n"
- "InnoDB: InnoDB internal data dictionary in the"
- " ibdata files.\n"
- "InnoDB: We assume that you later removed the"
- " .ibd and .frm files,\n"
- "InnoDB: and are now trying to recreate the table."
- " We now remove the\n"
- "InnoDB: conflicting tablespace object"
- " from the memory cache and try\n"
- "InnoDB: the init again.\n");
-
- namesake_id = space->id;
-
- mutex_exit(&(system->mutex));
-
- fil_space_free(namesake_id);
-
- goto try_again;
- }
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- if (space != NULL) {
- fprintf(stderr,
- "InnoDB: Error: trying to add tablespace %lu"
- " of name ", (ulong) id);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: to the tablespace memory cache,"
- " but tablespace\n"
- "InnoDB: %lu of name ", (ulong) space->id);
- ut_print_filename(stderr, space->name);
- fputs(" already exists in the tablespace\n"
- "InnoDB: memory cache!\n", stderr);
-
- mutex_exit(&(system->mutex));
-
- return(FALSE);
- }
-
- space = mem_alloc(sizeof(fil_space_t));
-
- space->name = mem_strdup(name);
- space->id = id;
-
- system->tablespace_version++;
- space->tablespace_version = system->tablespace_version;
- space->mark = FALSE;
-
- if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) {
- system->max_assigned_id = id;
- }
-
- space->stop_ios = FALSE;
- space->stop_ibuf_merges = FALSE;
- space->is_being_deleted = FALSE;
- space->purpose = purpose;
- space->size = 0;
-
- space->n_reserved_extents = 0;
-
- space->n_pending_flushes = 0;
- space->n_pending_ibuf_merges = 0;
-
- UT_LIST_INIT(space->chain);
- space->magic_n = FIL_SPACE_MAGIC_N;
-
- space->ibuf_data = NULL;
-
- rw_lock_create(&space->latch, SYNC_FSP);
-
- HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
-
- HASH_INSERT(fil_space_t, name_hash, system->name_hash,
- ut_fold_string(name), space);
- space->is_in_unflushed_spaces = FALSE;
-
- UT_LIST_ADD_LAST(space_list, system->space_list, space);
-
- mutex_exit(&(system->mutex));
-
- return(TRUE);
-}
-
-/***********************************************************************
-Hands out the next id for a new single-table tablespace by bumping the global
-max_assigned_id counter while holding the fil_system mutex. A warning is
-printed at every millionth id once the counter has passed half of
-SRV_LOG_SPACE_FIRST_ID. When the counter reaches SRV_LOG_SPACE_FIRST_ID an
-error is printed, the increment is undone and no id is handed out. */
-static
-ulint
-fil_assign_new_space_id(void)
-/*=========================*/
-				/* out: new tablespace id; ULINT_UNDEFINED if could
-				not assign an id */
-{
-	fil_system_t*	cache	= fil_system;
-	ulint		new_id;
-
-	mutex_enter(&(cache->mutex));
-
-	new_id = ++cache->max_assigned_id;
-
-	/* Early warning: more than half of the id range has been used */
-	if ((new_id % 1000000UL == 0)
-	    && new_id > (SRV_LOG_SPACE_FIRST_ID / 2)) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"InnoDB: Warning: you are running out of new"
-			" single-table tablespace id's.\n"
-			"InnoDB: Current counter is %lu and it"
-			" must not exceed %lu!\n"
-			"InnoDB: To reset the counter to zero"
-			" you have to dump all your tables and\n"
-			"InnoDB: recreate the whole InnoDB installation.\n",
-			(ulong) new_id,
-			(ulong) SRV_LOG_SPACE_FIRST_ID);
-	}
-
-	/* Hard limit: undo the increment and report failure */
-	if (new_id >= SRV_LOG_SPACE_FIRST_ID) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"InnoDB: You have run out of single-table"
-			" tablespace id's!\n"
-			"InnoDB: Current counter is %lu.\n"
-			"InnoDB: To reset the counter to zero you"
-			" have to dump all your tables and\n"
-			"InnoDB: recreate the whole InnoDB installation.\n",
-			(ulong) new_id);
-
-		cache->max_assigned_id--;
-		new_id = ULINT_UNDEFINED;
-	}
-
-	mutex_exit(&(cache->mutex));
-
-	return(new_id);
-}
-
-/***********************************************************************
-Frees a space object from the tablespace memory cache. Closes the files in
-the chain but does not delete them. There must not be any pending i/o's or
-flushes on the files.
-
-The space is looked up by id under the fil_system mutex and unlinked from the
-id hash, the name hash, the unflushed_spaces list (if it is on it) and the
-space_list. Every file node in the chain is then released with
-fil_node_free(). The latch, the name string and the space object itself are
-freed only after the mutex has been released. */
-
-ibool
-fil_space_free(
-/*===========*/
- /* out: TRUE if success; FALSE if no space with this
- id is in the cache (an error is printed to stderr) */
- ulint id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- fil_space_t* namespace;
- fil_node_t* fil_node;
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- if (!space) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to remove tablespace %lu"
- " from the cache but\n"
- "InnoDB: it is not there.\n", (ulong) id);
-
- mutex_exit(&(system->mutex));
-
- return(FALSE);
- }
-
- HASH_DELETE(fil_space_t, hash, system->spaces, id, space); /* unlink from the id hash */
-
- HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(space->name),
- namespace, 0 == strcmp(space->name, namespace->name));
- ut_a(namespace); /* the name hash must contain an entry with this name ... */
- ut_a(space == namespace); /* ... and it must be this very object */
-
- HASH_DELETE(fil_space_t, name_hash, system->name_hash,
- ut_fold_string(space->name), space);
-
- if (space->is_in_unflushed_spaces) {
- space->is_in_unflushed_spaces = FALSE;
-
- UT_LIST_REMOVE(unflushed_spaces, system->unflushed_spaces,
- space);
- }
-
- UT_LIST_REMOVE(space_list, system->space_list, space);
-
- ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_a(0 == space->n_pending_flushes);
-
- fil_node = UT_LIST_GET_FIRST(space->chain);
-
- while (fil_node != NULL) { /* the first node is re-read after each free; the loop ends when the chain is empty */
- fil_node_free(fil_node, system, space);
-
- fil_node = UT_LIST_GET_FIRST(space->chain);
- }
-
- ut_a(0 == UT_LIST_GET_LEN(space->chain));
-
- mutex_exit(&(system->mutex));
-
- rw_lock_free(&(space->latch)); /* the space is no longer linked into the cache structures above */
-
- mem_free(space->name);
- mem_free(space);
-
- return(TRUE);
-}
-
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************
-Looks up a tablespace object by its id in the id hash of the tablespace
-memory cache. The function itself takes no mutex. */
-static
-fil_space_t*
-fil_get_space_for_id_low(
-/*=====================*/
-			/* out: tablespace object or NULL; NOTE that you must
-			own &(fil_system->mutex) to call this function! */
-	ulint	id)	/* in: space id */
-{
-	fil_space_t*	found;
-
-	ut_ad(fil_system);
-
-	HASH_SEARCH(hash, fil_system->spaces, id, found, found->id == id);
-
-	return(found);
-}
-#endif
-
-/***********************************************************************
-Returns the size of a cached tablespace in pages. If the cached size of a
-FIL_TABLESPACE space is still 0, its single file node is passed through
-fil_node_prepare_for_io() and fil_node_complete_io() first, so that the file
-gets opened and the size fields are filled in. */
-
-ulint
-fil_space_get_size(
-/*===============*/
-			/* out: space size, 0 if space not found */
-	ulint	id)	/* in: space id */
-{
-	fil_system_t*	cache	= fil_system;
-	fil_space_t*	space;
-	ulint		n_pages	= 0;
-
-	ut_ad(cache);
-
-	fil_mutex_enter_and_prepare_for_io(id);
-
-	HASH_SEARCH(hash, cache->spaces, id, space, space->id == id);
-
-	if (space != NULL) {
-		if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
-			fil_node_t*	only_node;
-
-			/* It must be a single-table tablespace whose file
-			has not been opened yet */
-			ut_a(id != 0);
-			ut_a(1 == UT_LIST_GET_LEN(space->chain));
-
-			only_node = UT_LIST_GET_FIRST(space->chain);
-
-			/* The pair of calls below opens the file and
-			updates the size fields */
-			fil_node_prepare_for_io(only_node, cache, space);
-			fil_node_complete_io(only_node, cache, OS_FILE_READ);
-		}
-
-		n_pages = space->size;
-	}
-
-	mutex_exit(&(cache->mutex));
-
-	return(n_pages);
-}
-
-/***********************************************************************
-Tells whether page_no lies inside the tablespace with the given id, i.e.
-whether it is smaller than the size reported by fil_space_get_size(). A space
-that is not found reports size 0, so every page number is rejected for it. */
-
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
-			/* out: TRUE if the address is meaningful */
-	ulint	id,	/* in: space id */
-	ulint	page_no)/* in: page number */
-{
-	return((fil_space_get_size(id) > page_no) ? TRUE : FALSE);
-}
-
-/********************************************************************
-Creates the tablespace memory cache: allocates the fil_system_t object,
-creates its mutex and the two hash tables (by space id and by name), and
-resets all counters and lists to their empty state. */
-static
-fil_system_t*
-fil_system_create(
-/*==============*/
-				/* out, own: tablespace memory cache */
-	ulint	hash_size,	/* in: hash table size; must be > 0 */
-	ulint	max_n_open)	/* in: maximum number of open files; the
-				contract asks for > 10, but only > 0 is
-				asserted here */
-{
-	fil_system_t*	cache;
-
-	ut_a(hash_size > 0);
-	ut_a(max_n_open > 0);
-
-	cache = mem_alloc(sizeof(fil_system_t));
-
-	mutex_create(&cache->mutex, SYNC_ANY_LATCH);
-
-	/* Both lookup tables are created with the same size */
-	cache->spaces = hash_create(hash_size);
-	cache->name_hash = hash_create(hash_size);
-
-	/* Lists start out empty */
-	UT_LIST_INIT(cache->LRU);
-	UT_LIST_INIT(cache->unflushed_spaces);
-	UT_LIST_INIT(cache->space_list);
-
-	/* Counters start from zero */
-	cache->n_open = 0;
-	cache->modification_counter = 0;
-	cache->max_assigned_id = 0;
-	cache->tablespace_version = 0;
-
-	cache->max_n_open = max_n_open;
-
-	return(cache);
-}
-
-/********************************************************************
-Initializes the tablespace memory cache. Must be called only once: it asserts
-that fil_system has not been set yet. The hash table size is 50000 when
-srv_file_per_table is set and 5000 otherwise. */
-
-void
-fil_init(
-/*=====*/
-	ulint	max_n_open)	/* in: max number of open files */
-{
-	ut_a(fil_system == NULL);
-
-	fil_system = fil_system_create(srv_file_per_table ? 50000 : 5000,
-				       max_n_open);
-}
-
-/***********************************************************************
-Opens all log files and system tablespace data files. They stay open until the
-database server shutdown. This should be called at a server startup after the
-space objects for the log and the system tablespace have been created. The
-purpose of this operation is to make sure we never run out of file descriptors
-if we need to read from the insert buffer or to write to the log.
-
-Spaces of purpose FIL_TABLESPACE with a nonzero id are left untouched. After
-each handled file node a warning is printed if fewer than 10 file slots
-remain below max_n_open. */
-
-void
-fil_open_log_and_system_tablespace_files(void)
-/*==========================================*/
-{
-	fil_system_t*	cache	= fil_system;
-	fil_space_t*	cur_space;
-	fil_node_t*	cur_node;
-
-	mutex_enter(&(cache->mutex));
-
-	for (cur_space = UT_LIST_GET_FIRST(cache->space_list);
-	     cur_space != NULL;
-	     cur_space = UT_LIST_GET_NEXT(space_list, cur_space)) {
-
-		if (cur_space->purpose == FIL_TABLESPACE
-		    && cur_space->id != 0) {
-			/* Not a log space and not space 0: not handled
-			by this function */
-			continue;
-		}
-
-		for (cur_node = UT_LIST_GET_FIRST(cur_space->chain);
-		     cur_node != NULL;
-		     cur_node = UT_LIST_GET_NEXT(chain, cur_node)) {
-
-			if (!cur_node->open) {
-				fil_node_open_file(cur_node, cache,
-						   cur_space);
-			}
-
-			if (cache->max_n_open < 10 + cache->n_open) {
-				fprintf(stderr,
-					"InnoDB: Warning: you must"
-					" raise the value of"
-					" innodb_max_open_files in\n"
-					"InnoDB: my.cnf! Remember that"
-					" InnoDB keeps all log files"
-					" and all system\n"
-					"InnoDB: tablespace files open"
-					" for the whole time mysqld is"
-					" running, and\n"
-					"InnoDB: needs to open also"
-					" some .ibd files if the"
-					" file-per-table storage\n"
-					"InnoDB: model is used."
-					" Current open files %lu,"
-					" max allowed"
-					" open files %lu.\n",
-					(ulong) cache->n_open,
-					(ulong) cache->max_n_open);
-			}
-		}
-	}
-
-	mutex_exit(&(cache->mutex));
-}
-
-/***********************************************************************
-Closes every file node that is currently open, in every space of the
-tablespace memory cache, while holding the fil_system mutex. There must not
-be any pending i/o's or not flushed modifications in the files. */
-
-void
-fil_close_all_files(void)
-/*=====================*/
-{
-	fil_system_t*	cache	= fil_system;
-	fil_space_t*	cur_space;
-	fil_node_t*	cur_node;
-
-	mutex_enter(&(cache->mutex));
-
-	for (cur_space = UT_LIST_GET_FIRST(cache->space_list);
-	     cur_space != NULL;
-	     cur_space = UT_LIST_GET_NEXT(space_list, cur_space)) {
-
-		for (cur_node = UT_LIST_GET_FIRST(cur_space->chain);
-		     cur_node != NULL;
-		     cur_node = UT_LIST_GET_NEXT(chain, cur_node)) {
-
-			if (cur_node->open) {
-				fil_node_close_file(cur_node, cache);
-			}
-		}
-	}
-
-	mutex_exit(&(cache->mutex));
-}
-
-/***********************************************************************
-Raises the max tablespace id counter to max_id if max_id is bigger than the
-current value; the counter is never lowered. An id at or above
-SRV_LOG_SPACE_FIRST_ID is treated as a fatal error: a message is printed and
-an assertion fails. */
-
-void
-fil_set_max_space_id_if_bigger(
-/*===========================*/
-	ulint	max_id)	/* in: maximum known id */
-{
-	fil_system_t*	cache	= fil_system;
-
-	if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
-		fprintf(stderr,
-			"InnoDB: Fatal error: max tablespace id"
-			" is too high, %lu\n", (ulong) max_id);
-		ut_a(0);
-	}
-
-	mutex_enter(&(cache->mutex));
-
-	if (max_id > cache->max_assigned_id) {
-		cache->max_assigned_id = max_id;
-	}
-
-	mutex_exit(&(cache->mutex));
-}
-
-/********************************************************************
-Initializes the ibuf data structure for space 0 == the system tablespace.
-This can be called after the file space headers have been created and the
-dictionary system has been initialized.
-
-The space used is the first element of fil_system->space_list; it is asserted
-to exist and to be of purpose FIL_TABLESPACE. */
-
-void
-fil_ibuf_init_at_db_start(void)
-/*===========================*/
-{
-	fil_space_t*	first_space;
-
-	first_space = UT_LIST_GET_FIRST(fil_system->space_list);
-
-	ut_a(first_space);
-	ut_a(first_space->purpose == FIL_TABLESPACE);
-
-	first_space->ibuf_data = ibuf_data_init_for_space(first_space->id);
-}
-
-/********************************************************************
-Writes the flushed lsn to the page header of the first page of a data file.
-The page is read into a page-aligned scratch buffer, the lsn is stored at
-offset FIL_PAGE_FILE_FLUSH_LSN, and the page is written back. Both the read
-and the write are issued with the first (sync) argument set to TRUE, so the
-scratch buffer is no longer needed when they return and is freed here;
-previously it was leaked on every call. The arch_log_no parameter is
-accepted but not used. */
-static
-ulint
-fil_write_lsn_and_arch_no_to_file(
-/*==============================*/
-				/* out: always DB_SUCCESS */
-	ulint	space_id,	/* in: space number */
-	ulint	sum_of_sizes,	/* in: combined size of previous files in
-				space, in database pages */
-	dulint	lsn,		/* in: lsn to write */
-	ulint	arch_log_no	/* in: archived log number to write */
-	__attribute__((unused)))
-{
-	byte*	buf1;
-	byte*	buf;
-
-	/* Allocate two pages so that a page-aligned window of one full page
-	always fits inside the allocation */
-	buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
-	buf = ut_align(buf1, UNIV_PAGE_SIZE);
-
-	fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
-
-	mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
-
-	fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
-
-	/* Free the original (unaligned) pointer, not the aligned one */
-	mem_free(buf1);
-
-	return(DB_SUCCESS);
-}
-
-/********************************************************************
-Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file in the system tablespace.
-
-The space list is walked under the fil_system mutex, but the mutex is
-released around every call to fil_write_lsn_and_arch_no_to_file() and
-re-acquired afterwards. Only spaces with purpose FIL_TABLESPACE and id 0 are
-processed. */
-
-ulint
-fil_write_flushed_lsn_to_data_files(
-/*================================*/
- /* out: DB_SUCCESS or error number */
- dulint lsn, /* in: lsn to write */
- ulint arch_log_no) /* in: latest archived log file number */
-{
- fil_space_t* space;
- fil_node_t* node;
- ulint sum_of_sizes;
- ulint err;
-
- mutex_enter(&(fil_system->mutex));
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space) {
- /* We only write the lsn to all existing data files which have
- been open during the lifetime of the mysqld process; they are
- represented by the space objects in the tablespace memory
- cache. Note that all data files in the system tablespace 0 are
- always open. */
-
- if (space->purpose == FIL_TABLESPACE
- && space->id == 0) {
- sum_of_sizes = 0; /* page offset of the current file's first page within the space */
-
- node = UT_LIST_GET_FIRST(space->chain);
- while (node) {
- mutex_exit(&(fil_system->mutex)); /* not held during the write call below */
-
- err = fil_write_lsn_and_arch_no_to_file(
- space->id, sum_of_sizes, lsn,
- arch_log_no);
- if (err != DB_SUCCESS) {
-
- return(err); /* the mutex is not held at this point */
- }
-
- mutex_enter(&(fil_system->mutex));
-
- sum_of_sizes += node->size; /* advance past this file to the next file's first page */
- node = UT_LIST_GET_NEXT(chain, node);
- }
- }
- space = UT_LIST_GET_NEXT(space_list, space);
- }
-
- mutex_exit(&(fil_system->mutex));
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************************
-Reads the flushed lsn and arch no fields from a data file at database
-startup.
-
-The first page of the file is read into a page-aligned scratch buffer and the
-8-byte value at FIL_PAGE_FILE_FLUSH_LSN is extracted. If one_read_already is
-FALSE the min and max outputs are both set to that value; otherwise they are
-only widened so that the value lies between them. The return value of
-os_file_read() is not checked here.
-
-NOTE(review): the UNIV_LOG_ARCHIVE branches below use a variable arch_log_no
-that is neither declared nor assigned in the visible code of this function;
-confirm whether builds with UNIV_LOG_ARCHIVE defined compile at all. */
-
-void
-fil_read_flushed_lsn_and_arch_log_no(
-/*=================================*/
- os_file_t data_file, /* in: open data file */
- ibool one_read_already, /* in: TRUE if min and max parameters
- below already contain sensible data */
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no, /* in/out: smallest archived log number seen so far */
- ulint* max_arch_log_no, /* in/out: largest archived log number seen so far */
-#endif /* UNIV_LOG_ARCHIVE */
- dulint* min_flushed_lsn, /* in/out: smallest flushed lsn seen so far */
- dulint* max_flushed_lsn) /* in/out: largest flushed lsn seen so far */
-{
- byte* buf;
- byte* buf2;
- dulint flushed_lsn;
-
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
- /* Align the memory for a possible read from a raw device */
- buf = ut_align(buf2, UNIV_PAGE_SIZE);
-
- os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
-
- flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
-
- ut_free(buf2); /* the value has been copied out; free the unaligned pointer */
-
- if (!one_read_already) {
- *min_flushed_lsn = flushed_lsn;
- *max_flushed_lsn = flushed_lsn;
-#ifdef UNIV_LOG_ARCHIVE
- *min_arch_log_no = arch_log_no;
- *max_arch_log_no = arch_log_no;
-#endif /* UNIV_LOG_ARCHIVE */
- return;
- }
-
- if (ut_dulint_cmp(*min_flushed_lsn, flushed_lsn) > 0) {
- *min_flushed_lsn = flushed_lsn;
- }
- if (ut_dulint_cmp(*max_flushed_lsn, flushed_lsn) < 0) {
- *max_flushed_lsn = flushed_lsn;
- }
-#ifdef UNIV_LOG_ARCHIVE
- if (*min_arch_log_no > arch_log_no) {
- *min_arch_log_no = arch_log_no;
- }
- if (*max_arch_log_no < arch_log_no) {
- *max_arch_log_no = arch_log_no;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-}
-
-/*================ SINGLE-TABLE TABLESPACES ==========================*/
-
-/***********************************************************************
-Registers one more pending insert buffer page merge on a space, unless the
-space is missing from the cache (an error is printed) or its
-stop_ibuf_merges flag is set. In both of those cases the counter is left
-alone and the caller is told to skip the merge. */
-
-ibool
-fil_inc_pending_ibuf_merges(
-/*========================*/
-			/* out: TRUE if being deleted, and ibuf merges should
-			be skipped */
-	ulint	id)	/* in: space id */
-{
-	fil_system_t*	cache	= fil_system;
-	fil_space_t*	space;
-	ibool		skip_merge;
-
-	mutex_enter(&(cache->mutex));
-
-	HASH_SEARCH(hash, cache->spaces, id, space, space->id == id);
-
-	if (space == NULL) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to do ibuf merge to a"
-			" dropped tablespace %lu\n",
-			(ulong) id);
-
-		skip_merge = TRUE;
-	} else if (space->stop_ibuf_merges) {
-		skip_merge = TRUE;
-	} else {
-		space->n_pending_ibuf_merges++;
-
-		skip_merge = FALSE;
-	}
-
-	mutex_exit(&(cache->mutex));
-
-	return(skip_merge);
-}
-
-/***********************************************************************
-Decrements the count of pending insert buffer page merges of a space. If the
-space is no longer in the cache, an error is printed and nothing else is
-done. */
-
-void
-fil_decr_pending_ibuf_merges(
-/*=========================*/
-	ulint	id)	/* in: space id */
-{
-	fil_system_t*	cache	= fil_system;
-	fil_space_t*	space;
-
-	mutex_enter(&(cache->mutex));
-
-	HASH_SEARCH(hash, cache->spaces, id, space, space->id == id);
-
-	if (space != NULL) {
-		space->n_pending_ibuf_merges--;
-	} else {
-		fprintf(stderr,
-			"InnoDB: Error: decrementing ibuf merge of a"
-			" dropped tablespace %lu\n",
-			(ulong) id);
-	}
-
-	mutex_exit(&(cache->mutex));
-}
-
-/************************************************************
-Creates the database directory for a table if it does not exist yet. The
-directory path is built as fil_path_to_mysql_datadir, a '/', and the part of
-name that precedes its first '/'. The name must contain a '/', and the
-directory creation must succeed; both are asserted. */
-static
-void
-fil_create_directory_for_tablename(
-/*===============================*/
-	const char*	name)	/* in: name in the standard
-				'databasename/tablename' format */
-{
-	const char*	slash	= strchr(name, '/');
-	ulint		dir_len	= strlen(fil_path_to_mysql_datadir);
-	ulint		db_len;
-	char*		dir_path;
-
-	ut_a(slash);
-
-	/* Length of the database name part in front of the slash */
-	db_len = (ulint) (slash - name);
-
-	/* Room for datadir + '/' + database name + terminating NUL */
-	dir_path = mem_alloc(dir_len + db_len + 2);
-
-	memcpy(dir_path, fil_path_to_mysql_datadir, dir_len);
-	dir_path[dir_len] = '/';
-	memcpy(dir_path + dir_len + 1, name, db_len);
-	dir_path[dir_len + 1 + db_len] = 0;
-
-	srv_normalize_path_for_win(dir_path);
-
-	ut_a(os_file_create_directory(dir_path, FALSE));
-	mem_free(dir_path);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************
-Writes a log record about an .ibd file create/rename/delete. After the
-initial file-op record header, the record body holds a 2-byte length followed
-by the null-terminated name; for MLOG_FILE_RENAME a second 2-byte length and
-the null-terminated new name follow. Nothing is written if mlog_open()
-returns NULL for the first part. */
-static
-void
-fil_op_write_log(
-/*=============*/
-	ulint		type,		/* in: MLOG_FILE_CREATE,
-					MLOG_FILE_DELETE, or
-					MLOG_FILE_RENAME */
-	ulint		space_id,	/* in: space id */
-	const char*	name,		/* in: table name in the familiar
-					'databasename/tablename' format, or
-					the file path in the case of
-					MLOG_FILE_DELETE */
-	const char*	new_name,	/* in: if type is MLOG_FILE_RENAME,
-					the new table name in the
-					'databasename/tablename' format */
-	mtr_t*		mtr)		/* in: mini-transaction handle */
-{
-	byte*	rec_ptr;
-	ulint	name_len;
-	ulint	new_name_len;
-
-	rec_ptr = mlog_open(mtr, 11 + 2);
-
-	if (rec_ptr == NULL) {
-		/* Logging in mtr is switched off during crash recovery:
-		in that case mlog_open returns NULL */
-		return;
-	}
-
-	rec_ptr = mlog_write_initial_log_record_for_file_op(type, space_id,
-							     0, rec_ptr, mtr);
-
-	/* The strings are stored with their terminating null, hence + 1 */
-	name_len = strlen(name) + 1;
-
-	mach_write_to_2(rec_ptr, name_len);
-	mlog_close(mtr, rec_ptr + 2);
-
-	mlog_catenate_string(mtr, (byte*) name, name_len);
-
-	if (type != MLOG_FILE_RENAME) {
-
-		return;
-	}
-
-	new_name_len = strlen(new_name) + 1;
-
-	rec_ptr = mlog_open(mtr, 2 + new_name_len);
-	ut_a(rec_ptr);
-
-	mach_write_to_2(rec_ptr, new_name_len);
-	mlog_close(mtr, rec_ptr + 2);
-
-	mlog_catenate_string(mtr, (byte*) new_name, new_name_len);
-}
-#endif
-
-/***********************************************************************
-Parses the body of a log record written about an .ibd file operation. That is,
-the log record part after the standard (type, space id, page no) header of the
-log record.
-
-The body consists of a 2-byte name length followed by the name, and, for
-MLOG_FILE_RENAME only, a 2-byte length followed by the new name.
-
-If desired, also replays the delete or rename operation if the .ibd file
-exists and the space id in it matches. Replays the create operation if a file
-at that path does not exist yet. If the database directory for the file to be
-created does not exist, then we create the directory, too.
-
-Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
-datadir that we should use in replaying the file operations. */
-
-byte*
-fil_op_log_parse_or_replay(
-/*=======================*/
- /* out: end of log record, or NULL if the
- record was not completely contained between
- ptr and end_ptr */
- byte* ptr, /* in: buffer containing the log record body,
- or an initial segment of it, if the record does
- not fit completely between ptr and end_ptr */
- byte* end_ptr, /* in: buffer end */
- ulint type, /* in: the type of this log record */
- ibool do_replay, /* in: TRUE if we want to replay the
- operation, and not just parse the log record */
- ulint space_id) /* in: if do_replay is TRUE, the space id of
- the tablespace in question; otherwise
- ignored */
-{
- ulint name_len;
- ulint new_name_len;
- const char* name;
- const char* new_name = NULL;
-
- if (end_ptr < ptr + 2) { /* the 2-byte name length is not fully in the buffer */
-
- return(NULL);
- }
-
- name_len = mach_read_from_2(ptr);
-
- ptr += 2;
-
- if (end_ptr < ptr + name_len) { /* the name itself is not fully in the buffer */
-
- return(NULL);
- }
-
- name = (const char*) ptr; /* points into the log buffer; no copy is made */
-
- ptr += name_len;
-
- if (type == MLOG_FILE_RENAME) {
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- new_name_len = mach_read_from_2(ptr);
-
- ptr += 2;
-
- if (end_ptr < ptr + new_name_len) {
-
- return(NULL);
- }
-
- new_name = (const char*) ptr;
-
- ptr += new_name_len;
- }
-
- /* We managed to parse a full log record body */
- /*
- printf("Parsed log rec of type %lu space %lu\n"
- "name %s\n", type, space_id, name);
-
- if (type == MLOG_FILE_RENAME) {
- printf("new name %s\n", new_name);
- }
- */
- if (do_replay == FALSE) {
-
- return(ptr);
- }
-
- /* Let us try to perform the file operation, if sensible. Note that
- ibbackup has at this stage already read in all space id info to the
- fil0fil.c data structures.
-
- NOTE that our algorithm is not guaranteed to work correctly if there
- were renames of tables during the backup. See ibbackup code for more
- on the problem. */
-
- if (type == MLOG_FILE_DELETE) {
- if (fil_tablespace_exists_in_mem(space_id)) {
- ut_a(fil_delete_tablespace(space_id)); /* a failed delete is fatal here */
- }
- } else if (type == MLOG_FILE_RENAME) {
- /* We do the rename based on space id, not old file name;
- this should guarantee that after the log replay each .ibd file
- has the correct name for the latest log sequence number; the
- proof is left as an exercise :) */
-
- if (fil_tablespace_exists_in_mem(space_id)) {
- /* Create the database directory for the new name, if
- it does not exist yet */
- fil_create_directory_for_tablename(new_name);
-
- /* Rename the table if there is not yet a tablespace
- with the same name */
-
- if (fil_get_space_id_for_table(new_name)
- == ULINT_UNDEFINED) {
- /* We do not care of the old name, that is
- why we pass NULL as the first argument */
- if (!fil_rename_tablespace(NULL, space_id,
- new_name)) {
- ut_error;
- }
- }
- }
- } else {
- ut_a(type == MLOG_FILE_CREATE);
-
- if (fil_tablespace_exists_in_mem(space_id)) {
- /* Do nothing */
- } else if (fil_get_space_id_for_table(name)
- != ULINT_UNDEFINED) {
- /* Do nothing */
- } else {
- /* Create the database directory for name, if it does
- not exist yet */
- fil_create_directory_for_tablename(name);
-
- ut_a(space_id != 0);
-
- if (fil_create_new_single_table_tablespace(
- &space_id, name, FALSE,
- FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
- ut_error;
- }
- }
- }
-
- return(ptr);
-}
-
-/***********************************************************************
-Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache.
-
-The function works in stages, each of which retries with a sleep in between:
-1) set stop_ibuf_merges on the space and wait until n_pending_ibuf_merges
-drops to 0;
-2) set is_being_deleted and wait until there are no pending flushes on the
-space and no pending i/o's on its single file node;
-3) invalidate the pages of the space in the buffer pool (not in
-UNIV_HOTBACKUP builds), free the space object and delete the file;
-4) on success, write an MLOG_FILE_DELETE log record (not in UNIV_HOTBACKUP
-builds). */
-
-ibool
-fil_delete_tablespace(
-/*==================*/
- /* out: TRUE if success */
- ulint id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- ibool success;
- fil_space_t* space;
- fil_node_t* node;
- ulint count = 0;
- char* path;
-
- ut_a(id != 0);
-stop_ibuf_merges:
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- if (space != NULL) {
- space->stop_ibuf_merges = TRUE;
-
- if (space->n_pending_ibuf_merges == 0) {
- mutex_exit(&(system->mutex));
-
- count = 0; /* restart the loop counter for the next stage */
-
- goto try_again;
- } else {
- if (count > 5000) { /* warn on every further iteration */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: trying to"
- " delete tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr, ",\n"
- "InnoDB: but there are %lu pending"
- " ibuf merges on it.\n"
- "InnoDB: Loop %lu.\n",
- (ulong) space->n_pending_ibuf_merges,
- (ulong) count);
- }
-
- mutex_exit(&(system->mutex));
-
- os_thread_sleep(20000);
- count++;
-
- goto stop_ibuf_merges;
- }
- }
-
- mutex_exit(&(system->mutex)); /* space not found: the lookup below reports the error */
- count = 0;
-
-try_again:
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- if (space == NULL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: cannot delete tablespace %lu\n"
- "InnoDB: because it is not found in the"
- " tablespace memory cache.\n",
- (ulong) id);
-
- mutex_exit(&(system->mutex));
-
- return(FALSE);
- }
-
- ut_a(space);
- ut_a(space->n_pending_ibuf_merges == 0);
-
- space->is_being_deleted = TRUE;
-
- ut_a(UT_LIST_GET_LEN(space->chain) == 1); /* exactly one file node is expected in the chain */
- node = UT_LIST_GET_FIRST(space->chain);
-
- if (space->n_pending_flushes > 0 || node->n_pending > 0) {
- if (count > 1000) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: trying to"
- " delete tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr, ",\n"
- "InnoDB: but there are %lu flushes"
- " and %lu pending i/o's on it\n"
- "InnoDB: Loop %lu.\n",
- (ulong) space->n_pending_flushes,
- (ulong) node->n_pending,
- (ulong) count);
- }
- mutex_exit(&(system->mutex));
- os_thread_sleep(20000);
-
- count++;
-
- goto try_again;
- }
-
- path = mem_strdup(space->name); /* private copy: fil_space_free() below frees space->name */
-
- mutex_exit(&(system->mutex));
-#ifndef UNIV_HOTBACKUP
- /* Invalidate in the buffer pool all pages belonging to the
- tablespace. Since we have set space->is_being_deleted = TRUE, readahead
- or ibuf merge can no longer read more pages of this tablespace to the
- buffer pool. Thus we can clean the tablespace out of the buffer pool
- completely and permanently. The flag is_being_deleted also prevents
- fil_flush() from being applied to this tablespace. */
-
- buf_LRU_invalidate_tablespace(id);
-#endif
- /* printf("Deleting tablespace %s id %lu\n", space->name, id); */
-
- success = fil_space_free(id);
-
- if (success) {
- success = os_file_delete(path);
-
- if (!success) { /* second attempt with the _if_exists variant */
- success = os_file_delete_if_exists(path);
- }
- }
-
- if (success) {
-#ifndef UNIV_HOTBACKUP
- /* Write a log record about the deletion of the .ibd
- file, so that ibbackup can replay it in the
- --apply-log phase. We use a dummy mtr and the familiar
- log write mechanism. */
- mtr_t mtr;
-
- /* When replaying the operation in ibbackup, do not try
- to write any log record */
- mtr_start(&mtr);
-
- fil_op_write_log(MLOG_FILE_DELETE, id, path, NULL, &mtr);
- mtr_commit(&mtr);
-#endif
- mem_free(path);
-
- return(TRUE);
- }
-
- mem_free(path);
-
- return(FALSE);
-}
-
-/***********************************************************************
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-1) we do not drop the table from the data dictionary;
-2) we remove all insert buffer entries for the tablespace immediately; in DROP
-TABLE they are only removed gradually in the background;
-3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had.
-
-A failure of fil_delete_tablespace() is only reported as a warning on stderr;
-the insert buffer entries are removed regardless. */
-
-ibool
-fil_discard_tablespace(
-/*===================*/
- /* out: always TRUE in this implementation, even
- if deleting the tablespace failed */
- ulint id) /* in: space id */
-{
- ibool success;
-
- success = fil_delete_tablespace(id);
-
- if (!success) {
- fprintf(stderr,
- "InnoDB: Warning: cannot delete tablespace %lu"
- " in DISCARD TABLESPACE.\n"
- "InnoDB: But let us remove the"
- " insert buffer entries for this tablespace.\n",
- (ulong) id);
- }
-
- /* Remove all insert buffer entries for the tablespace */
-
- ibuf_delete_for_discarded_space(id);
-
- return(TRUE); /* NOTE(review): the result of the delete is not propagated to the caller */
-}
-
-/***********************************************************************
-Renames the memory cache structures of a single-table tablespace: the space
-is re-keyed in the name hash, and both the space name and the node name are
-replaced by fresh copies of path. Nothing is changed if the space cannot be
-found under its current name, or if path is already present in the name
-hash; an error is printed in both cases. The function takes no mutex itself. */
-static
-ibool
-fil_rename_tablespace_in_mem(
-/*=========================*/
-				/* out: TRUE if success */
-	fil_space_t*	space,	/* in: tablespace memory object */
-	fil_node_t*	node,	/* in: file node of that tablespace */
-	const char*	path)	/* in: new name */
-{
-	fil_system_t*	cache		= fil_system;
-	const char*	cur_name	= space->name;
-	fil_space_t*	hit;
-
-	/* The space must currently be registered under its own name */
-	HASH_SEARCH(name_hash, cache->name_hash, ut_fold_string(cur_name),
-		    hit, 0 == strcmp(cur_name, hit->name));
-
-	if (hit != space) {
-		fputs("InnoDB: Error: cannot find ", stderr);
-		ut_print_filename(stderr, cur_name);
-		fputs(" in tablespace memory cache\n", stderr);
-
-		return(FALSE);
-	}
-
-	/* The new name must not be taken yet */
-	HASH_SEARCH(name_hash, cache->name_hash, ut_fold_string(path),
-		    hit, 0 == strcmp(path, hit->name));
-
-	if (hit != NULL) {
-		fputs("InnoDB: Error: ", stderr);
-		ut_print_filename(stderr, path);
-		fputs(" is already in tablespace memory cache\n", stderr);
-
-		return(FALSE);
-	}
-
-	/* Unhash under the old name before the old string is freed */
-	HASH_DELETE(fil_space_t, name_hash, cache->name_hash,
-		    ut_fold_string(cur_name), space);
-
-	mem_free(space->name);
-	mem_free(node->name);
-
-	space->name = mem_strdup(path);
-	node->name = mem_strdup(path);
-
-	HASH_INSERT(fil_space_t, name_hash, cache->name_hash,
-		    ut_fold_string(path), space);
-
-	return(TRUE);
-}
-
-/***********************************************************************
-Allocates a file name for a single-table tablespace. The string must be freed
-by caller with mem_free(). For a temporary table the result is name followed
-by ".ibd"; otherwise it is fil_path_to_mysql_datadir, a '/', name and ".ibd".
-The result is passed through srv_normalize_path_for_win() before it is
-returned. */
-static
-char*
-fil_make_ibd_name(
-/*==============*/
-				/* out, own: file name */
-	const char*	name,	/* in: table name or a dir path of a
-				TEMPORARY table */
-	ibool		is_temp)/* in: TRUE if it is a dir path */
-{
-	ulint	name_len	= strlen(name);
-	ulint	dir_len		= strlen(fil_path_to_mysql_datadir);
-	char*	ibd_name	= mem_alloc(name_len + dir_len
-					    + sizeof "/.ibd");
-	char*	tail		= ibd_name;
-
-	if (!is_temp) {
-		/* Prefix with the data directory and a separator */
-		memcpy(tail, fil_path_to_mysql_datadir, dir_len);
-		tail += dir_len;
-
-		*tail = '/';
-		tail++;
-	}
-
-	memcpy(tail, name, name_len);
-
-	/* sizeof includes the terminating null of the suffix */
-	memcpy(tail + name_len, ".ibd", sizeof ".ibd");
-
-	srv_normalize_path_for_win(ibd_name);
-
-	return(ibd_name);
-}
-
-/***********************************************************************
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache.
-
-The function sets stop_ios on the space and retries, sleeping between
-attempts, until the single file node has no pending i/o's or flushes and no
-unflushed modifications; a warning is printed on every attempt after the
-1000th, and the function gives up after 25000 attempts. The file is then
-closed if open, the memory cache is renamed, and the file is renamed on disk;
-if the disk rename fails, the memory cache rename is reverted. On success an
-MLOG_FILE_RENAME log record is written (not in UNIV_HOTBACKUP builds). */
-
-ibool
-fil_rename_tablespace(
-/*==================*/
- /* out: TRUE if success */
- const char* old_name, /* in: old table name in the standard
- databasename/tablename format of
- InnoDB, or NULL if we do the rename
- based on the space id only */
- ulint id, /* in: space id */
- const char* new_name) /* in: new table name in the standard
- databasename/tablename format
- of InnoDB */
-{
- fil_system_t* system = fil_system;
- ibool success;
- fil_space_t* space;
- fil_node_t* node;
- ulint count = 0;
- char* path;
- ibool old_name_was_specified = TRUE;
- char* old_path;
-
- ut_a(id != 0);
-
- if (old_name == NULL) {
- old_name = "(name not specified)"; /* placeholder used in messages and in the log record below */
- old_name_was_specified = FALSE;
- }
-retry:
- count++;
-
- if (count > 1000) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: problems renaming ", stderr);
- ut_print_filename(stderr, old_name);
- fputs(" to ", stderr);
- ut_print_filename(stderr, new_name);
- fprintf(stderr, ", %lu iterations\n", (ulong) count);
- }
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- if (space == NULL) {
- fprintf(stderr,
- "InnoDB: Error: cannot find space id %lu"
- " in the tablespace memory cache\n"
- "InnoDB: though the table ", (ulong) id);
- ut_print_filename(stderr, old_name);
- fputs(" in a rename operation should have that id\n", stderr);
- mutex_exit(&(system->mutex));
-
- return(FALSE);
- }
-
- if (count > 25000) { /* give up: clear the stop_ios flag set on earlier attempts */
- space->stop_ios = FALSE;
- mutex_exit(&(system->mutex));
-
- return(FALSE);
- }
-
- /* We temporarily close the .ibd file because we do not trust that
- operating systems can rename an open file. For the closing we have to
- wait until there are no pending i/o's or flushes on the file. */
-
- space->stop_ios = TRUE;
-
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
- node = UT_LIST_GET_FIRST(space->chain);
-
- if (node->n_pending > 0 || node->n_pending_flushes > 0) {
- /* There are pending i/o's or flushes, sleep for a while and
- retry */
-
- mutex_exit(&(system->mutex));
-
- os_thread_sleep(20000);
-
- goto retry;
-
- } else if (node->modification_counter > node->flush_counter) {
- /* Flush the space */
-
- mutex_exit(&(system->mutex));
-
- os_thread_sleep(20000);
-
- fil_flush(id);
-
- goto retry;
-
- } else if (node->open) {
- /* Close the file */
-
- fil_node_close_file(node, system);
- }
-
- /* Check that the old name in the space is right */
-
- if (old_name_was_specified) {
- old_path = fil_make_ibd_name(old_name, FALSE);
-
- ut_a(strcmp(space->name, old_path) == 0);
- ut_a(strcmp(node->name, old_path) == 0);
- } else {
- old_path = mem_strdup(space->name); /* private copy: the in-memory rename frees space->name */
- }
-
- /* Rename the tablespace and the node in the memory cache */
- path = fil_make_ibd_name(new_name, FALSE);
- success = fil_rename_tablespace_in_mem(space, node, path);
-
- if (success) {
- success = os_file_rename(old_path, path);
-
- if (!success) {
- /* We have to revert the changes we made
- to the tablespace memory cache */
-
- ut_a(fil_rename_tablespace_in_mem(space, node,
- old_path));
- }
- }
-
- mem_free(path);
- mem_free(old_path);
-
- space->stop_ios = FALSE;
-
- mutex_exit(&(system->mutex));
-
-#ifndef UNIV_HOTBACKUP
- if (success) {
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- fil_op_write_log(MLOG_FILE_RENAME, id, old_name, new_name,
- &mtr);
- mtr_commit(&mtr);
- }
-#endif
- return(success);
-}
-
-/***********************************************************************
-Creates a new single-table tablespace to a database directory of MySQL.
-Database directories are under the 'datadir' of MySQL. The datadir is the
-directory of a running mysqld program. We can refer to it by simply the
-path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server.
-
-Outline of what the body does:
-1) creates the file with os_file_create(OS_FILE_CREATE); on failure returns
-DB_TABLESPACE_ALREADY_EXISTS, DB_OUT_OF_FILE_SPACE or DB_ERROR depending on
-the value of os_file_get_last_error();
-2) sets the file size to size pages; on failure the file is closed and
-deleted and DB_OUT_OF_FILE_SPACE is returned;
-3) if *space_id is 0, assigns a new id with fil_assign_new_space_id(); the
-value ULINT_UNDEFINED is treated as an error;
-4) writes a zero-filled first page stamped with the space id, flushes and
-closes the file;
-5) registers the space with fil_space_create() and fil_node_create();
-6) unless UNIV_HOTBACKUP is defined, writes an MLOG_FILE_CREATE log record.
-Every failure from step 3 onwards deletes the file and returns DB_ERROR. */
-
-ulint
-fil_create_new_single_table_tablespace(
-/*===================================*/
- /* out: DB_SUCCESS or error code */
- ulint* space_id, /* in/out: space id; if this is != 0,
- then this is an input parameter,
- otherwise output */
- const char* tablename, /* in: the table name in the usual
- databasename/tablename format
- of InnoDB, or a dir path to a temp
- table */
- ibool is_temp, /* in: TRUE if a table created with
- CREATE TEMPORARY TABLE */
- ulint size) /* in: the initial size of the
- tablespace file in pages,
- must be >= FIL_IBD_FILE_INITIAL_SIZE */
-{
- os_file_t file;
- ibool ret;
- ulint err;
- byte* buf2;
- byte* page;
- ibool success;
- char* path;
-
- ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
-
- path = fil_make_ibd_name(tablename, is_temp);
-
- file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
- if (ret == FALSE) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error creating file ", stderr);
- ut_print_filename(stderr, path);
- fputs(".\n", stderr);
-
- /* The following call will print an error message */
-
- err = os_file_get_last_error(TRUE);
-
- if (err == OS_FILE_ALREADY_EXISTS) {
- fputs("InnoDB: The file already exists though"
- " the corresponding table did not\n"
- "InnoDB: exist in the InnoDB data dictionary."
- " Have you moved InnoDB\n"
- "InnoDB: .ibd files around without using the"
- " SQL commands\n"
- "InnoDB: DISCARD TABLESPACE and"
- " IMPORT TABLESPACE, or did\n"
- "InnoDB: mysqld crash in the middle of"
- " CREATE TABLE? You can\n"
- "InnoDB: resolve the problem by"
- " removing the file ", stderr);
- ut_print_filename(stderr, path);
- fputs("\n"
- "InnoDB: under the 'datadir' of MySQL.\n",
- stderr);
-
- mem_free(path);
- return(DB_TABLESPACE_ALREADY_EXISTS);
- }
-
- if (err == OS_FILE_DISK_FULL) {
-
- mem_free(path);
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- mem_free(path);
- return(DB_ERROR);
- }
-
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = ut_align(buf2, UNIV_PAGE_SIZE);
-
- ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
-
- if (!ret) {
- ut_free(buf2);
- os_file_close(file);
- os_file_delete(path);
-
- mem_free(path);
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- if (*space_id == 0) {
- *space_id = fil_assign_new_space_id();
- }
-
- /* printf("Creating tablespace %s id %lu\n", path, *space_id); */
-
- if (*space_id == ULINT_UNDEFINED) {
- ut_free(buf2);
-error_exit: /* also entered by goto from the write and flush failures below; buf2 is already freed there */
- os_file_close(file);
-error_exit2: /* also entered by goto after the file has been closed below */
- os_file_delete(path);
-
- mem_free(path);
- return(DB_ERROR);
- }
-
- /* We have to write the space id to the file immediately and flush the
- file to disk. This is because in crash recovery we must be aware what
- tablespaces exist and what are their space id's, so that we can apply
- the log records to the right file. It may take quite a while until
- buffer pool flush algorithms write anything to the file and flush it to
- disk. If we would not write here anything, the file would be filled
- with zeros from the call of os_file_set_size(), until a buffer pool
- flush would write to it. */
-
- memset(page, '\0', UNIV_PAGE_SIZE);
-
- fsp_header_write_space_id(page, *space_id);
-
- buf_flush_init_for_writing(page, ut_dulint_zero, *space_id, 0);
-
- ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
-
- ut_free(buf2);
-
- if (!ret) {
- fputs("InnoDB: Error: could not write the first page"
- " to tablespace ", stderr);
- ut_print_filename(stderr, path);
- putc('\n', stderr);
- goto error_exit;
- }
-
- ret = os_file_flush(file);
-
- if (!ret) {
- fputs("InnoDB: Error: file flush of tablespace ", stderr);
- ut_print_filename(stderr, path);
- fputs(" failed\n", stderr);
- goto error_exit;
- }
-
- os_file_close(file);
-
- if (*space_id == ULINT_UNDEFINED) { /* NOTE(review): the same value was already rejected above and nothing visible here changes *space_id since, so this looks unreachable - confirm */
- goto error_exit2;
- }
-
- success = fil_space_create(path, *space_id, FIL_TABLESPACE);
-
- if (!success) {
- goto error_exit2;
- }
-
- fil_node_create(path, size, *space_id, FALSE);
-
-#ifndef UNIV_HOTBACKUP
- {
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- fil_op_write_log(MLOG_FILE_CREATE, *space_id, tablename,
- NULL, &mtr);
-
- mtr_commit(&mtr);
- }
-#endif
- mem_free(path);
- return(DB_SUCCESS);
-}
-
-/************************************************************************
-Resets the page lsn's of an .ibd file that is about to be imported if they
-are ahead of the current system lsn. This is very improbable, but can happen
-if a lengthy purge, ibuf merge, or rollback was performed on a backup taken
-with ibbackup. We assume that mysqld was shut down after those cleanup
-operations, so that the latest lsn was stamped to FIL_PAGE_FILE_FLUSH_LSN in
-the first page of the file; looking at that single stamp is therefore enough
-to decide whether any page needs to be reset. */
-
-ibool
-fil_reset_too_high_lsns(
-/*====================*/
-					/* out: TRUE if success */
-	const char*	name,		/* in: table name in the
-					databasename/tablename format */
-	dulint		current_lsn)	/* in: reset lsn's if the lsn stamped
-					to FIL_PAGE_FILE_FLUSH_LSN in the
-					first page is too high */
-{
-	os_file_t	file;
-	char*		filepath;
-	byte*		raw_buf;
-	byte*		frame;
-	dulint		stamped_lsn;
-	ulint		space_id;
-	ulint		page_no;
-	ib_longlong	end_pos;
-	ib_longlong	pos;
-	ibool		ok;
-
-	filepath = fil_make_ibd_name(name, FALSE);
-
-	file = os_file_create_simple_no_error_handling(
-		filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, &ok);
-	if (!ok) {
-		/* os_file_get_last_error() prints an error message */
-		os_file_get_last_error(TRUE);
-
-		ut_print_timestamp(stderr);
-
-		fputs(" InnoDB: Error: trying to open a table,"
-		      " but could not\n"
-		      "InnoDB: open the tablespace file ", stderr);
-		ut_print_filename(stderr, filepath);
-		fputs("!\n", stderr);
-		mem_free(filepath);
-
-		return(FALSE);
-	}
-
-	/* Allocate two pages so that one page-aligned frame fits inside;
-	aligned memory is needed for file i/o if O_DIRECT might be set */
-
-	raw_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
-	frame = ut_align(raw_buf, UNIV_PAGE_SIZE);
-
-	/* The flush lsn stamp lives in the header of the first page */
-
-	ok = os_file_read(file, frame, 0, 0, UNIV_PAGE_SIZE);
-	if (!ok) {
-
-		goto func_exit;
-	}
-
-	stamped_lsn = mach_read_from_8(frame + FIL_PAGE_FILE_FLUSH_LSN);
-
-	if (ut_dulint_cmp(current_lsn, stamped_lsn) >= 0) {
-		/* The stamp is not ahead of the system lsn: nothing to do */
-		ok = TRUE;
-
-		goto func_exit;
-	}
-
-	space_id = fsp_header_get_space_id(frame);
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		" InnoDB: Flush lsn in the tablespace file %lu"
-		" to be imported\n"
-		"InnoDB: is %lu %lu, which exceeds current"
-		" system lsn %lu %lu.\n"
-		"InnoDB: We reset the lsn's in the file ",
-		(ulong) space_id,
-		(ulong) ut_dulint_get_high(stamped_lsn),
-		(ulong) ut_dulint_get_low(stamped_lsn),
-		(ulong) ut_dulint_get_high(current_lsn),
-		(ulong) ut_dulint_get_low(current_lsn));
-	ut_print_filename(stderr, filepath);
-	fputs(".\n", stderr);
-
-	/* Walk over every page of the file; a page whose lsn is ahead of
-	current_lsn is re-initialized for writing (lsn and checksum) and
-	written back in place */
-
-	end_pos = os_file_get_size_as_iblonglong(file);
-
-	for (pos = 0; pos < end_pos; pos += UNIV_PAGE_SIZE) {
-		/* The file i/o calls take the offset as two 32-bit words */
-		ulint	pos_low		= (ulint)(pos & 0xFFFFFFFFUL);
-		ulint	pos_high	= (ulint)(pos >> 32);
-
-		ok = os_file_read(file, frame, pos_low, pos_high,
-				  UNIV_PAGE_SIZE);
-		if (!ok) {
-
-			goto func_exit;
-		}
-
-		if (ut_dulint_cmp(mach_read_from_8(frame + FIL_PAGE_LSN),
-				  current_lsn) <= 0) {
-			/* This page is fine as it is */
-
-			continue;
-		}
-
-		space_id = mach_read_from_4(
-			frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-		page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
-
-		buf_flush_init_for_writing(frame, current_lsn, space_id,
-					   page_no);
-
-		ok = os_file_write(filepath, file, frame, pos_low, pos_high,
-				   UNIV_PAGE_SIZE);
-		if (!ok) {
-
-			goto func_exit;
-		}
-	}
-
-	ok = os_file_flush(file);
-	if (!ok) {
-
-		goto func_exit;
-	}
-
-	/* Finally bring the flush lsn stamp in the first page down to
-	current_lsn */
-
-	ok = os_file_read(file, frame, 0, 0, UNIV_PAGE_SIZE);
-	if (!ok) {
-
-		goto func_exit;
-	}
-
-	mach_write_to_8(frame + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
-
-	ok = os_file_write(filepath, file, frame, 0, 0, UNIV_PAGE_SIZE);
-	if (!ok) {
-
-		goto func_exit;
-	}
-
-	ok = os_file_flush(file);
-func_exit:
-	os_file_close(file);
-	ut_free(raw_buf);
-	mem_free(filepath);
-
-	return(ok);
-}
-
-/************************************************************************
-Tries to open a single-table tablespace and optionally checks the space id is
-right in it. If does not succeed, prints an error message to the .err log. This
-function is used to open a tablespace when we start up mysqld, and also in
-IMPORT TABLESPACE.
-NOTE that we assume this operation is used either at the database startup
-or under the protection of the dictionary mutex, so that two users cannot
-race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it.
-
-The function returns FALSE when the file cannot be opened, when the first
-page cannot be read (only attempted if check_space_id is set), when the space
-id in the file differs from id, and when fil_space_create() refuses to add
-the space to the tablespace memory cache. */
-
-ibool
-fil_open_single_table_tablespace(
-/*=============================*/
-					/* out: TRUE if success */
-	ibool		check_space_id,	/* in: should we check that the space
-					id in the file is right; we assume
-					that this function runs much faster
-					if no check is made, since accessing
-					the file inode probably is much
-					faster (the OS caches them) than
-					accessing the first page of the file */
-	ulint		id,		/* in: space id */
-	const char*	name)		/* in: table name in the
-					databasename/tablename format */
-{
-	os_file_t	file;
-	char*		filepath;
-	ibool		success;
-	byte*		buf2;
-	byte*		page;
-	ulint		space_id;
-
-	filepath = fil_make_ibd_name(name, FALSE);
-
-	file = os_file_create_simple_no_error_handling(
-		filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
-	if (!success) {
-		/* The following call prints an error message */
-		os_file_get_last_error(TRUE);
-
-		ut_print_timestamp(stderr);
-
-		fputs(" InnoDB: Error: trying to open a table,"
-		      " but could not\n"
-		      "InnoDB: open the tablespace file ", stderr);
-		ut_print_filename(stderr, filepath);
-		fputs("!\n"
-		      "InnoDB: Have you moved InnoDB .ibd files around"
-		      " without using the\n"
-		      "InnoDB: commands DISCARD TABLESPACE and"
-		      " IMPORT TABLESPACE?\n"
-		      "InnoDB: It is also possible that this is"
-		      " a temporary table #sql...,\n"
-		      "InnoDB: and MySQL removed the .ibd file for this.\n"
-		      "InnoDB: Please refer to\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "innodb-troubleshooting.html\n"
-		      "InnoDB: for how to resolve the issue.\n", stderr);
-
-		mem_free(filepath);
-
-		return(FALSE);
-	}
-
-	if (!check_space_id) {
-		space_id = id;
-
-		goto skip_check;
-	}
-
-	/* Read the first page of the tablespace */
-
-	buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
-	/* Align the memory for file i/o if we might have O_DIRECT set */
-	page = ut_align(buf2, UNIV_PAGE_SIZE);
-
-	success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
-
-	if (!success) {
-		/* The buffer was never filled, so a space id parsed from it
-		would be garbage; report the read failure instead of a
-		misleading space id mismatch */
-
-		ut_print_timestamp(stderr);
-
-		fputs(" InnoDB: Error: could not read the first page"
-		      " of tablespace file ", stderr);
-		ut_print_filename(stderr, filepath);
-		putc('\n', stderr);
-
-		ut_free(buf2);
-
-		goto func_exit;
-	}
-
-	/* We have to read the tablespace id from the file */
-
-	space_id = fsp_header_get_space_id(page);
-
-	ut_free(buf2);
-
-	if (space_id != id) {
-		ut_print_timestamp(stderr);
-
-		fputs(" InnoDB: Error: tablespace id in file ", stderr);
-		ut_print_filename(stderr, filepath);
-		fprintf(stderr, " is %lu, but in the InnoDB\n"
-			"InnoDB: data dictionary it is %lu.\n"
-			"InnoDB: Have you moved InnoDB .ibd files"
-			" around without using the\n"
-			"InnoDB: commands DISCARD TABLESPACE and"
-			" IMPORT TABLESPACE?\n"
-			"InnoDB: Please refer to\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-troubleshooting.html\n"
-			"InnoDB: for how to resolve the issue.\n",
-			(ulong) space_id, (ulong) id);
-
-		success = FALSE;
-
-		goto func_exit;
-	}
-
-skip_check:
-	/* If the space cannot be added to the memory cache the tablespace is
-	not usable, so that failure is propagated to the caller as well */
-
-	success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
-
-	if (!success) {
-		goto func_exit;
-	}
-
-	/* We do not measure the size of the file, that is why we pass the 0
-	below */
-
-	fil_node_create(filepath, 0, space_id, FALSE);
-func_exit:
-	os_file_close(file);
-	mem_free(filepath);
-
-	return(success);
-}
-
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************
-Allocates a file name for an old version of a single-table tablespace: the
-original name followed by "_ibbackup_old_vers_" and a timestamp.
-The string must be freed by caller with mem_free()! */
-static
-char*
-fil_make_ibbackup_old_name(
-/*=======================*/
-					/* out, own: file name */
-	const char*	name)		/* in: original file name */
-{
-	static const char suffix[] = "_ibbackup_old_vers_";
-	ulint	len		= strlen(name);
-	/* Length of the suffix without its terminating NUL */
-	ulint	suffix_len	= (sizeof suffix) - 1;
-	/* NOTE(review): the 15 extra bytes are presumably the length of the
-	timestamp text; together with the byte counted for the NUL of suffix
-	that leaves room for timestamp + NUL after the suffix - confirm
-	against ut_sprintf_timestamp_without_extra_chars() */
-	char*	path		= mem_alloc(len + (15 + sizeof suffix));
-
-	memcpy(path, name, len);
-	memcpy(path + len, suffix, suffix_len);
-
-	/* Only suffix_len bytes were copied above, so the timestamp has to
-	start at exactly that offset. Starting it at sizeof suffix would
-	leave one uninitialized byte between the suffix and the timestamp
-	and would push the timestamp one byte towards the end of the
-	allocation. */
-	ut_sprintf_timestamp_without_extra_chars(path + len + suffix_len);
-	return(path);
-}
-#endif /* UNIV_HOTBACKUP */
-
-/************************************************************************
-Opens an .ibd file and adds the associated single-table tablespace to the
-InnoDB fil0fil.c data structures.
-
-The file path is built as <fil_path_to_mysql_datadir>/<dbname>/<filename>.
-The file is opened read-only and its size is measured; if either step fails,
-an explanation is printed and the process is terminated with exit(1), unless
-srv_force_recovery > 0, in which case the file is skipped. The space id is
-read from the first page and the space is registered with fil_space_create()
-and fil_node_create(); the size passed to fil_node_create() is 0.
-When UNIV_HOTBACKUP is defined, a file whose space id is not sensible, or
-whose space id is already present in the memory cache, is renamed with
-fil_make_ibbackup_old_name() instead of being loaded. */
-static
-void
-fil_load_single_table_tablespace(
-/*=============================*/
- const char* dbname, /* in: database name */
- const char* filename) /* in: file name (not a path),
- including the .ibd extension */
-{
- os_file_t file;
- char* filepath;
- ibool success;
- byte* buf2;
- byte* page;
- ulint space_id;
- ulint size_low;
- ulint size_high;
- ib_longlong size;
-#ifdef UNIV_HOTBACKUP
- fil_space_t* space;
-#endif
- filepath = mem_alloc(strlen(dbname) + strlen(filename)
- + strlen(fil_path_to_mysql_datadir) + 3); /* + 3: two '/' separators and the terminating NUL */
-
- sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
- filename);
- srv_normalize_path_for_win(filepath);
-#ifdef __WIN__
-# ifndef UNIV_HOTBACKUP
- /* If lower_case_table_names is 0 or 2, then MySQL allows database
- directory names with upper case letters. On Windows, all table and
- database names in InnoDB are internally always in lower case. Put the
- file path to lower case, so that we are consistent with InnoDB's
- internal data dictionary. */
-
- dict_casedn_str(filepath);
-# endif /* !UNIV_HOTBACKUP */
-#endif
- file = os_file_create_simple_no_error_handling(
- filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- fprintf(stderr,
- "InnoDB: Error: could not open single-table tablespace"
- " file\n"
- "InnoDB: %s!\n"
- "InnoDB: We do not continue the crash recovery,"
- " because the table may become\n"
- "InnoDB: corrupt if we cannot apply the log records"
- " in the InnoDB log to it.\n"
- "InnoDB: To fix the problem and start mysqld:\n"
- "InnoDB: 1) If there is a permission problem"
- " in the file and mysqld cannot\n"
- "InnoDB: open the file, you should"
- " modify the permissions.\n"
- "InnoDB: 2) If the table is not needed, or you can"
- " restore it from a backup,\n"
- "InnoDB: then you can remove the .ibd file,"
- " and InnoDB will do a normal\n"
- "InnoDB: crash recovery and ignore that table.\n"
- "InnoDB: 3) If the file system or the"
- " disk is broken, and you cannot remove\n"
- "InnoDB: the .ibd file, you can set"
- " innodb_force_recovery > 0 in my.cnf\n"
- "InnoDB: and force InnoDB to continue crash"
- " recovery here.\n", filepath);
-
- mem_free(filepath);
-
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though we cannot access"
- " the .ibd file of this table.\n",
- srv_force_recovery);
- return;
- }
-
- exit(1); /* terminates the whole process, not just this function */
- }
-
- success = os_file_get_size(file, &size_low, &size_high);
-
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- fprintf(stderr,
- "InnoDB: Error: could not measure the size"
- " of single-table tablespace file\n"
- "InnoDB: %s!\n"
- "InnoDB: We do not continue crash recovery,"
- " because the table will become\n"
- "InnoDB: corrupt if we cannot apply the log records"
- " in the InnoDB log to it.\n"
- "InnoDB: To fix the problem and start mysqld:\n"
- "InnoDB: 1) If there is a permission problem"
- " in the file and mysqld cannot\n"
- "InnoDB: access the file, you should"
- " modify the permissions.\n"
- "InnoDB: 2) If the table is not needed,"
- " or you can restore it from a backup,\n"
- "InnoDB: then you can remove the .ibd file,"
- " and InnoDB will do a normal\n"
- "InnoDB: crash recovery and ignore that table.\n"
- "InnoDB: 3) If the file system or the disk is broken,"
- " and you cannot remove\n"
- "InnoDB: the .ibd file, you can set"
- " innodb_force_recovery > 0 in my.cnf\n"
- "InnoDB: and force InnoDB to continue"
- " crash recovery here.\n", filepath);
-
- os_file_close(file);
- mem_free(filepath);
-
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though we cannot access"
- " the .ibd file of this table.\n",
- srv_force_recovery);
- return;
- }
-
- exit(1); /* terminates the whole process, not just this function */
- }
-
- /* TODO: What to do in other cases where we cannot access an .ibd
- file during a crash recovery? */
-
- /* Every .ibd file is created >= 4 pages in size. Smaller files
- cannot be ok. */
-
- size = (((ib_longlong)size_high) << 32) + (ib_longlong)size_low;
-#ifndef UNIV_HOTBACKUP
- if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: the size of single-table tablespace"
- " file %s\n"
- "InnoDB: is only %lu %lu, should be at least %lu!",
- filepath,
- (ulong) size_high,
- (ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE)); /* NOTE(review): unlike the other messages here, this one has no trailing newline */
- os_file_close(file);
- mem_free(filepath);
-
- return;
- }
-#endif
- /* Read the first page of the tablespace if the size big enough */
-
- buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = ut_align(buf2, UNIV_PAGE_SIZE);
-
- if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); /* NOTE(review): the result of this read is not checked before the page is parsed below */
-
- /* We have to read the tablespace id from the file */
-
- space_id = fsp_header_get_space_id(page);
- } else {
- space_id = ULINT_UNDEFINED;
- }
-
-#ifndef UNIV_HOTBACKUP
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
- fprintf(stderr,
- "InnoDB: Error: tablespace id %lu in file %s"
- " is not sensible\n",
- (ulong) space_id,
- filepath);
- goto func_exit;
- }
-#else
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
- char* new_path;
-
- fprintf(stderr,
- "InnoDB: Renaming tablespace %s of id %lu,\n"
- "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
- "InnoDB: because its size %lld is too small"
- " (< 4 pages 16 kB each),\n"
- "InnoDB: or the space id in the file header"
- " is not sensible.\n"
- "InnoDB: This can happen in an ibbackup run,"
- " and is not dangerous.\n",
- filepath, space_id, filepath, size);
- os_file_close(file);
-
- new_path = fil_make_ibbackup_old_name(filepath);
- ut_a(os_file_rename(filepath, new_path));
-
- ut_free(buf2);
- mem_free(filepath);
- mem_free(new_path);
-
- return;
- }
-
- /* A backup may contain the same space several times, if the space got
- renamed at a sensitive time. Since it is enough to have one version of
- the space, we rename the file if a space with the same space id
- already exists in the tablespace memory cache. We rather rename the
- file than delete it, because if there is a bug, we do not want to
- destroy valuable data. */
-
- mutex_enter(&(fil_system->mutex));
-
- space = fil_get_space_for_id_low(space_id);
-
- if (space) {
- char* new_path;
-
- fprintf(stderr,
- "InnoDB: Renaming tablespace %s of id %lu,\n"
- "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
- "InnoDB: because space %s with the same id\n"
- "InnoDB: was scanned earlier. This can happen"
- " if you have renamed tables\n"
- "InnoDB: during an ibbackup run.\n",
- filepath, space_id, filepath,
- space->name);
- os_file_close(file);
-
- new_path = fil_make_ibbackup_old_name(filepath);
-
- mutex_exit(&(fil_system->mutex));
-
- ut_a(os_file_rename(filepath, new_path));
-
- ut_free(buf2);
- mem_free(filepath);
- mem_free(new_path);
-
- return;
- }
- mutex_exit(&(fil_system->mutex));
-#endif
- success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
-
- if (!success) {
-
- goto func_exit;
- }
-
- /* We do not use the size information we have about the file, because
- the rounding formula for extents and pages is somewhat complex; we
- let fil_node_open() do that task. */
-
- fil_node_create(filepath, 0, space_id, FALSE);
-func_exit:
- os_file_close(file);
- ut_free(buf2);
- mem_free(filepath);
-}
-
-/***************************************************************************
-Reads the next file name in a directory in a fault-tolerant way: as long as
-os_file_readdir_next_file() returns -1, an error is printed, *err is set to
-DB_ERROR and the read is tried again, up to 100 attempts in total. The idea
-is to read as much good data as we can and jump over bad data. */
-static
-int
-fil_file_readdir_next_file(
-/*=======================*/
-				/* out: 0 if ok, -1 if error even after the
-				retries, 1 if at the end of the directory */
-	ulint*		err,	/* out: this is set to DB_ERROR if an error
-				was encountered, otherwise not changed */
-	const char*	dirname,/* in: directory name or path */
-	os_file_dir_t	dir,	/* in: directory stream */
-	os_file_stat_t*	info)	/* in/out: buffer where the info is returned */
-{
-	ulint	attempts_left	= 100;
-
-	while (attempts_left > 0) {
-		int	status = os_file_readdir_next_file(dirname, dir, info);
-
-		if (status != -1) {
-			/* Either an entry was read or the end was reached */
-
-			return(status);
-		}
-
-		fprintf(stderr,
-			"InnoDB: Error: os_file_readdir_next_file()"
-			" returned -1 in\n"
-			"InnoDB: directory %s\n"
-			"InnoDB: Crash recovery may have failed"
-			" for some .ibd files!\n", dirname);
-
-		*err = DB_ERROR;
-
-		attempts_left--;
-	}
-
-	return(-1);
-}
-
-/************************************************************************
-At the server startup, if we need crash recovery, scans the database
-directories under the MySQL datadir, looking for .ibd files. Those files are
-single-table tablespaces. We need to know the space id in each of them so that
-we know into which file we should look to check the contents of a page stored
-in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0.
-
-Entries of the datadir whose type is OS_FILE_TYPE_FILE or
-OS_FILE_TYPE_UNKNOWN are skipped; every other entry is opened as a directory.
-Inside such a directory, entries of type OS_FILE_TYPE_DIR are skipped and
-every entry whose name is longer than 4 characters and ends in ".ibd" is
-passed to fil_load_single_table_tablespace().
-DB_ERROR is returned if the datadir cannot be opened or closed; otherwise
-the value of err is returned, which is DB_ERROR if a directory read failed
-or a database directory could not be closed, and DB_SUCCESS if not. */
-
-ulint
-fil_load_single_table_tablespaces(void)
-/*===================================*/
- /* out: DB_SUCCESS or error number */
-{
- int ret;
- char* dbpath = NULL;
- ulint dbpath_len = 100;
- os_file_dir_t dir;
- os_file_dir_t dbdir;
- os_file_stat_t dbinfo;
- os_file_stat_t fileinfo;
- ulint err = DB_SUCCESS;
-
- /* The datadir of MySQL is always the default directory of mysqld */
-
- dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
-
- if (dir == NULL) {
-
- return(DB_ERROR);
- }
-
- dbpath = mem_alloc(dbpath_len);
-
- /* Scan all directories under the datadir. They are the database
- directories of MySQL. */
-
- ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
- &dbinfo);
- while (ret == 0) {
- ulint len;
- /* printf("Looking at %s in datadir\n", dbinfo.name); */
-
- if (dbinfo.type == OS_FILE_TYPE_FILE
- || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
-
- goto next_datadir_item;
- }
-
- /* We found a symlink or a directory; try opening it to see
- if a symlink is a directory */
-
- len = strlen(fil_path_to_mysql_datadir)
- + strlen (dbinfo.name) + 2; /* + 2: the '/' separator and the terminating NUL */
- if (len > dbpath_len) { /* the buffer is too small: replace it; its old contents are not needed */
- dbpath_len = len;
-
- if (dbpath) {
- mem_free(dbpath);
- }
-
- dbpath = mem_alloc(dbpath_len);
- }
- sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir,
- dbinfo.name);
- srv_normalize_path_for_win(dbpath);
-
- dbdir = os_file_opendir(dbpath, FALSE);
-
- if (dbdir != NULL) {
- /* printf("Opened dir %s\n", dbinfo.name); */
-
- /* We found a database directory; loop through it,
- looking for possible .ibd files in it */
-
- ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
- &fileinfo); /* ret is shared with the outer loop; it is assigned again at next_datadir_item before the outer condition is tested */
- while (ret == 0) {
- /* printf(
- " Looking at file %s\n", fileinfo.name); */
-
- if (fileinfo.type == OS_FILE_TYPE_DIR) {
-
- goto next_file_item;
- }
-
- /* We found a symlink or a file */
- if (strlen(fileinfo.name) > 4
- && 0 == strcmp(fileinfo.name
- + strlen(fileinfo.name) - 4,
- ".ibd")) {
- /* The name ends in .ibd; try opening
- the file */
- fil_load_single_table_tablespace(
- dbinfo.name, fileinfo.name);
- }
-next_file_item:
- ret = fil_file_readdir_next_file(&err,
- dbpath, dbdir,
- &fileinfo);
- }
-
- if (0 != os_file_closedir(dbdir)) {
- fputs("InnoDB: Warning: could not"
- " close database directory ", stderr);
- ut_print_filename(stderr, dbpath);
- putc('\n', stderr);
-
- err = DB_ERROR;
- }
- }
-
-next_datadir_item:
- ret = fil_file_readdir_next_file(&err,
- fil_path_to_mysql_datadir,
- dir, &dbinfo);
- }
-
- mem_free(dbpath);
-
- if (0 != os_file_closedir(dir)) {
- fprintf(stderr,
- "InnoDB: Error: could not close MySQL datadir\n");
-
- return(DB_ERROR);
- }
-
- return(err);
-}
-
-/************************************************************************
-Prints a warning for every orphaned .ibd file, that is, for every
-single-table tablespace in the memory cache that was not marked as having a
-data dictionary entry with a matching table name and space id. Meant to be
-called if we need crash recovery, after fil_load_single_table_tablespaces()
-and dict_load_single_table_tablespaces() have been called. */
-
-void
-fil_print_orphaned_tablespaces(void)
-/*================================*/
-{
-	fil_system_t*	system = fil_system;
-	fil_space_t*	sp;
-
-	mutex_enter(&(system->mutex));
-
-	for (sp = UT_LIST_GET_FIRST(system->space_list);
-	     sp;
-	     sp = UT_LIST_GET_NEXT(space_list, sp)) {
-
-		/* Only unmarked tablespaces with a nonzero id are orphans */
-
-		if (sp->purpose != FIL_TABLESPACE || sp->id == 0
-		    || sp->mark) {
-
-			continue;
-		}
-
-		fputs("InnoDB: Warning: tablespace ", stderr);
-		ut_print_filename(stderr, sp->name);
-		fprintf(stderr, " of id %lu has no matching table in\n"
-			"InnoDB: the InnoDB data dictionary.\n",
-			(ulong) sp->id);
-	}
-
-	mutex_exit(&(system->mutex));
-}
-
-/***********************************************************************
-Tells whether a single-table tablespace is absent from the memory cache, is
-being deleted there, or (when a version is given) has a tablespace_version
-different from the given one. */
-
-ibool
-fil_tablespace_deleted_or_being_deleted_in_mem(
-/*===========================================*/
-				/* out: TRUE if does not exist or is being
-				deleted */
-	ulint		id,	/* in: space id */
-	ib_longlong	version)/* in: tablespace_version should be this; if
-				you pass -1 as the value of this, then this
-				parameter is ignored */
-{
-	fil_system_t*	system = fil_system;
-	fil_space_t*	space;
-	ibool		gone;
-
-	ut_ad(system);
-
-	mutex_enter(&(system->mutex));
-
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
-	if (space == NULL || space->is_being_deleted) {
-		gone = TRUE;
-	} else if (version != ((ib_longlong)-1)
-		   && space->tablespace_version != version) {
-		/* The cached space is a different version of the space */
-		gone = TRUE;
-	} else {
-		gone = FALSE;
-	}
-
-	mutex_exit(&(system->mutex));
-
-	return(gone);
-}
-
-/***********************************************************************
-Tells whether a tablespace with the given space id is present in the
-tablespace memory cache. */
-
-ibool
-fil_tablespace_exists_in_mem(
-/*=========================*/
-			/* out: TRUE if exists */
-	ulint	id)	/* in: space id */
-{
-	fil_system_t*	system = fil_system;
-	fil_space_t*	space;
-	ibool		exists;
-
-	ut_ad(system);
-
-	mutex_enter(&(system->mutex));
-
-	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
-	/* Only the presence of the space matters, not its state */
-	exists = (space == NULL) ? FALSE : TRUE;
-
-	mutex_exit(&(system->mutex));
-
-	return(exists);
-}
-
-/***********************************************************************
-Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
-cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache.
-
-The cache is searched twice, once by space id and once by the .ibd path made
-from name; TRUE is returned only if both searches find the same space object.
-In every other case FALSE is returned, and if print_error_if_does_not_exist
-is set, a message describing which of the two lookups failed or disagreed is
-printed first. */
-
-ibool
-fil_space_for_table_exists_in_mem(
-/*==============================*/
- /* out: TRUE if a matching tablespace
- exists in the memory cache */
- ulint id, /* in: space id */
- const char* name, /* in: table name in the standard
- 'databasename/tablename' format or
- the dir path to a temp table */
- ibool is_temp, /* in: TRUE if created with CREATE
- TEMPORARY TABLE */
- ibool mark_space, /* in: in crash recovery, at database
- startup we mark all spaces which have
- an associated table in the InnoDB
- data dictionary, so that
- we can print a warning about orphaned
- tablespaces */
- ibool print_error_if_does_not_exist)
- /* in: print detailed error
- information to the .err log if a
- matching tablespace is not found from
- memory */
-{
- fil_system_t* system = fil_system;
- fil_space_t* namespace;
- fil_space_t* space;
- char* path;
-
- ut_ad(system);
-
- mutex_enter(&(system->mutex));
-
- path = fil_make_ibd_name(name, is_temp);
-
- /* Look if there is a space with the same id */
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- /* Look if there is a space with the same name; the name is the
- directory path from the datadir to the file */
-
- HASH_SEARCH(name_hash, system->name_hash,
- ut_fold_string(path), namespace,
- 0 == strcmp(namespace->name, path));
- if (space && space == namespace) {
- /* Found */
-
- if (mark_space) {
- space->mark = TRUE;
- }
-
- mem_free(path);
- mutex_exit(&(system->mutex));
-
- return(TRUE);
- }
-
- if (!print_error_if_does_not_exist) {
-
- mem_free(path);
- mutex_exit(&(system->mutex));
-
- return(FALSE);
- }
-
- if (space == NULL) {
- if (namespace == NULL) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has tablespace id %lu,\n"
- "InnoDB: but tablespace with that id"
- " or name does not exist. Have\n"
- "InnoDB: you deleted or moved .ibd files?\n"
- "InnoDB: This may also be a table created with"
- " CREATE TEMPORARY TABLE\n"
- "InnoDB: whose .ibd and .frm files"
- " MySQL automatically removed, but the\n"
- "InnoDB: table still exists in the"
- " InnoDB internal data dictionary.\n",
- (ulong) id);
- } else {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary has"
- " tablespace id %lu,\n"
- "InnoDB: but a tablespace with that id"
- " does not exist. There is\n"
- "InnoDB: a tablespace of name %s and id %lu,"
- " though. Have\n"
- "InnoDB: you deleted or moved .ibd files?\n",
- (ulong) id, namespace->name,
- (ulong) namespace->id);
- }
-error_exit: /* also entered by goto from the name mismatch branch below, which is outside this if block */
- fputs("InnoDB: Please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n"
- "InnoDB: for how to resolve the issue.\n", stderr);
-
- mem_free(path);
- mutex_exit(&(system->mutex));
-
- return(FALSE);
- }
-
- if (0 != strcmp(space->name, path)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary has"
- " tablespace id %lu,\n"
- "InnoDB: but the tablespace with that id"
- " has name %s.\n"
- "InnoDB: Have you deleted or moved .ibd files?\n",
- (ulong) id, space->name);
-
- if (namespace != NULL) {
- fputs("InnoDB: There is a tablespace"
- " with the right name\n"
- "InnoDB: ", stderr);
- ut_print_filename(stderr, namespace->name);
- fprintf(stderr, ", but its id is %lu.\n",
- (ulong) namespace->id);
- }
-
- goto error_exit;
- }
-
- mem_free(path); /* reached only if the space found by id has the right name but is not the object found by name; nothing is printed in that case */
- mutex_exit(&(system->mutex));
-
- return(FALSE);
-}
-
-/***********************************************************************
-Looks up the single-table tablespace of a table by name in the tablespace
-memory cache and returns its space id. */
-static
-ulint
-fil_get_space_id_for_table(
-/*=======================*/
-				/* out: space id, ULINT_UNDEFINED if not
-				found */
-	const char*	name)	/* in: table name in the standard
-				'databasename/tablename' format */
-{
-	fil_system_t*	system = fil_system;
-	fil_space_t*	found;
-	char*		ibd_path;
-	ulint		space_id;
-
-	ut_ad(system);
-
-	mutex_enter(&(system->mutex));
-
-	ibd_path = fil_make_ibd_name(name, FALSE);
-
-	/* Spaces are hashed by name as well; the name of a space is the
-	directory path to its file */
-
-	HASH_SEARCH(name_hash, system->name_hash,
-		    ut_fold_string(ibd_path), found,
-		    0 == strcmp(found->name, ibd_path));
-
-	space_id = found ? found->id : ULINT_UNDEFINED;
-
-	mem_free(ibd_path);
-
-	mutex_exit(&(system->mutex));
-
-	return(space_id);
-}
-
-/**************************************************************************
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing. */
-
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
- /* out: TRUE if success */
- ulint* actual_size, /* out: size of the space after extension;
- if we ran out of disk space this may be lower
- than the desired size */
- ulint space_id, /* in: space id */
- ulint size_after_extend)/* in: desired size in pages after the
- extension; if the current space size is bigger
- than this already, the function does nothing */
-{
- fil_system_t* system = fil_system;
- fil_node_t* node;
- fil_space_t* space;
- byte* buf2;
- byte* buf;
- ulint buf_size;
- ulint start_page_no;
- ulint file_start_page_no;
- ulint offset_high;
- ulint offset_low;
- ibool success = TRUE;
-
- fil_mutex_enter_and_prepare_for_io(space_id);
-
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
- ut_a(space);
-
- if (space->size >= size_after_extend) {
- /* Space already big enough */
-
- *actual_size = space->size;
-
- mutex_exit(&(system->mutex));
-
- return(TRUE);
- }
-
- node = UT_LIST_GET_LAST(space->chain);
-
- fil_node_prepare_for_io(node, system, space);
-
- start_page_no = space->size;
- file_start_page_no = space->size - node->size;
-
- /* Extend at most 64 pages at a time */
- buf_size = ut_min(64, size_after_extend - start_page_no)
- * UNIV_PAGE_SIZE;
- buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE);
- buf = ut_align(buf2, UNIV_PAGE_SIZE);
-
- memset(buf, 0, buf_size);
-
- while (start_page_no < size_after_extend) {
- ulint n_pages = ut_min(buf_size / UNIV_PAGE_SIZE,
- size_after_extend - start_page_no);
-
- offset_high = (start_page_no - file_start_page_no)
- / (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE));
- offset_low = ((start_page_no - file_start_page_no)
- % (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)))
- * UNIV_PAGE_SIZE;
-#ifdef UNIV_HOTBACKUP
- success = os_file_write(node->name, node->handle, buf,
- offset_low, offset_high,
- UNIV_PAGE_SIZE * n_pages);
-#else
- success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
- node->name, node->handle, buf,
- offset_low, offset_high,
- UNIV_PAGE_SIZE * n_pages,
- NULL, NULL);
-#endif
- if (success) {
- node->size += n_pages;
- space->size += n_pages;
-
- os_has_said_disk_full = FALSE;
- } else {
- /* Let us measure the size of the file to determine
- how much we were able to extend it */
-
- n_pages = ((ulint)
- (os_file_get_size_as_iblonglong
- (node->handle)
- / UNIV_PAGE_SIZE)) - node->size;
-
- node->size += n_pages;
- space->size += n_pages;
-
- break;
- }
-
- start_page_no += n_pages;
- }
-
- mem_free(buf2);
-
- fil_node_complete_io(node, system, OS_FILE_WRITE);
-
- *actual_size = space->size;
-
-#ifndef UNIV_HOTBACKUP
- if (space_id == 0) {
- ulint pages_per_mb = (1024 * 1024) / UNIV_PAGE_SIZE;
-
- /* Keep the last data file size info up to date, rounded to
- full megabytes */
-
- srv_data_file_sizes[srv_n_data_files - 1]
- = (node->size / pages_per_mb) * pages_per_mb;
- }
-#endif /* !UNIV_HOTBACKUP */
-
- /*
- printf("Extended %s to %lu, actual size %lu pages\n", space->name,
- size_after_extend, *actual_size); */
- mutex_exit(&(system->mutex));
-
- fil_flush(space_id);
-
- return(success);
-}
-
-#ifdef UNIV_HOTBACKUP
-/************************************************************************
-Extends all tablespaces to the size stored in the space header. During the
-ibbackup --apply-log phase we extended the spaces on-demand so that log records
-could be applied, but that may have left spaces still too small compared to
-the size stored in the space header. */
-
-void
-fil_extend_tablespaces_to_stored_len(void)
-/*======================================*/
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- byte* buf;
- ulint actual_size;
- ulint size_in_header;
- ulint error;
- ibool success;
-
- buf = mem_alloc(UNIV_PAGE_SIZE);
-
- mutex_enter(&(system->mutex));
-
- space = UT_LIST_GET_FIRST(system->space_list);
-
- while (space) {
- ut_a(space->purpose == FIL_TABLESPACE);
-
- mutex_exit(&(system->mutex)); /* no need to protect with a
- mutex, because this is a
- single-threaded operation */
- error = fil_read(TRUE, space->id, 0, 0, UNIV_PAGE_SIZE, buf,
- NULL);
- ut_a(error == DB_SUCCESS);
-
- size_in_header = fsp_get_size_low(buf);
-
- success = fil_extend_space_to_desired_size(
- &actual_size, space->id, size_in_header);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Error: could not extend the"
- " tablespace of %s\n"
- "InnoDB: to the size stored in header,"
- " %lu pages;\n"
- "InnoDB: size after extension %lu pages\n"
- "InnoDB: Check that you have free disk space"
- " and retry!\n",
- space->name, size_in_header, actual_size);
- exit(1);
- }
-
- mutex_enter(&(system->mutex));
-
- space = UT_LIST_GET_NEXT(space_list, space);
- }
-
- mutex_exit(&(system->mutex));
-
- mem_free(buf);
-}
-#endif
-
-/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
-
-/***********************************************************************
-Tries to reserve free extents in a file space. */
-
-ibool
-fil_space_reserve_free_extents(
-/*===========================*/
- /* out: TRUE if succeed */
- ulint id, /* in: space id */
- ulint n_free_now, /* in: number of free extents now */
- ulint n_to_reserve) /* in: how many one wants to reserve */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- ibool success;
-
- ut_ad(system);
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- ut_a(space);
-
- if (space->n_reserved_extents + n_to_reserve > n_free_now) {
- success = FALSE;
- } else {
- space->n_reserved_extents += n_to_reserve;
- success = TRUE;
- }
-
- mutex_exit(&(system->mutex));
-
- return(success);
-}
-
-/***********************************************************************
-Releases free extents in a file space. */
-
-void
-fil_space_release_free_extents(
-/*===========================*/
- ulint id, /* in: space id */
- ulint n_reserved) /* in: how many one reserved */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
-
- ut_ad(system);
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- ut_a(space);
- ut_a(space->n_reserved_extents >= n_reserved);
-
- space->n_reserved_extents -= n_reserved;
-
- mutex_exit(&(system->mutex));
-}
-
-/***********************************************************************
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
- ulint id) /* in: space id */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- ulint n;
-
- ut_ad(system);
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
-
- ut_a(space);
-
- n = space->n_reserved_extents;
-
- mutex_exit(&(system->mutex));
-
- return(n);
-}
-
-/*============================ FILE I/O ================================*/
-
-/************************************************************************
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Prepares a file node for i/o. Opens the file if it is closed. Updates the
-pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex. */
-static
-void
-fil_node_prepare_for_io(
-/*====================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- fil_space_t* space) /* in: space */
-{
- ut_ad(node && system && space);
- ut_ad(mutex_own(&(system->mutex)));
-
- if (system->n_open > system->max_n_open + 5) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: open files %lu"
- " exceeds the limit %lu\n",
- (ulong) system->n_open,
- (ulong) system->max_n_open);
- }
-
- if (node->open == FALSE) {
- /* File is closed: open it */
- ut_a(node->n_pending == 0);
-
- fil_node_open_file(node, system, space);
- }
-
- if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
- && space->id != 0) {
- /* The node is in the LRU list, remove it */
-
- ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
-
- UT_LIST_REMOVE(LRU, system->LRU, node);
- }
-
- node->n_pending++;
-}
-
-/************************************************************************
-Updates the data structures when an i/o operation finishes. Updates the
-pending i/o's field in the node appropriately. */
-static
-void
-fil_node_complete_io(
-/*=================*/
- fil_node_t* node, /* in: file node */
- fil_system_t* system, /* in: tablespace memory cache */
- ulint type) /* in: OS_FILE_WRITE or OS_FILE_READ; marks
- the node as modified if
- type == OS_FILE_WRITE */
-{
- ut_ad(node);
- ut_ad(system);
- ut_ad(mutex_own(&(system->mutex)));
-
- ut_a(node->n_pending > 0);
-
- node->n_pending--;
-
- if (type == OS_FILE_WRITE) {
- system->modification_counter++;
- node->modification_counter = system->modification_counter;
-
- if (!node->space->is_in_unflushed_spaces) {
-
- node->space->is_in_unflushed_spaces = TRUE;
- UT_LIST_ADD_FIRST(unflushed_spaces,
- system->unflushed_spaces,
- node->space);
- }
- }
-
- if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
- && node->space->id != 0) {
- /* The node must be put back to the LRU list */
- UT_LIST_ADD_FIRST(LRU, system->LRU, node);
- }
-}
-
-/************************************************************************
-Report information about an invalid page access. */
-static
-void
-fil_report_invalid_page_access(
-/*===========================*/
- ulint block_offset, /* in: block offset */
- ulint space_id, /* in: space id */
- const char* space_name, /* in: space name */
- ulint byte_offset, /* in: byte offset */
- ulint len, /* in: I/O length */
- ulint type) /* in: I/O type */
-{
- fprintf(stderr,
- "InnoDB: Error: trying to access page number %lu"
- " in space %lu,\n"
- "InnoDB: space name %s,\n"
- "InnoDB: which is outside the tablespace bounds.\n"
- "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
- "InnoDB: If you get this error at mysqld startup,"
- " please check that\n"
- "InnoDB: your my.cnf matches the ibdata files"
- " that you have in the\n"
- "InnoDB: MySQL server.\n",
- (ulong) block_offset, (ulong) space_id, space_name,
- (ulong) byte_offset, (ulong) len, (ulong) type);
-}
-
-/************************************************************************
-Reads or writes data. This operation is asynchronous (aio). */
-
-ulint
-fil_io(
-/*===*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
- ORed to OS_FILE_LOG, if a log i/o
- and ORed to OS_AIO_SIMULATED_WAKE_LATER
- if simulated aio and we want to post a
- batch of i/os; NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len, /* in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
- void* buf, /* in/out: buffer where to store read data
- or from where to write; in aio this must be
- appropriately aligned */
- void* message) /* in: message for aio handler if non-sync
- aio used, else ignored */
-{
- fil_system_t* system = fil_system;
- ulint mode;
- fil_space_t* space;
- fil_node_t* node;
- ulint offset_high;
- ulint offset_low;
- ibool ret;
- ulint is_log;
- ulint wake_later;
-
- is_log = type & OS_FILE_LOG;
- type = type & ~OS_FILE_LOG;
-
- wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
- type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
-
- ut_ad(byte_offset < UNIV_PAGE_SIZE);
- ut_ad(buf);
- ut_ad(len > 0);
- ut_a((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
- ut_ad(fil_validate());
-#ifndef UNIV_LOG_DEBUG
- /* ibuf bitmap pages must be read in the sync aio mode: */
- ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
- || !ibuf_bitmap_page(block_offset) || sync || is_log);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
- || ibuf_page(space_id, block_offset));
-#endif
-#endif
- if (sync) {
- mode = OS_AIO_SYNC;
- } else if (type == OS_FILE_READ && !is_log
- && ibuf_page(space_id, block_offset)) {
- mode = OS_AIO_IBUF;
- } else if (is_log) {
- mode = OS_AIO_LOG;
- } else {
- mode = OS_AIO_NORMAL;
- }
-
- if (type == OS_FILE_READ) {
- srv_data_read+= len;
- } else if (type == OS_FILE_WRITE) {
- srv_data_written+= len;
- }
-
- /* Reserve the fil_system mutex and make sure that we can open at
- least one file while holding it, if the file is not already open */
-
- fil_mutex_enter_and_prepare_for_io(space_id);
-
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
- if (!space) {
- mutex_exit(&(system->mutex));
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to do i/o"
- " to a tablespace which does not exist.\n"
- "InnoDB: i/o type %lu, space id %lu,"
- " page no. %lu, i/o length %lu bytes\n",
- (ulong) type, (ulong) space_id, (ulong) block_offset,
- (ulong) len);
-
- return(DB_TABLESPACE_DELETED);
- }
-
- ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- for (;;) {
- if (node == NULL) {
- fil_report_invalid_page_access(
- block_offset, space_id, space->name,
- byte_offset, len, type);
-
- ut_error;
- }
-
- if (space->id != 0 && node->size == 0) {
- /* We do not know the size of a single-table tablespace
- before we open the file */
-
- break;
- }
-
- if (node->size > block_offset) {
- /* Found! */
- break;
- } else {
- block_offset -= node->size;
- node = UT_LIST_GET_NEXT(chain, node);
- }
- }
-
- /* Open file if closed */
- fil_node_prepare_for_io(node, system, space);
-
- /* Check that at least the start offset is within the bounds of a
- single-table tablespace */
- if (space->purpose == FIL_TABLESPACE && space->id != 0
- && node->size <= block_offset) {
-
- fil_report_invalid_page_access(
- block_offset, space_id, space->name, byte_offset,
- len, type);
-
- ut_error;
- }
-
- /* Now we have made the changes in the data structures of system */
- mutex_exit(&(system->mutex));
-
- /* Calculate the low 32 bits and the high 32 bits of the file offset */
-
- offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
- offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL)
- + byte_offset;
-
- ut_a(node->size - block_offset
- >= (byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE);
-
- /* Do aio */
-
- ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
-
-#ifdef UNIV_HOTBACKUP
- /* In ibbackup do normal i/o, not aio */
- if (type == OS_FILE_READ) {
- ret = os_file_read(node->handle, buf, offset_low, offset_high,
- len);
- } else {
- ret = os_file_write(node->name, node->handle, buf,
- offset_low, offset_high, len);
- }
-#else
- /* Queue the aio request */
- ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
- offset_low, offset_high, len, node, message);
-#endif
- ut_a(ret);
-
- if (mode == OS_AIO_SYNC) {
- /* The i/o operation is already completed when we return from
- os_aio: */
-
- mutex_enter(&(system->mutex));
-
- fil_node_complete_io(node, system, type);
-
- mutex_exit(&(system->mutex));
-
- ut_ad(fil_validate());
- }
-
- return(DB_SUCCESS);
-}
-
-/************************************************************************
-Reads data from a space to a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_read(
-/*=====*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to read; this must not
- cross a file boundary; in aio this must be a
- block size multiple */
- void* buf, /* in/out: buffer where to store data read;
- in aio this must be appropriately aligned */
- void* message) /* in: message for aio handler if non-sync
- aio used, else ignored */
-{
- return(fil_io(OS_FILE_READ, sync, space_id, block_offset,
- byte_offset, len, buf, message));
-}
-
-/************************************************************************
-Writes data to a space from a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_write(
-/*======*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to write; this must
- not cross a file boundary; in aio this must
- be a block size multiple */
- void* buf, /* in: buffer from which to write; in aio
- this must be appropriately aligned */
- void* message) /* in: message for aio handler if non-sync
- aio used, else ignored */
-{
- return(fil_io(OS_FILE_WRITE, sync, space_id, block_offset,
- byte_offset, len, buf, message));
-}
-
-/**************************************************************************
-Waits for an aio operation to complete. This function is used to write the
-handler for completed requests. The aio array of pending requests is divided
-into segments (see os0file.c for more info). The thread specifies which
-segment it wants to wait for. */
-
-void
-fil_aio_wait(
-/*=========*/
- ulint segment) /* in: the number of the segment in the aio
- array to wait for */
-{
- fil_system_t* system = fil_system;
- ibool ret;
- fil_node_t* fil_node;
- void* message;
- ulint type;
-
- ut_ad(fil_validate());
-
- if (os_aio_use_native_aio) {
- srv_set_io_thread_op_info(segment, "native aio handle");
-#ifdef WIN_ASYNC_IO
- ret = os_aio_windows_handle(segment, 0, &fil_node,
- &message, &type);
-#elif defined(POSIX_ASYNC_IO)
- ret = os_aio_posix_handle(segment, &fil_node, &message);
-#else
- ret = 0; /* Eliminate compiler warning */
- ut_error;
-#endif
- } else {
- srv_set_io_thread_op_info(segment, "simulated aio handle");
-
- ret = os_aio_simulated_handle(segment, &fil_node,
- &message, &type);
- }
-
- ut_a(ret);
-
- srv_set_io_thread_op_info(segment, "complete io for fil node");
-
- mutex_enter(&(system->mutex));
-
- fil_node_complete_io(fil_node, fil_system, type);
-
- mutex_exit(&(system->mutex));
-
- ut_ad(fil_validate());
-
- /* Do the i/o handling */
- /* IMPORTANT: since i/o handling for reads will read also the insert
- buffer in tablespace 0, you have to be very careful not to introduce
- deadlocks in the i/o system. We keep tablespace 0 data files always
- open, and use a special i/o thread to serve insert buffer requests. */
-
- if (buf_pool_is_block(message)) {
- srv_set_io_thread_op_info(segment, "complete io for buf page");
- buf_page_io_complete(message);
- } else {
- srv_set_io_thread_op_info(segment, "complete io for log");
- log_io_complete(message);
- }
-}
-
-/**************************************************************************
-Flushes to disk possible writes cached by the OS. If the space does not exist
-or is being dropped, does not do anything. */
-
-void
-fil_flush(
-/*======*/
- ulint space_id) /* in: file space id (this can be a group of
- log files or a tablespace of the database) */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- fil_node_t* node;
- os_file_t file;
- ib_longlong old_mod_counter;
-
- mutex_enter(&(system->mutex));
-
- HASH_SEARCH(hash, system->spaces, space_id, space,
- space->id == space_id);
- if (!space || space->is_being_deleted) {
- mutex_exit(&(system->mutex));
-
- return;
- }
-
- space->n_pending_flushes++; /* prevent dropping of the space while
- we are flushing */
- node = UT_LIST_GET_FIRST(space->chain);
-
- while (node) {
- if (node->modification_counter > node->flush_counter) {
- ut_a(node->open);
-
- /* We want to flush the changes at least up to
- old_mod_counter */
- old_mod_counter = node->modification_counter;
-
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes++;
- } else {
- fil_n_pending_log_flushes++;
- fil_n_log_flushes++;
- }
-#ifdef __WIN__
- if (node->is_raw_disk) {
-
- goto skip_flush;
- }
-#endif
-retry:
- if (node->n_pending_flushes > 0) {
- /* We want to avoid calling os_file_flush() on
- the file twice at the same time, because we do
- not know what bugs OS's may contain in file
- i/o; sleep for a while */
-
- mutex_exit(&(system->mutex));
-
- os_thread_sleep(20000);
-
- mutex_enter(&(system->mutex));
-
- if (node->flush_counter >= old_mod_counter) {
-
- goto skip_flush;
- }
-
- goto retry;
- }
-
- ut_a(node->open);
- file = node->handle;
- node->n_pending_flushes++;
-
- mutex_exit(&(system->mutex));
-
- /* fprintf(stderr, "Flushing to file %s\n",
- node->name); */
-
- os_file_flush(file);
-
- mutex_enter(&(system->mutex));
-
- node->n_pending_flushes--;
-skip_flush:
- if (node->flush_counter < old_mod_counter) {
- node->flush_counter = old_mod_counter;
-
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
-
- space->is_in_unflushed_spaces = FALSE;
-
- UT_LIST_REMOVE(
- unflushed_spaces,
- system->unflushed_spaces,
- space);
- }
- }
-
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes--;
- } else {
- fil_n_pending_log_flushes--;
- }
- }
-
- node = UT_LIST_GET_NEXT(chain, node);
- }
-
- space->n_pending_flushes--;
-
- mutex_exit(&(system->mutex));
-}
-
-/**************************************************************************
-Flushes to disk the writes in file spaces of the given type possibly cached by
-the OS. */
-
-void
-fil_flush_file_spaces(
-/*==================*/
- ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- ulint* space_ids;
- ulint n_space_ids;
- ulint i;
-
- mutex_enter(&(system->mutex));
-
- n_space_ids = UT_LIST_GET_LEN(system->unflushed_spaces);
- if (n_space_ids == 0) {
-
- mutex_exit(&system->mutex);
- return;
- }
-
- /* Assemble a list of space ids to flush. Previously, we
- traversed system->unflushed_spaces and called UT_LIST_GET_NEXT()
- on a space that was just removed from the list by fil_flush().
- Thus, the space could be dropped and the memory overwritten. */
- space_ids = mem_alloc(n_space_ids * sizeof *space_ids);
-
- n_space_ids = 0;
-
- for (space = UT_LIST_GET_FIRST(system->unflushed_spaces);
- space;
- space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
-
- if (space->purpose == purpose && !space->is_being_deleted) {
-
- space_ids[n_space_ids++] = space->id;
- }
- }
-
- mutex_exit(&system->mutex);
-
- /* Flush the spaces. It will not hurt to call fil_flush() on
- a non-existing space id. */
- for (i = 0; i < n_space_ids; i++) {
-
- fil_flush(space_ids[i]);
- }
-
- mem_free(space_ids);
-}
-
-/**********************************************************************
-Checks the consistency of the tablespace cache. */
-
-ibool
-fil_validate(void)
-/*==============*/
- /* out: TRUE if ok */
-{
- fil_system_t* system = fil_system;
- fil_space_t* space;
- fil_node_t* fil_node;
- ulint n_open = 0;
- ulint i;
-
- mutex_enter(&(system->mutex));
-
- /* Look for spaces in the hash table */
-
- for (i = 0; i < hash_get_n_cells(system->spaces); i++) {
-
- space = HASH_GET_FIRST(system->spaces, i);
-
- while (space != NULL) {
- UT_LIST_VALIDATE(chain, fil_node_t, space->chain);
-
- fil_node = UT_LIST_GET_FIRST(space->chain);
-
- while (fil_node != NULL) {
- if (fil_node->n_pending > 0) {
- ut_a(fil_node->open);
- }
-
- if (fil_node->open) {
- n_open++;
- }
- fil_node = UT_LIST_GET_NEXT(chain, fil_node);
- }
- space = HASH_GET_NEXT(hash, space);
- }
- }
-
- ut_a(system->n_open == n_open);
-
- UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
-
- fil_node = UT_LIST_GET_FIRST(system->LRU);
-
- while (fil_node != NULL) {
- ut_a(fil_node->n_pending == 0);
- ut_a(fil_node->open);
- ut_a(fil_node->space->purpose == FIL_TABLESPACE);
- ut_a(fil_node->space->id != 0);
-
- fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
- }
-
- mutex_exit(&(system->mutex));
-
- return(TRUE);
-}
-
-/************************************************************************
-Returns TRUE if file address is undefined. */
-ibool
-fil_addr_is_null(
-/*=============*/
- /* out: TRUE if undefined */
- fil_addr_t addr) /* in: address */
-{
- if (addr.page == FIL_NULL) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/************************************************************************
-Accessor functions for a file page */
-
-ulint
-fil_page_get_prev(byte* page)
-{
- return(mach_read_from_4(page + FIL_PAGE_PREV));
-}
-
-ulint
-fil_page_get_next(byte* page)
-{
- return(mach_read_from_4(page + FIL_PAGE_NEXT));
-}
-
-/*************************************************************************
-Sets the file page type. */
-
-void
-fil_page_set_type(
-/*==============*/
- byte* page, /* in: file page */
- ulint type) /* in: type */
-{
- ut_ad(page);
-
- mach_write_to_2(page + FIL_PAGE_TYPE, type);
-}
-
-/*************************************************************************
-Gets the file page type. */
-
-ulint
-fil_page_get_type(
-/*==============*/
- /* out: type; NOTE that if the type has not been
- written to page, the return value not defined */
- byte* page) /* in: file page */
-{
- ut_ad(page);
-
- return(mach_read_from_2(page + FIL_PAGE_TYPE));
-}
diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
deleted file mode 100644
index e1074933fe8..00000000000
--- a/storage/innobase/fsp/fsp0fsp.c
+++ /dev/null
@@ -1,3990 +0,0 @@
-/**********************************************************************
-File space management
-
-(c) 1995 Innobase Oy
-
-Created 11/29/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fsp0fsp.h"
-
-#ifdef UNIV_NONINL
-#include "fsp0fsp.ic"
-#endif
-
-#include "buf0buf.h"
-#include "fil0fil.h"
-#include "sync0sync.h"
-#include "mtr0log.h"
-#include "fut0fut.h"
-#include "ut0byte.h"
-#include "srv0srv.h"
-#include "page0types.h"
-#include "ibuf0ibuf.h"
-#include "btr0btr.h"
-#include "btr0sea.h"
-#include "dict0boot.h"
-#include "dict0mem.h"
-#include "log0log.h"
-
-
-#define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header
- within a file page */
-
-/* The data structures in files are defined just as byte strings in C */
-typedef byte fsp_header_t;
-typedef byte xdes_t;
-
-/* SPACE HEADER
- ============
-
-File space header data structure: this data structure is contained in the
-first page of a space. The space for this header is reserved in every extent
-descriptor page, but used only in the first. */
-
-/*-------------------------------------*/
-#define FSP_SPACE_ID 0 /* space id */
-#define FSP_NOT_USED 4 /* this field contained a value up to
- which we know that the modifications
- in the database have been flushed to
- the file space; not used now */
-#define FSP_SIZE 8 /* Current size of the space in
- pages */
-#define FSP_FREE_LIMIT 12 /* Minimum page number for which the
- free list has not been initialized:
- the pages >= this limit are, by
- definition, free; note that in a
- single-table tablespace where size
- < 64 pages, this number is 64, i.e.,
- we have initialized the space
- about the first extent, but have not
- physically allocted those pages to the
- file */
-#define FSP_LOWEST_NO_WRITE 16 /* The lowest page offset for which
- the page has not been written to disk
- (if it has been written, we know that
- the OS has really reserved the
- physical space for the page) */
-#define FSP_FRAG_N_USED 20 /* number of used pages in the
- FSP_FREE_FRAG list */
-#define FSP_FREE 24 /* list of free extents */
-#define FSP_FREE_FRAG (24 + FLST_BASE_NODE_SIZE)
- /* list of partially free extents not
- belonging to any segment */
-#define FSP_FULL_FRAG (24 + 2 * FLST_BASE_NODE_SIZE)
- /* list of full extents not belonging
- to any segment */
-#define FSP_SEG_ID (24 + 3 * FLST_BASE_NODE_SIZE)
- /* 8 bytes which give the first unused
- segment id */
-#define FSP_SEG_INODES_FULL (32 + 3 * FLST_BASE_NODE_SIZE)
- /* list of pages containing segment
- headers, where all the segment inode
- slots are reserved */
-#define FSP_SEG_INODES_FREE (32 + 4 * FLST_BASE_NODE_SIZE)
- /* list of pages containing segment
- headers, where not all the segment
- header slots are reserved */
-/*-------------------------------------*/
-/* File space header size */
-#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE)
-
-#define FSP_FREE_ADD 4 /* this many free extents are added
- to the free list from above
- FSP_FREE_LIMIT at a time */
-
-/* FILE SEGMENT INODE
- ==================
-
-Segment inode which is created for each segment in a tablespace. NOTE: in
-purge we assume that a segment having only one currently used page can be
-freed in a few steps, so that the freeing cannot fill the file buffer with
-bufferfixed file pages. */
-
-typedef byte fseg_inode_t;
-
-#define FSEG_INODE_PAGE_NODE FSEG_PAGE_DATA
- /* the list node for linking
- segment inode pages */
-
-#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE)
-/*-------------------------------------*/
-#define FSEG_ID 0 /* 8 bytes of segment id: if this is
- ut_dulint_zero, it means that the
- header is unused */
-#define FSEG_NOT_FULL_N_USED 8
- /* number of used segment pages in
- the FSEG_NOT_FULL list */
-#define FSEG_FREE 12
- /* list of free extents of this
- segment */
-#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE)
- /* list of partially free extents */
-#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE)
- /* list of full extents */
-#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE)
- /* magic number used in debugging */
-#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE)
- /* array of individual pages
- belonging to this segment in fsp
- fragment extent lists */
-#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2)
- /* number of slots in the array for
- the fragment pages */
-#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its
- page number within space, FIL_NULL
- means that the slot is not in use */
-/*-------------------------------------*/
-#define FSEG_INODE_SIZE \
- (16 + 3 * FLST_BASE_NODE_SIZE \
- + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
-
-#define FSP_SEG_INODES_PER_PAGE \
- ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
- /* Number of segment inodes which fit on a
- single page */
-
-#define FSEG_MAGIC_N_VALUE 97937874
-
-#define FSEG_FILLFACTOR 8 /* If this value is x, then if
- the number of unused but reserved
- pages in a segment is less than
- reserved pages * 1/x, and there are
- at least FSEG_FRAG_LIMIT used pages,
- then we allow a new empty extent to
- be added to the segment in
- fseg_alloc_free_page. Otherwise, we
- use unused pages of the segment. */
-
-#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS
- /* If the segment has >= this many
- used pages, it may be expanded by
- allocating extents to the segment;
- until that only individual fragment
- pages are allocated from the space */
-
-#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment
- is at least this many extents, we
- allow extents to be put to the free
- list of the extent: at most
- FSEG_FREE_LIST_MAX_LEN many */
-#define FSEG_FREE_LIST_MAX_LEN 4
-
-
-/* EXTENT DESCRIPTOR
- =================
-
-File extent descriptor data structure: contains bits to tell which pages in
-the extent are free and which contain old tuple version to clean. */
-
-/*-------------------------------------*/
-#define XDES_ID 0 /* The identifier of the segment
- to which this extent belongs */
-#define XDES_FLST_NODE 8 /* The list node data structure
- for the descriptors */
-#define XDES_STATE (FLST_NODE_SIZE + 8)
- /* contains state information
- of the extent */
-#define XDES_BITMAP (FLST_NODE_SIZE + 12)
- /* Descriptor bitmap of the pages
- in the extent */
-/*-------------------------------------*/
-
-#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */
-#define XDES_FREE_BIT 0 /* Index of the bit which tells if
- the page is free */
-#define XDES_CLEAN_BIT 1 /* NOTE: currently not used!
- Index of the bit which tells if
- there are old versions of tuples
- on the page */
-/* States of a descriptor */
-#define XDES_FREE 1 /* extent is in free list of space */
-#define XDES_FREE_FRAG 2 /* extent is in free fragment list of
- space */
-#define XDES_FULL_FRAG 3 /* extent is in full fragment list of
- space */
-#define XDES_FSEG 4 /* extent belongs to a segment */
-
-/* File extent data structure size in bytes. */
-#define XDES_SIZE \
- (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
-
-/* Offset of the descriptor array on a descriptor page */
-#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
-
-/**************************************************************************
-Returns an extent to the free list of a space. */
-static
-void
-fsp_free_extent(
-/*============*/
- ulint space, /* in: space id */
- ulint page, /* in: page offset in the extent */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Frees an extent of a segment to the space free list. */
-static
-void
-fseg_free_extent(
-/*=============*/
- fseg_inode_t* seg_inode, /* in: segment inode */
- ulint space, /* in: space id */
- ulint page, /* in: page offset in the extent */
- mtr_t* mtr); /* in: mtr handle */
-/**************************************************************************
-Calculates the number of pages reserved by a segment, and how
-many pages are currently used. */
-static
-ulint
-fseg_n_reserved_pages_low(
-/*======================*/
- /* out: number of reserved pages */
- fseg_inode_t* header, /* in: segment inode */
- ulint* used, /* out: number of pages used (<= reserved) */
- mtr_t* mtr); /* in: mtr handle */
-/************************************************************************
-Marks a page used. The page must reside within the extents of the given
-segment. */
-static
-void
-fseg_mark_page_used(
-/*================*/
- fseg_inode_t* seg_inode,/* in: segment inode */
- ulint space, /* in: space id */
- ulint page, /* in: page offset */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Returns the first extent descriptor for a segment. We think of the extent
-lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE. */
-static
-xdes_t*
-fseg_get_first_extent(
-/*==================*/
- /* out: the first extent descriptor, or NULL if
- none */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Puts new extents to the free list if
-there are free extents above the free limit. If an extent happens
-to contain an extent descriptor page, the extent is put to
-the FSP_FREE_FRAG list with the page marked as used. */
-static
-void
-fsp_fill_free_list(
-/*===============*/
- ibool init_space, /* in: TRUE if this is a single-table
- tablespace and we are only initing
- the tablespace's first extent
- descriptor page and ibuf bitmap page;
- then we do not allocate more extents */
- ulint space, /* in: space */
- fsp_header_t* header, /* in: space header */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
-static
-ulint
-fseg_alloc_free_page_low(
-/*=====================*/
- /* out: the allocated page number, FIL_NULL
- if no page could be allocated */
- ulint space, /* in: space */
- fseg_inode_t* seg_inode, /* in: segment inode */
- ulint hint, /* in: hint of which page would be desirable */
- byte direction, /* in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr); /* in: mtr handle */
-
-
-/**************************************************************************
-Reads the file space size stored in the header page. */
-
-ulint
-fsp_get_size_low(
-/*=============*/
- /* out: tablespace size stored in the space header */
- page_t* page) /* in: header page (page 0 in the tablespace) */
-{
- return(mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SIZE));
-}
-
-/**************************************************************************
-Gets a pointer to the space header and x-locks its page. */
-UNIV_INLINE
-fsp_header_t*
-fsp_get_space_header(
-/*=================*/
- /* out: pointer to the space header, page x-locked */
- ulint id, /* in: space id */
- mtr_t* mtr) /* in: mtr */
-{
- fsp_header_t* header;
-
- ut_ad(mtr);
-
- header = FSP_HEADER_OFFSET + buf_page_get(id, 0, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
- return(header);
-}
-
-/**************************************************************************
-Gets a descriptor bit of a page. */
-UNIV_INLINE
-ibool
-xdes_get_bit(
-/*=========*/
- /* out: TRUE if free */
- xdes_t* descr, /* in: descriptor */
- ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ulint offset, /* in: page offset within extent:
- 0 ... FSP_EXTENT_SIZE - 1 */
- mtr_t* mtr) /* in: mtr */
-{
- ulint index;
- ulint byte_index;
- ulint bit_index;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
- ut_ad(offset < FSP_EXTENT_SIZE);
-
- index = bit + XDES_BITS_PER_PAGE * offset;
-
- byte_index = index / 8;
- bit_index = index % 8;
-
- return(ut_bit_get_nth(mtr_read_ulint(descr + XDES_BITMAP + byte_index,
- MLOG_1BYTE, mtr),
- bit_index));
-}
-
-/**************************************************************************
-Sets a descriptor bit of a page. */
-UNIV_INLINE
-void
-xdes_set_bit(
-/*=========*/
- xdes_t* descr, /* in: descriptor */
- ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ulint offset, /* in: page offset within extent:
- 0 ... FSP_EXTENT_SIZE - 1 */
- ibool val, /* in: bit value */
- mtr_t* mtr) /* in: mtr */
-{
- ulint index;
- ulint byte_index;
- ulint bit_index;
- ulint descr_byte;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
- ut_ad(offset < FSP_EXTENT_SIZE);
-
- index = bit + XDES_BITS_PER_PAGE * offset;
-
- byte_index = index / 8;
- bit_index = index % 8;
-
- descr_byte = mtr_read_ulint(descr + XDES_BITMAP + byte_index,
- MLOG_1BYTE, mtr);
- descr_byte = ut_bit_set_nth(descr_byte, bit_index, val);
-
- mlog_write_ulint(descr + XDES_BITMAP + byte_index, descr_byte,
- MLOG_1BYTE, mtr);
-}
-
-/**************************************************************************
-Looks for a descriptor bit having the desired value. Starts from hint
-and scans upward; at the end of the extent the search is wrapped to
-the start of the extent. */
-UNIV_INLINE
-ulint
-xdes_find_bit(
-/*==========*/
- /* out: bit index of the bit, ULINT_UNDEFINED if not
- found */
- xdes_t* descr, /* in: descriptor */
- ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ibool val, /* in: desired bit value */
- ulint hint, /* in: hint of which bit position would be desirable */
- mtr_t* mtr) /* in: mtr */
-{
- ulint i;
-
- ut_ad(descr && mtr);
- ut_ad(val <= TRUE);
- ut_ad(hint < FSP_EXTENT_SIZE);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
- for (i = hint; i < FSP_EXTENT_SIZE; i++) {
- if (val == xdes_get_bit(descr, bit, i, mtr)) {
-
- return(i);
- }
- }
-
- for (i = 0; i < hint; i++) {
- if (val == xdes_get_bit(descr, bit, i, mtr)) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**************************************************************************
-Looks for a descriptor bit having the desired value. Scans the extent in
-a direction opposite to xdes_find_bit. */
-UNIV_INLINE
-ulint
-xdes_find_bit_downward(
-/*===================*/
- /* out: bit index of the bit, ULINT_UNDEFINED if not
- found */
- xdes_t* descr, /* in: descriptor */
- ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ibool val, /* in: desired bit value */
- ulint hint, /* in: hint of which bit position would be desirable */
- mtr_t* mtr) /* in: mtr */
-{
- ulint i;
-
- ut_ad(descr && mtr);
- ut_ad(val <= TRUE);
- ut_ad(hint < FSP_EXTENT_SIZE);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
- for (i = hint + 1; i > 0; i--) {
- if (val == xdes_get_bit(descr, bit, i - 1, mtr)) {
-
- return(i - 1);
- }
- }
-
- for (i = FSP_EXTENT_SIZE - 1; i > hint; i--) {
- if (val == xdes_get_bit(descr, bit, i, mtr)) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**************************************************************************
-Returns the number of used pages in a descriptor. */
-UNIV_INLINE
-ulint
-xdes_get_n_used(
-/*============*/
- /* out: number of pages used */
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- ulint i;
- ulint count = 0;
-
- ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
- for (i = 0; i < FSP_EXTENT_SIZE; i++) {
- if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
- count++;
- }
- }
-
- return(count);
-}
-
-/**************************************************************************
-Returns true if extent contains no used pages. */
-UNIV_INLINE
-ibool
-xdes_is_free(
-/*=========*/
- /* out: TRUE if totally free */
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- if (0 == xdes_get_n_used(descr, mtr)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**************************************************************************
-Returns true if extent contains no free pages. */
-UNIV_INLINE
-ibool
-xdes_is_full(
-/*=========*/
- /* out: TRUE if full */
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**************************************************************************
-Sets the state of an xdes. */
-UNIV_INLINE
-void
-xdes_set_state(
-/*===========*/
- xdes_t* descr, /* in: descriptor */
- ulint state, /* in: state to set */
- mtr_t* mtr) /* in: mtr handle */
-{
- ut_ad(descr && mtr);
- ut_ad(state >= XDES_FREE);
- ut_ad(state <= XDES_FSEG);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
-
- mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr);
-}
-
-/**************************************************************************
-Gets the state of an xdes. */
-UNIV_INLINE
-ulint
-xdes_get_state(
-/*===========*/
- /* out: state */
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr handle */
-{
- ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
-
- return(mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr));
-}
-
-/**************************************************************************
-Inits an extent descriptor to the free and clean state. */
-UNIV_INLINE
-void
-xdes_init(
-/*======*/
- xdes_t* descr, /* in: descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- ulint i;
-
- ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(descr),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
-
- for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
- mlog_write_ulint(descr + i, 0xFFFFFFFFUL, MLOG_4BYTES, mtr);
- }
-
- xdes_set_state(descr, XDES_FREE, mtr);
-}
-
-/************************************************************************
-Calculates the page where the descriptor of a page resides. */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_page(
-/*======================*/
- /* out: descriptor page offset */
- ulint offset) /* in: page offset */
-{
-#if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET \
- + (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) * XDES_SIZE
-# error
-#endif
-
- return(ut_2pow_round(offset, XDES_DESCRIBED_PER_PAGE));
-}
-
-/************************************************************************
-Calculates the descriptor index within a descriptor page. */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_index(
-/*=======================*/
- /* out: descriptor index */
- ulint offset) /* in: page offset */
-{
- return(ut_2pow_remainder(offset, XDES_DESCRIBED_PER_PAGE)
- / FSP_EXTENT_SIZE);
-}
-
-/************************************************************************
-Gets pointer to a the extent descriptor of a page. The page where the extent
-descriptor resides is x-locked. If the page offset is equal to the free limit
-of the space, adds new extents from above the free limit to the space free
-list, if not free limit == space size. This adding is necessary to make the
-descriptor defined, as they are uninitialized above the free limit. */
-UNIV_INLINE
-xdes_t*
-xdes_get_descriptor_with_space_hdr(
-/*===============================*/
- /* out: pointer to the extent descriptor,
- NULL if the page does not exist in the
- space or if offset > free limit */
- fsp_header_t* sp_header,/* in: space header, x-latched */
- ulint space, /* in: space id */
- ulint offset, /* in: page offset;
- if equal to the free limit,
- we try to add new extents to
- the space free list */
- mtr_t* mtr) /* in: mtr handle */
-{
- ulint limit;
- ulint size;
- ulint descr_page_no;
- page_t* descr_page;
-
- ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
- /* Read free limit and space size */
- limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
- size = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- /* If offset is >= size or > limit, return NULL */
-
- if ((offset >= size) || (offset > limit)) {
-
- return(NULL);
- }
-
- /* If offset is == limit, fill free list of the space. */
-
- if (offset == limit) {
- fsp_fill_free_list(FALSE, space, sp_header, mtr);
- }
-
- descr_page_no = xdes_calc_descriptor_page(offset);
-
- if (descr_page_no == 0) {
- /* It is on the space header page */
-
- descr_page = buf_frame_align(sp_header);
- } else {
- descr_page = buf_page_get(space, descr_page_no, RW_X_LATCH,
- mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(descr_page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
- }
-
- return(descr_page + XDES_ARR_OFFSET
- + XDES_SIZE * xdes_calc_descriptor_index(offset));
-}
-
-/************************************************************************
-Gets pointer to a the extent descriptor of a page. The page where the
-extent descriptor resides is x-locked. If the page offset is equal to
-the free limit of the space, adds new extents from above the free limit
-to the space free list, if not free limit == space size. This adding
-is necessary to make the descriptor defined, as they are uninitialized
-above the free limit. */
-static
-xdes_t*
-xdes_get_descriptor(
-/*================*/
- /* out: pointer to the extent descriptor, NULL if the
- page does not exist in the space or if offset > free
- limit */
- ulint space, /* in: space id */
- ulint offset, /* in: page offset; if equal to the free limit,
- we try to add new extents to the space free list */
- mtr_t* mtr) /* in: mtr handle */
-{
- fsp_header_t* sp_header;
-
- sp_header = FSP_HEADER_OFFSET
- + buf_page_get(space, 0, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(sp_header, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
- return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset,
- mtr));
-}
-
-/************************************************************************
-Gets pointer to a the extent descriptor if the file address
-of the descriptor list node is known. The page where the
-extent descriptor resides is x-locked. */
-UNIV_INLINE
-xdes_t*
-xdes_lst_get_descriptor(
-/*====================*/
- /* out: pointer to the extent descriptor */
- ulint space, /* in: space id */
- fil_addr_t lst_node,/* in: file address of the list node
- contained in the descriptor */
- mtr_t* mtr) /* in: mtr handle */
-{
- xdes_t* descr;
-
- ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
- descr = fut_get_ptr(space, lst_node, RW_X_LATCH, mtr) - XDES_FLST_NODE;
-
- return(descr);
-}
-
-/************************************************************************
-Gets pointer to the next descriptor in a descriptor list and x-locks its
-page. */
-UNIV_INLINE
-xdes_t*
-xdes_lst_get_next(
-/*==============*/
- xdes_t* descr, /* in: pointer to a descriptor */
- mtr_t* mtr) /* in: mtr handle */
-{
- ulint space;
-
- ut_ad(mtr && descr);
-
- space = buf_frame_get_space_id(descr);
-
- return(xdes_lst_get_descriptor(
- space,
- flst_get_next_addr(descr + XDES_FLST_NODE, mtr), mtr));
-}
-
-/************************************************************************
-Returns page offset of the first page in extent described by a descriptor. */
-UNIV_INLINE
-ulint
-xdes_get_offset(
-/*============*/
- /* out: offset of the first page in extent */
- xdes_t* descr) /* in: extent descriptor */
-{
- ut_ad(descr);
-
- return(buf_frame_get_page_no(descr)
- + ((descr - buf_frame_align(descr) - XDES_ARR_OFFSET)
- / XDES_SIZE)
- * FSP_EXTENT_SIZE);
-}
-
-/***************************************************************
-Inits a file page whose prior contents should be ignored. */
-static
-void
-fsp_init_file_page_low(
-/*===================*/
- byte* ptr) /* in: pointer to a page */
-{
- page_t* page;
- page = buf_frame_align(ptr);
-
- buf_block_align(page)->check_index_page_at_flush = FALSE;
-
-#ifdef UNIV_BASIC_LOG_DEBUG
- memset(page, 0xff, UNIV_PAGE_SIZE);
-#endif
- mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- ut_dulint_zero);
- mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
-}
-
-/***************************************************************
-Inits a file page whose prior contents should be ignored. */
-static
-void
-fsp_init_file_page(
-/*===============*/
- page_t* page, /* in: page */
- mtr_t* mtr) /* in: mtr */
-{
- fsp_init_file_page_low(page);
-
- mlog_write_initial_log_record(page, MLOG_INIT_FILE_PAGE, mtr);
-}
-
-/***************************************************************
-Parses a redo log record of a file page init. */
-
-byte*
-fsp_parse_init_file_page(
-/*=====================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)), /* in: buffer end */
- page_t* page) /* in: page or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- if (page) {
- fsp_init_file_page_low(page);
- }
-
- return(ptr);
-}
-
-/**************************************************************************
-Initializes the fsp system. */
-
-void
-fsp_init(void)
-/*==========*/
-{
- /* Does nothing at the moment */
-}
-
-/**************************************************************************
-Writes the space id to a tablespace header. This function is used past the
-buffer pool when we in fil0fil.c create a new single-table tablespace. */
-
-void
-fsp_header_write_space_id(
-/*======================*/
- page_t* page, /* in: first page in the space */
- ulint space_id) /* in: space id */
-{
- mach_write_to_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID, space_id);
-}
-
-/**************************************************************************
-Initializes the space header of a new created space and creates also the
-insert buffer tree root if space == 0. */
-
-void
-fsp_header_init(
-/*============*/
- ulint space, /* in: space id */
- ulint size, /* in: current size in blocks */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- fsp_header_t* header;
- page_t* page;
-
- ut_ad(mtr);
-
- mtr_x_lock(fil_space_get_latch(space), mtr);
-
- page = buf_page_create(space, 0, mtr);
- buf_page_get(space, 0, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- /* The prior contents of the file page should be ignored */
-
- fsp_init_file_page(page, mtr);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR,
- MLOG_2BYTES, mtr);
-
- header = FSP_HEADER_OFFSET + page;
-
- mlog_write_ulint(header + FSP_SPACE_ID, space, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_NOT_USED, 0, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
-
- flst_init(header + FSP_FREE, mtr);
- flst_init(header + FSP_FREE_FRAG, mtr);
- flst_init(header + FSP_FULL_FRAG, mtr);
- flst_init(header + FSP_SEG_INODES_FULL, mtr);
- flst_init(header + FSP_SEG_INODES_FREE, mtr);
-
- mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1), mtr);
- if (space == 0) {
- fsp_fill_free_list(FALSE, space, header, mtr);
- btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space,
- ut_dulint_add(DICT_IBUF_ID_MIN, space), FALSE, mtr);
- } else {
- fsp_fill_free_list(TRUE, space, header, mtr);
- }
-}
-
-/**************************************************************************
-Reads the space id from the first page of a tablespace. */
-
-ulint
-fsp_header_get_space_id(
-/*====================*/
- /* out: space id, ULINT UNDEFINED if error */
- page_t* page) /* in: first page of a tablespace */
-{
- ulint fsp_id;
- ulint id;
-
- fsp_id = mach_read_from_4(FSP_HEADER_OFFSET + page + FSP_SPACE_ID);
-
- id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- if (id != fsp_id) {
- fprintf(stderr,
- "InnoDB: Error: space id in fsp header %lu,"
- " but in the page header %lu\n",
- (ulong) fsp_id, (ulong) id);
-
- return(ULINT_UNDEFINED);
- }
-
- return(id);
-}
-
-/**************************************************************************
-Increases the space size field of a space. */
-
-void
-fsp_header_inc_size(
-/*================*/
- ulint space, /* in: space id */
- ulint size_inc,/* in: size increment in pages */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- fsp_header_t* header;
- ulint size;
-
- ut_ad(mtr);
-
- mtr_x_lock(fil_space_get_latch(space), mtr);
-
- header = fsp_get_space_header(space, mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES,
- mtr);
-}
-
-/**************************************************************************
-Gets the current free limit of a tablespace. The free limit means the
-place of the first page which has never been put to the the free list
-for allocation. The space above that address is initialized to zero.
-Sets also the global variable log_fsp_current_free_limit. */
-
-ulint
-fsp_header_get_free_limit(
-/*======================*/
- /* out: free limit in megabytes */
- ulint space) /* in: space id, must be 0 */
-{
- fsp_header_t* header;
- ulint limit;
- mtr_t mtr;
-
- ut_a(space == 0); /* We have only one log_fsp_current_... variable */
-
- mtr_start(&mtr);
-
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
-
- limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, &mtr);
-
- limit = limit / ((1024 * 1024) / UNIV_PAGE_SIZE);
-
- log_fsp_current_free_limit_set_and_checkpoint(limit);
-
- mtr_commit(&mtr);
-
- return(limit);
-}
-
-/**************************************************************************
-Gets the size of the tablespace from the tablespace header. If we do not
-have an auto-extending data file, this should be equal to the size of the
-data files. If there is an auto-extending data file, this can be smaller. */
-
-ulint
-fsp_header_get_tablespace_size(
-/*===========================*/
- /* out: size in pages */
- ulint space) /* in: space id, must be 0 */
-{
- fsp_header_t* header;
- ulint size;
- mtr_t mtr;
-
- ut_a(space == 0); /* We have only one log_fsp_current_... variable */
-
- mtr_start(&mtr);
-
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
- mtr_commit(&mtr);
-
- return(size);
-}
-
-/***************************************************************************
-Tries to extend a single-table tablespace so that a page would fit in the
-data file. */
-static
-ibool
-fsp_try_extend_data_file_with_pages(
-/*================================*/
- /* out: TRUE if success */
- ulint space, /* in: space */
- ulint page_no, /* in: page number */
- fsp_header_t* header, /* in: space header */
- mtr_t* mtr) /* in: mtr */
-{
- ibool success;
- ulint actual_size;
- ulint size;
-
- ut_a(space != 0);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- ut_a(page_no >= size);
-
- success = fil_extend_space_to_desired_size(&actual_size, space,
- page_no + 1);
- /* actual_size now has the space size in pages; it may be less than
- we wanted if we ran out of disk space */
-
- mlog_write_ulint(header + FSP_SIZE, actual_size, MLOG_4BYTES, mtr);
-
- return(success);
-}
-
-/***************************************************************************
-Tries to extend the last data file of a tablespace if it is auto-extending. */
-static
-ibool
-fsp_try_extend_data_file(
-/*=====================*/
- /* out: FALSE if not auto-extending */
- ulint* actual_increase,/* out: actual increase in pages, where
- we measure the tablespace size from
- what the header field says; it may be
- the actual file size rounded down to
- megabyte */
- ulint space, /* in: space */
- fsp_header_t* header, /* in: space header */
- mtr_t* mtr) /* in: mtr */
-{
- ulint size;
- ulint new_size;
- ulint old_size;
- ulint size_increase;
- ulint actual_size;
- ibool success;
-
- *actual_increase = 0;
-
- if (space == 0 && !srv_auto_extend_last_data_file) {
-
- return(FALSE);
- }
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- old_size = size;
-
- if (space == 0 && srv_last_file_size_max != 0) {
- if (srv_last_file_size_max
- < srv_data_file_sizes[srv_n_data_files - 1]) {
-
- fprintf(stderr,
- "InnoDB: Error: Last data file size is %lu,"
- " max size allowed %lu\n",
- (ulong) srv_data_file_sizes[
- srv_n_data_files - 1],
- (ulong) srv_last_file_size_max);
- }
-
- size_increase = srv_last_file_size_max
- - srv_data_file_sizes[srv_n_data_files - 1];
- if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
- size_increase = SRV_AUTO_EXTEND_INCREMENT;
- }
- } else {
- if (space == 0) {
- size_increase = SRV_AUTO_EXTEND_INCREMENT;
- } else {
- /* We extend single-table tablespaces first one extent
- at a time, but for bigger tablespaces more. It is not
- enough to extend always by one extent, because some
- extents are frag page extents. */
-
- if (size < FSP_EXTENT_SIZE) {
- /* Let us first extend the file to 64 pages */
- success = fsp_try_extend_data_file_with_pages(
- space, FSP_EXTENT_SIZE - 1,
- header, mtr);
- if (!success) {
- new_size = mtr_read_ulint(
- header + FSP_SIZE,
- MLOG_4BYTES, mtr);
-
- *actual_increase = new_size - old_size;
-
- return(FALSE);
- }
-
- size = FSP_EXTENT_SIZE;
- }
-
- if (size < 32 * FSP_EXTENT_SIZE) {
- size_increase = FSP_EXTENT_SIZE;
- } else {
- /* Below in fsp_fill_free_list() we assume
- that we add at most FSP_FREE_ADD extents at
- a time */
- size_increase = FSP_FREE_ADD * FSP_EXTENT_SIZE;
- }
- }
- }
-
- if (size_increase == 0) {
-
- return(TRUE);
- }
-
- success = fil_extend_space_to_desired_size(&actual_size, space,
- size + size_increase);
- /* We ignore any fragments of a full megabyte when storing the size
- to the space header */
-
- mlog_write_ulint(header + FSP_SIZE,
- ut_calc_align_down(actual_size,
- (1024 * 1024) / UNIV_PAGE_SIZE),
- MLOG_4BYTES, mtr);
- new_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- *actual_increase = new_size - old_size;
-
- return(TRUE);
-}
-
-/**************************************************************************
-Puts new extents to the free list if there are free extents above the free
-limit. If an extent happens to contain an extent descriptor page, the extent
-is put to the FSP_FREE_FRAG list with the page marked as used. */
-static
-void
-fsp_fill_free_list(
-/*===============*/
- ibool init_space, /* in: TRUE if this is a single-table
- tablespace and we are only initing
- the tablespace's first extent
- descriptor page and ibuf bitmap page;
- then we do not allocate more extents */
- ulint space, /* in: space */
- fsp_header_t* header, /* in: space header */
- mtr_t* mtr) /* in: mtr */
-{
- ulint limit;
- ulint size;
- xdes_t* descr;
- ulint count = 0;
- ulint frag_n_used;
- page_t* descr_page;
- page_t* ibuf_page;
- ulint actual_increase;
- ulint i;
- mtr_t ibuf_mtr;
-
- ut_ad(header && mtr);
-
- /* Check if we can fill free list from above the free list limit */
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
-
- if (space == 0 && srv_auto_extend_last_data_file
- && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
-
- /* Try to increase the last data file size */
- fsp_try_extend_data_file(&actual_increase, space, header, mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- }
-
- if (space != 0 && !init_space
- && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
-
- /* Try to increase the .ibd file size */
- fsp_try_extend_data_file(&actual_increase, space, header, mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- }
-
- i = limit;
-
- while ((init_space && i < 1)
- || ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
-
- mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
- MLOG_4BYTES, mtr);
-
- /* Update the free limit info in the log system and make
- a checkpoint */
- if (space == 0) {
- log_fsp_current_free_limit_set_and_checkpoint(
- (i + FSP_EXTENT_SIZE)
- / ((1024 * 1024) / UNIV_PAGE_SIZE));
- }
-
- if (0 == i % XDES_DESCRIBED_PER_PAGE) {
-
- /* We are going to initialize a new descriptor page
- and a new ibuf bitmap page: the prior contents of the
- pages should be ignored. */
-
- if (i > 0) {
- descr_page = buf_page_create(space, i, mtr);
- buf_page_get(space, i, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(descr_page,
- SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
- fsp_init_file_page(descr_page, mtr);
- mlog_write_ulint(descr_page + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_XDES,
- MLOG_2BYTES, mtr);
- }
-
- /* Initialize the ibuf bitmap page in a separate
- mini-transaction because it is low in the latching
- order, and we must be able to release its latch
- before returning from the fsp routine */
-
- mtr_start(&ibuf_mtr);
-
- ibuf_page = buf_page_create(space,
- i + FSP_IBUF_BITMAP_OFFSET,
- &ibuf_mtr);
- buf_page_get(space, i + FSP_IBUF_BITMAP_OFFSET,
- RW_X_LATCH, &ibuf_mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(ibuf_page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
- fsp_init_file_page(ibuf_page, &ibuf_mtr);
-
- ibuf_bitmap_page_init(ibuf_page, &ibuf_mtr);
-
- mtr_commit(&ibuf_mtr);
- }
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, i,
- mtr);
- xdes_init(descr, mtr);
-
-#if XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE
-# error "XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE != 0"
-#endif
-
- if (0 == i % XDES_DESCRIBED_PER_PAGE) {
-
- /* The first page in the extent is a descriptor page
- and the second is an ibuf bitmap page: mark them
- used */
-
- xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr);
- xdes_set_bit(descr, XDES_FREE_BIT,
- FSP_IBUF_BITMAP_OFFSET, FALSE, mtr);
- xdes_set_state(descr, XDES_FREE_FRAG, mtr);
-
- flst_add_last(header + FSP_FREE_FRAG,
- descr + XDES_FLST_NODE, mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used + 2, MLOG_4BYTES, mtr);
- } else {
- flst_add_last(header + FSP_FREE,
- descr + XDES_FLST_NODE, mtr);
- count++;
- }
-
- i += FSP_EXTENT_SIZE;
- }
-}
-
-/**************************************************************************
-Allocates a new free extent. */
-static
-xdes_t*
-fsp_alloc_free_extent(
-/*==================*/
- /* out: extent descriptor, NULL if cannot be
- allocated */
- ulint space, /* in: space id */
- ulint hint, /* in: hint of which extent would be desirable: any
- page offset in the extent goes; the hint must not
- be > FSP_FREE_LIMIT */
- mtr_t* mtr) /* in: mtr */
-{
- fsp_header_t* header;
- fil_addr_t first;
- xdes_t* descr;
-
- ut_ad(mtr);
-
- header = fsp_get_space_header(space, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
-
- if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) {
- /* Ok, we can take this extent */
- } else {
- /* Take the first extent in the free list */
- first = flst_get_first(header + FSP_FREE, mtr);
-
- if (fil_addr_is_null(first)) {
- fsp_fill_free_list(FALSE, space, header, mtr);
-
- first = flst_get_first(header + FSP_FREE, mtr);
- }
-
- if (fil_addr_is_null(first)) {
-
- return(NULL); /* No free extents left */
- }
-
- descr = xdes_lst_get_descriptor(space, first, mtr);
- }
-
- flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
-
- return(descr);
-}
-
-/**************************************************************************
-Allocates a single free page from a space. The page is marked as used. */
-static
-ulint
-fsp_alloc_free_page(
-/*================*/
- /* out: the page offset, FIL_NULL if no page could
- be allocated */
- ulint space, /* in: space id */
- ulint hint, /* in: hint of which page would be desirable */
- mtr_t* mtr) /* in: mtr handle */
-{
- fsp_header_t* header;
- fil_addr_t first;
- xdes_t* descr;
- page_t* page;
- ulint free;
- ulint frag_n_used;
- ulint page_no;
- ulint space_size;
- ibool success;
-
- ut_ad(mtr);
-
- header = fsp_get_space_header(space, mtr);
-
- /* Get the hinted descriptor */
- descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
-
- if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) {
- /* Ok, we can take this extent */
- } else {
- /* Else take the first extent in free_frag list */
- first = flst_get_first(header + FSP_FREE_FRAG, mtr);
-
- if (fil_addr_is_null(first)) {
- /* There are no partially full fragments: allocate
- a free extent and add it to the FREE_FRAG list. NOTE
- that the allocation may have as a side-effect that an
- extent containing a descriptor page is added to the
- FREE_FRAG list. But we will allocate our page from the
- the free extent anyway. */
-
- descr = fsp_alloc_free_extent(space, hint, mtr);
-
- if (descr == NULL) {
- /* No free space left */
-
- return(FIL_NULL);
- }
-
- xdes_set_state(descr, XDES_FREE_FRAG, mtr);
- flst_add_last(header + FSP_FREE_FRAG,
- descr + XDES_FLST_NODE, mtr);
- } else {
- descr = xdes_lst_get_descriptor(space, first, mtr);
- }
-
- /* Reset the hint */
- hint = 0;
- }
-
- /* Now we have in descr an extent with at least one free page. Look
- for a free page in the extent. */
-
- free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE,
- hint % FSP_EXTENT_SIZE, mtr);
- if (free == ULINT_UNDEFINED) {
-
- ut_print_buf(stderr, ((byte*)descr) - 500, 1000);
-
- ut_error;
- }
-
- page_no = xdes_get_offset(descr) + free;
-
- space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- if (space_size <= page_no) {
- /* It must be that we are extending a single-table tablespace
- whose size is still < 64 pages */
-
- ut_a(space != 0);
- if (page_no >= FSP_EXTENT_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: trying to extend a"
- " single-table tablespace %lu\n"
- "InnoDB: by single page(s) though the"
- " space size %lu. Page no %lu.\n",
- (ulong) space, (ulong) space_size,
- (ulong) page_no);
- return(FIL_NULL);
- }
- success = fsp_try_extend_data_file_with_pages(space, page_no,
- header, mtr);
- if (!success) {
- /* No disk space left */
- return(FIL_NULL);
- }
- }
-
- xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
-
- /* Update the FRAG_N_USED field */
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- frag_n_used++;
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
- mtr);
- if (xdes_is_full(descr, mtr)) {
- /* The fragment is full: move it to another list */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FULL_FRAG, mtr);
-
- flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
- mtr);
- }
-
- /* Initialize the allocated page to the buffer pool, so that it can
- be obtained immediately with buf_page_get without need for a disk
- read. */
-
- buf_page_create(space, page_no, mtr);
-
- page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Prior contents of the page should be ignored */
- fsp_init_file_page(page, mtr);
-
- return(page_no);
-}
-
-/**************************************************************************
-Frees a single page of a space. The page is marked as free and clean. */
-static
-void
-fsp_free_page(
-/*==========*/
- ulint space, /* in: space id */
- ulint page, /* in: page offset */
- mtr_t* mtr) /* in: mtr handle */
-{
- fsp_header_t* header;
- xdes_t* descr;
- ulint state;
- ulint frag_n_used;
-
- ut_ad(mtr);
-
- /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
-
- header = fsp_get_space_header(space, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
-
- state = xdes_get_state(descr, mtr);
-
- if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
- fprintf(stderr,
- "InnoDB: Error: File space extent descriptor"
- " of page %lu has state %lu\n",
- (ulong) page,
- (ulong) state);
- fputs("InnoDB: Dump of descriptor: ", stderr);
- ut_print_buf(stderr, ((byte*)descr) - 50, 200);
- putc('\n', stderr);
-
- if (state == XDES_FREE) {
- /* We put here some fault tolerance: if the page
- is already free, return without doing anything! */
-
- return;
- }
-
- ut_error;
- }
-
- if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
- fprintf(stderr,
- "InnoDB: Error: File space extent descriptor"
- " of page %lu says it is free\n"
- "InnoDB: Dump of descriptor: ", (ulong) page);
- ut_print_buf(stderr, ((byte*)descr) - 50, 200);
- putc('\n', stderr);
-
- /* We put here some fault tolerance: if the page
- is already free, return without doing anything! */
-
- return;
- }
-
- xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
- xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
-
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- if (state == XDES_FULL_FRAG) {
- /* The fragment was full: move it to another list */
- flst_remove(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FREE_FRAG, mtr);
- flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used + FSP_EXTENT_SIZE - 1,
- MLOG_4BYTES, mtr);
- } else {
- ut_a(frag_n_used > 0);
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1,
- MLOG_4BYTES, mtr);
- }
-
- if (xdes_is_free(descr, mtr)) {
- /* The extent has become free: move it to another list */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- fsp_free_extent(space, page, mtr);
- }
-}
-
-/**************************************************************************
-Returns an extent to the free list of a space. */
-static
-void
-fsp_free_extent(
-/*============*/
- ulint space, /* in: space id */
- ulint page, /* in: page offset in the extent */
- mtr_t* mtr) /* in: mtr */
-{
- fsp_header_t* header;
- xdes_t* descr;
-
- ut_ad(mtr);
-
- header = fsp_get_space_header(space, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
-
- if (xdes_get_state(descr, mtr) == XDES_FREE) {
-
- ut_print_buf(stderr, (byte*)descr - 500, 1000);
-
- ut_error;
- }
-
- xdes_init(descr, mtr);
-
- flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
-}
-
-/**************************************************************************
-Returns the nth inode slot on an inode page. */
-UNIV_INLINE
-fseg_inode_t*
-fsp_seg_inode_page_get_nth_inode(
-/*=============================*/
- /* out: segment inode */
- page_t* page, /* in: segment inode page */
- ulint i, /* in: inode index on page */
- mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
-{
- ut_ad(i < FSP_SEG_INODES_PER_PAGE);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
- MTR_MEMO_PAGE_X_FIX));
-
- return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i);
-}
-
-/**************************************************************************
-Looks for a used segment inode on a segment inode page. */
-static
-ulint
-fsp_seg_inode_page_find_used(
-/*=========================*/
- /* out: segment inode index, or ULINT_UNDEFINED
- if not found */
- page_t* page, /* in: segment inode page */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint i;
- fseg_inode_t* inode;
-
- for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {
-
- inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
-
- if (ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID),
- ut_dulint_zero) != 0) {
- /* This is used */
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**************************************************************************
-Looks for an unused segment inode on a segment inode page. */
-static
-ulint
-fsp_seg_inode_page_find_free(
-/*=========================*/
- /* out: segment inode index, or ULINT_UNDEFINED
- if not found */
- page_t* page, /* in: segment inode page */
- ulint j, /* in: search forward starting from this index */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint i;
- fseg_inode_t* inode;
-
- for (i = j; i < FSP_SEG_INODES_PER_PAGE; i++) {
-
- inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
-
- if (ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID),
- ut_dulint_zero) == 0) {
- /* This is unused */
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**************************************************************************
-Allocates a new file segment inode page. */
-static
-ibool
-fsp_alloc_seg_inode_page(
-/*=====================*/
- /* out: TRUE if could be allocated */
- fsp_header_t* space_header, /* in: space header */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- fseg_inode_t* inode;
- page_t* page;
- ulint page_no;
- ulint space;
- ulint i;
-
- space = buf_frame_get_space_id(space_header);
-
- page_no = fsp_alloc_free_page(space, 0, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
-
- buf_block_align(page)->check_index_page_at_flush = FALSE;
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE,
- MLOG_2BYTES, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) {
-
- inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr);
-
- mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
- }
-
- flst_add_last(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
- return(TRUE);
-}
-
-/**************************************************************************
-Allocates a new file segment inode. */
-static
-fseg_inode_t*
-fsp_alloc_seg_inode(
-/*================*/
- /* out: segment inode, or NULL if
- not enough space */
- fsp_header_t* space_header, /* in: space header */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint page_no;
- page_t* page;
- fseg_inode_t* inode;
- ibool success;
- ulint n;
-
- if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) {
- /* Allocate a new segment inode page */
-
- success = fsp_alloc_seg_inode_page(space_header, mtr);
-
- if (!success) {
-
- return(NULL);
- }
- }
-
- page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page;
-
- page = buf_page_get(buf_frame_get_space_id(space_header), page_no,
- RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- n = fsp_seg_inode_page_find_free(page, 0, mtr);
-
- ut_a(n != ULINT_UNDEFINED);
-
- inode = fsp_seg_inode_page_get_nth_inode(page, n, mtr);
-
- if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1,
- mtr)) {
- /* There are no other unused headers left on the page: move it
- to another list */
-
- flst_remove(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
-
- flst_add_last(space_header + FSP_SEG_INODES_FULL,
- page + FSEG_INODE_PAGE_NODE, mtr);
- }
-
- return(inode);
-}
-
-/**************************************************************************
-Frees a file segment inode. */
-static
-void
-fsp_free_seg_inode(
-/*===============*/
- ulint space, /* in: space id */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- page_t* page;
- fsp_header_t* space_header;
-
- page = buf_frame_align(inode);
-
- space_header = fsp_get_space_header(space, mtr);
-
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, 0, mtr)) {
-
- /* Move the page to another list */
-
- flst_remove(space_header + FSP_SEG_INODES_FULL,
- page + FSEG_INODE_PAGE_NODE, mtr);
-
- flst_add_last(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
- }
-
- mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr);
- mlog_write_ulint(inode + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr);
-
- if (ULINT_UNDEFINED == fsp_seg_inode_page_find_used(page, mtr)) {
-
- /* There are no other used headers left on the page: free it */
-
- flst_remove(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
-
- fsp_free_page(space, buf_frame_get_page_no(page), mtr);
- }
-}
-
-/**************************************************************************
-Returns the file segment inode, page x-latched. */
-static
-fseg_inode_t*
-fseg_inode_get(
-/*===========*/
- /* out: segment inode, page x-latched */
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr) /* in: mtr handle */
-{
- fil_addr_t inode_addr;
- fseg_inode_t* inode;
-
- inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
- inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
-
- inode = fut_get_ptr(mach_read_from_4(header + FSEG_HDR_SPACE),
- inode_addr, RW_X_LATCH, mtr);
-
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- return(inode);
-}
-
-/**************************************************************************
-Gets the page number from the nth fragment page slot. */
-UNIV_INLINE
-ulint
-fseg_get_nth_frag_page_no(
-/*======================*/
- /* out: page number, FIL_NULL if not in use */
- fseg_inode_t* inode, /* in: segment inode */
- ulint n, /* in: slot index */
- mtr_t* mtr __attribute__((unused))) /* in: mtr handle */
-{
- ut_ad(inode && mtr);
- ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
- return(mach_read_from_4(inode + FSEG_FRAG_ARR
- + n * FSEG_FRAG_SLOT_SIZE));
-}
-
-/**************************************************************************
-Sets the page number in the nth fragment page slot. */
-UNIV_INLINE
-void
-fseg_set_nth_frag_page_no(
-/*======================*/
- fseg_inode_t* inode, /* in: segment inode */
- ulint n, /* in: slot index */
- ulint page_no,/* in: page number to set */
- mtr_t* mtr) /* in: mtr handle */
-{
- ut_ad(inode && mtr);
- ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
-
- mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE,
- page_no, MLOG_4BYTES, mtr);
-}
-
-/**************************************************************************
-Finds a fragment page slot which is free. */
-static
-ulint
-fseg_find_free_frag_page_slot(
-/*==========================*/
- /* out: slot index; ULINT_UNDEFINED if none
- found */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mtr handle */
-{
- ulint i;
- ulint page_no;
-
- ut_ad(inode && mtr);
-
- for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
- page_no = fseg_get_nth_frag_page_no(inode, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**************************************************************************
-Finds a fragment page slot which is used and last in the array. */
-static
-ulint
-fseg_find_last_used_frag_page_slot(
-/*===============================*/
- /* out: slot index; ULINT_UNDEFINED if none
- found */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mtr handle */
-{
- ulint i;
- ulint page_no;
-
- ut_ad(inode && mtr);
-
- for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
- page_no = fseg_get_nth_frag_page_no(
- inode, FSEG_FRAG_ARR_N_SLOTS - i - 1, mtr);
-
- if (page_no != FIL_NULL) {
-
- return(FSEG_FRAG_ARR_N_SLOTS - i - 1);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**************************************************************************
-Calculates reserved fragment page slots. */
-static
-ulint
-fseg_get_n_frag_pages(
-/*==================*/
- /* out: number of fragment pages */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mtr handle */
-{
- ulint i;
- ulint count = 0;
-
- ut_ad(inode && mtr);
-
- for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
- if (FIL_NULL != fseg_get_nth_frag_page_no(inode, i, mtr)) {
- count++;
- }
- }
-
- return(count);
-}
-
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
-fseg_create_general(
-/*================*/
- /* out: the page where the segment header is placed,
- x-latched, NULL if could not create segment
- because of lack of space */
- ulint space, /* in: space id */
- ulint page, /* in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /* in: byte offset of the created segment header
- on the page */
- ibool has_done_reservation, /* in: TRUE if the caller has already
- done the reservation for the pages with
- fsp_reserve_free_extents (at least 2 extents: one for
- the inode and the other for the segment) then there is
- no need to do the check for this individual
- operation */
- mtr_t* mtr) /* in: mtr */
-{
- fsp_header_t* space_header;
- fseg_inode_t* inode;
- dulint seg_id;
- fseg_header_t* header = 0; /* remove warning */
- rw_lock_t* latch;
- ibool success;
- ulint n_reserved;
- page_t* ret = NULL;
- ulint i;
-
- ut_ad(mtr);
-
- if (page != 0) {
- header = byte_offset + buf_page_get(space, page, RW_X_LATCH,
- mtr);
- }
-
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
- latch = fil_space_get_latch(space);
-
- mtr_x_lock(latch, mtr);
-
- if (rw_lock_get_x_lock_count(latch) == 1) {
- /* This thread did not own the latch before this call: free
- excess pages from the insert buffer free list */
-
- if (space == 0) {
- ibuf_free_excess_pages(space);
- }
- }
-
- if (!has_done_reservation) {
- success = fsp_reserve_free_extents(&n_reserved, space, 2,
- FSP_NORMAL, mtr);
- if (!success) {
- return(NULL);
- }
- }
-
- space_header = fsp_get_space_header(space, mtr);
-
- inode = fsp_alloc_seg_inode(space_header, mtr);
-
- if (inode == NULL) {
-
- goto funct_exit;
- }
-
- /* Read the next segment id from space header and increment the
- value in space header */
-
- seg_id = mtr_read_dulint(space_header + FSP_SEG_ID, mtr);
-
- mlog_write_dulint(space_header + FSP_SEG_ID, ut_dulint_add(seg_id, 1),
- mtr);
-
- mlog_write_dulint(inode + FSEG_ID, seg_id, mtr);
- mlog_write_ulint(inode + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr);
-
- flst_init(inode + FSEG_FREE, mtr);
- flst_init(inode + FSEG_NOT_FULL, mtr);
- flst_init(inode + FSEG_FULL, mtr);
-
- mlog_write_ulint(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE,
- MLOG_4BYTES, mtr);
- for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
- fseg_set_nth_frag_page_no(inode, i, FIL_NULL, mtr);
- }
-
- if (page == 0) {
- page = fseg_alloc_free_page_low(space, inode, 0, FSP_UP, mtr);
-
- if (page == FIL_NULL) {
-
- fsp_free_seg_inode(space, inode, mtr);
-
- goto funct_exit;
- }
-
- header = byte_offset
- + buf_page_get(space, page, RW_X_LATCH, mtr);
- mlog_write_ulint(header - byte_offset + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr);
- }
-
- mlog_write_ulint(header + FSEG_HDR_OFFSET,
- inode - buf_frame_align(inode), MLOG_2BYTES, mtr);
-
- mlog_write_ulint(header + FSEG_HDR_PAGE_NO,
- buf_frame_get_page_no(inode), MLOG_4BYTES, mtr);
-
- mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr);
-
- ret = buf_frame_align(header);
-
-funct_exit:
- if (!has_done_reservation) {
-
- fil_space_release_free_extents(space, n_reserved);
- }
-
- return(ret);
-}
-
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
-fseg_create(
-/*========*/
- /* out: the page where the segment header is placed,
- x-latched, NULL if could not create segment
- because of lack of space */
- ulint space, /* in: space id */
- ulint page, /* in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /* in: byte offset of the created segment header
- on the page */
- mtr_t* mtr) /* in: mtr */
-{
- return(fseg_create_general(space, page, byte_offset, FALSE, mtr));
-}
-
-/**************************************************************************
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used. */
-static
-ulint
-fseg_n_reserved_pages_low(
-/*======================*/
- /* out: number of reserved pages */
- fseg_inode_t* inode, /* in: segment inode */
- ulint* used, /* out: number of pages used (<= reserved) */
- mtr_t* mtr) /* in: mtr handle */
-{
- ulint ret;
-
- ut_ad(inode && used && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
-
- *used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr)
- + fseg_get_n_frag_pages(inode, mtr);
-
- ret = fseg_get_n_frag_pages(inode, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL, mtr)
- + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr);
-
- return(ret);
-}
-
-/**************************************************************************
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used. */
-
-ulint
-fseg_n_reserved_pages(
-/*==================*/
- /* out: number of reserved pages */
- fseg_header_t* header, /* in: segment header */
- ulint* used, /* out: number of pages used (<= reserved) */
- mtr_t* mtr) /* in: mtr handle */
-{
- ulint ret;
- fseg_inode_t* inode;
- ulint space;
-
- space = buf_frame_get_space_id(header);
-
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
-
- mtr_x_lock(fil_space_get_latch(space), mtr);
-
- inode = fseg_inode_get(header, mtr);
-
- ret = fseg_n_reserved_pages_low(inode, used, mtr);
-
- return(ret);
-}
-
-/*************************************************************************
-Tries to fill the free list of a segment with consecutive free extents.
-This happens if the segment is big enough to allow extents in the free list,
-the free list is empty, and the extents can be allocated consecutively from
-the hint onward. */
-static
-void
-fseg_fill_free_list(
-/*================*/
- fseg_inode_t* inode, /* in: segment inode */
- ulint space, /* in: space id */
- ulint hint, /* in: hint which extent would be good as
- the first extent */
- mtr_t* mtr) /* in: mtr */
-{
- xdes_t* descr;
- ulint i;
- dulint seg_id;
- ulint reserved;
- ulint used;
-
- ut_ad(inode && mtr);
-
- reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
-
- if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) {
-
- /* The segment is too small to allow extents in free list */
-
- return;
- }
-
- if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
- /* Free list is not empty */
-
- return;
- }
-
- for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
- descr = xdes_get_descriptor(space, hint, mtr);
-
- if ((descr == NULL)
- || (XDES_FREE != xdes_get_state(descr, mtr))) {
-
- /* We cannot allocate the desired extent: stop */
-
- return;
- }
-
- descr = fsp_alloc_free_extent(space, hint, mtr);
-
- xdes_set_state(descr, XDES_FSEG, mtr);
-
- seg_id = mtr_read_dulint(inode + FSEG_ID, mtr);
- mlog_write_dulint(descr + XDES_ID, seg_id, mtr);
-
- flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
- hint += FSP_EXTENT_SIZE;
- }
-}
-
-/*************************************************************************
-Picks a free extent for the segment. If the segment free list has entries,
-the first extent in it is returned. Otherwise an extent is requested from the
-space, assigned to the segment and appended to the segment free list, after
-which an attempt is made to fill the free list further, starting from the
-extent that follows. NOTE that in both cases the returned extent is still
-linked in the segment free list; it has not been taken off it. */
-static
-xdes_t*
-fseg_alloc_free_extent(
-/*===================*/
-				/* out: allocated extent, still placed in the
-				segment free list, NULL if could
-				not be allocated */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	ulint		space,	/* in: space id */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	xdes_t*	extent;
-
-	if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
-		/* The segment has a free extent of its own: use the head
-		of its free list */
-
-		return(xdes_lst_get_descriptor(
-			       space,
-			       flst_get_first(inode + FSEG_FREE, mtr), mtr));
-	}
-
-	/* The segment free list is empty: ask the space for an extent */
-	extent = fsp_alloc_free_extent(space, 0, mtr);
-
-	if (extent != NULL) {
-		dulint	owner_id = mtr_read_dulint(inode + FSEG_ID, mtr);
-
-		xdes_set_state(extent, XDES_FSEG, mtr);
-		mlog_write_dulint(extent + XDES_ID, owner_id, mtr);
-		flst_add_last(inode + FSEG_FREE, extent + XDES_FLST_NODE, mtr);
-
-		/* Try to line up more extents right after this one */
-		fseg_fill_free_list(inode, space,
-				    xdes_get_offset(extent) + FSP_EXTENT_SIZE,
-				    mtr);
-	}
-
-	return(extent);
-}
-
-/**************************************************************************
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation. The candidates are tried in this order:
-  1. the hinted page itself, if its extent belongs to the segment and the
-     page is free;
-  2. the hinted page in a free extent taken from the space, if the segment
-     is filled enough and has used at least FSEG_FRAG_LIMIT pages;
-  3. the lowest or highest page of any free extent, if a direction is given
-     and the same fill conditions hold;
-  4. another free page in the extent of the hinted page, if that extent
-     belongs to the segment;
-  5. any unused page in the extents already reserved by the segment;
-  6. an individual (fragment) page from the space, if the segment has used
-     fewer than FSEG_FRAG_LIMIT pages;
-  7. the first page of a newly allocated extent.
-For a space other than 0 the data file is extended by single pages when the
-chosen page lies beyond the current space size and below FSP_EXTENT_SIZE. */
-static
-ulint
-fseg_alloc_free_page_low(
-/*=====================*/
-				/* out: the allocated page number, FIL_NULL
-				if no page could be allocated */
-	ulint		space,	/* in: space */
-	fseg_inode_t*	seg_inode, /* in: segment inode */
-	ulint		hint,	/* in: hint of which page would be desirable */
-	byte		direction, /* in: if the new page is needed because
-				of an index page split, and records are
-				inserted there in order, into which
-				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr)	/* in: mtr handle */
-{
-	fsp_header_t*	space_header;
-	ulint		space_size;
-	dulint		seg_id;
-	ulint		used;
-	ulint		reserved;
-	xdes_t*		descr;		/* extent of the hinted page */
-	ulint		ret_page;	/* the allocated page offset, FIL_NULL
-					if could not be allocated */
-	xdes_t*		ret_descr;	/* the extent of the allocated page */
-	page_t*		page;
-	ibool		frag_page_allocated = FALSE;
-	ibool		success;
-	ulint		n;
-
-	ut_ad(mtr);
-	ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
-	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
-	      == FSEG_MAGIC_N_VALUE);
-	seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr);
-
-	ut_ad(ut_dulint_cmp(seg_id, ut_dulint_zero) > 0);
-
-	reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);
-
-	space_header = fsp_get_space_header(space, mtr);
-
-	descr = xdes_get_descriptor_with_space_hdr(space_header, space,
-						   hint, mtr);
-	if (descr == NULL) {
-		/* Hint outside space or too high above free limit: reset
-		hint */
-		hint = 0;
-		descr = xdes_get_descriptor(space, hint, mtr);
-	}
-
-	/* In the big if-else below we look for ret_page and ret_descr */
-
-	if ((xdes_get_state(descr, mtr) == XDES_FSEG)
-	    && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
-						   mtr), seg_id))
-	    && (xdes_get_bit(descr, XDES_FREE_BIT,
-			     hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
-
-		/* 1. We can take the hinted page */
-		ret_descr = descr;
-		ret_page = hint;
-
-	} else if ((xdes_get_state(descr, mtr) == XDES_FREE)
-		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
-		   && (used >= FSEG_FRAG_LIMIT)) {
-
-		/* 2. We allocate the free extent from space and can take
-		the hinted page */
-		ret_descr = fsp_alloc_free_extent(space, hint, mtr);
-
-		ut_a(ret_descr == descr);
-
-		xdes_set_state(ret_descr, XDES_FSEG, mtr);
-		mlog_write_dulint(ret_descr + XDES_ID, seg_id, mtr);
-		flst_add_last(seg_inode + FSEG_FREE,
-			      ret_descr + XDES_FLST_NODE, mtr);
-
-		/* Try to fill the segment free list */
-		fseg_fill_free_list(seg_inode, space,
-				    hint + FSP_EXTENT_SIZE, mtr);
-		ret_page = hint;
-
-	} else if ((direction != FSP_NO_DIR)
-		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
-		   && (used >= FSEG_FRAG_LIMIT)
-		   && (!!(ret_descr
-			  = fseg_alloc_free_extent(seg_inode, space, mtr)))) {
-
-		/* 3. We take any free extent (which was already assigned
-		above in the if-condition to ret_descr) and take the lowest
-		or highest page in it, depending on the direction */
-		ret_page = xdes_get_offset(ret_descr);
-
-		if (direction == FSP_DOWN) {
-			ret_page += FSP_EXTENT_SIZE - 1;
-		}
-
-	} else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
-		   && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID,
-							  mtr), seg_id))
-		   && (!xdes_is_full(descr, mtr))) {
-
-		/* 4. We can take the page from the same extent as the
-		hinted page (and the extent already belongs to the
-		segment) */
-		ret_descr = descr;
-		ret_page = xdes_get_offset(ret_descr)
-			+ xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
-					hint % FSP_EXTENT_SIZE, mtr);
-
-	} else if (reserved - used > 0) {
-		/* 5. We take any unused page from the segment */
-		fil_addr_t	first;
-
-		if (flst_get_len(seg_inode + FSEG_NOT_FULL, mtr) > 0) {
-			first = flst_get_first(seg_inode + FSEG_NOT_FULL,
-					       mtr);
-		} else if (flst_get_len(seg_inode + FSEG_FREE, mtr) > 0) {
-			first = flst_get_first(seg_inode + FSEG_FREE, mtr);
-		} else {
-			/* Reserved pages exceed used pages, yet neither
-			list has an extent: the bookkeeping is broken */
-			ut_error;
-			return(FIL_NULL);
-		}
-
-		ret_descr = xdes_lst_get_descriptor(space, first, mtr);
-		ret_page = xdes_get_offset(ret_descr)
-			+ xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
-					0, mtr);
-
-	} else if (used < FSEG_FRAG_LIMIT) {
-		/* 6. We allocate an individual page from the space */
-		ret_page = fsp_alloc_free_page(space, hint, mtr);
-		ret_descr = NULL;
-
-		frag_page_allocated = TRUE;
-
-		if (ret_page != FIL_NULL) {
-			/* Put the page in the fragment page array of the
-			segment */
-			n = fseg_find_free_frag_page_slot(seg_inode, mtr);
-
-			/* A slot index is not a page number, so it must not
-			be tested against FIL_NULL. The sibling lookup
-			fseg_find_last_used_frag_page_slot() is checked
-			against ULINT_UNDEFINED by its callers in this file;
-			the same sentinel is used here.
-			NOTE(review): confirm that
-			fseg_find_free_frag_page_slot() returns
-			ULINT_UNDEFINED when no slot is free */
-			ut_a(n != ULINT_UNDEFINED);
-
-			fseg_set_nth_frag_page_no(seg_inode, n, ret_page,
-						  mtr);
-		}
-
-	} else {
-		/* 7. We allocate a new extent and take its first page */
-		ret_descr = fseg_alloc_free_extent(seg_inode, space, mtr);
-
-		if (ret_descr == NULL) {
-			ret_page = FIL_NULL;
-		} else {
-			ret_page = xdes_get_offset(ret_descr);
-		}
-	}
-
-	if (ret_page == FIL_NULL) {
-		/* Page could not be allocated */
-
-		return(FIL_NULL);
-	}
-
-	if (space != 0) {
-		space_size = fil_space_get_size(space);
-
-		if (space_size <= ret_page) {
-			/* It must be that we are extending a single-table
-			tablespace whose size is still < 64 pages */
-
-			if (ret_page >= FSP_EXTENT_SIZE) {
-				fprintf(stderr,
-					"InnoDB: Error (2): trying to extend"
-					" a single-table tablespace %lu\n"
-					"InnoDB: by single page(s) though"
-					" the space size %lu. Page no %lu.\n",
-					(ulong) space, (ulong) space_size,
-					(ulong) ret_page);
-				return(FIL_NULL);
-			}
-
-			success = fsp_try_extend_data_file_with_pages(
-				space, ret_page, space_header, mtr);
-			if (!success) {
-				/* No disk space left */
-				return(FIL_NULL);
-			}
-		}
-	}
-
-	if (!frag_page_allocated) {
-		/* Initialize the allocated page to buffer pool, so that it
-		can be obtained immediately with buf_page_get without need
-		for a disk read */
-
-		page = buf_page_create(space, ret_page, mtr);
-
-		ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, mtr));
-
-#ifdef UNIV_SYNC_DEBUG
-		buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
-		/* The prior contents of the page should be ignored */
-		fsp_init_file_page(page, mtr);
-
-		/* At this point we know the extent and the page offset.
-		The extent is still in the appropriate list (FSEG_NOT_FULL
-		or FSEG_FREE), and the page is not yet marked as used. */
-
-		ut_ad(xdes_get_descriptor(space, ret_page, mtr) == ret_descr);
-		ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
-				   ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);
-
-		fseg_mark_page_used(seg_inode, space, ret_page, mtr);
-	}
-
-	buf_reset_check_index_page_at_flush(space, ret_page);
-
-	return(ret_page);
-}
-
-/**************************************************************************
-Allocates a single free page from a segment. The tablespace latch is taken
-in x-mode first. Unless the caller states that it has already reserved
-space, two extents are reserved with FSP_NORMAL priority around the
-allocation and released again afterwards. The page itself is chosen by
-fseg_alloc_free_page_low(). */
-
-ulint
-fseg_alloc_free_page_general(
-/*=========================*/
-				/* out: allocated page offset, FIL_NULL if no
-				page could be allocated */
-	fseg_header_t*	seg_header,/* in: segment header */
-	ulint		hint,	/* in: hint of which page would be desirable */
-	byte		direction,/* in: if the new page is needed because
-				of an index page split, and records are
-				inserted there in order, into which
-				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	ibool		has_done_reservation, /* in: TRUE if the caller has
-				already done the reservation for the page
-				with fsp_reserve_free_extents, then there
-				is no need to do the check for this individual
-				page */
-	mtr_t*		mtr)	/* in: mtr handle */
-{
-	fseg_inode_t*	seg_inode;
-	ulint		space_id;
-	rw_lock_t*	space_latch;
-	ulint		new_page;
-	ulint		n_ext_reserved;
-
-	space_id = buf_frame_get_space_id(seg_header);
-
-	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space_id),
-				   MTR_MEMO_X_LOCK));
-	space_latch = fil_space_get_latch(space_id);
-
-	mtr_x_lock(space_latch, mtr);
-
-	if (rw_lock_get_x_lock_count(space_latch) == 1 && space_id == 0) {
-		/* This thread did not own the latch before this call, and
-		we are in space 0: free excess pages from the insert buffer
-		free list */
-
-		ibuf_free_excess_pages(space_id);
-	}
-
-	seg_inode = fseg_inode_get(seg_header, mtr);
-
-	if (has_done_reservation) {
-		/* The caller holds a reservation: allocate straight away */
-
-		return(fseg_alloc_free_page_low(
-			       buf_frame_get_space_id(seg_inode),
-			       seg_inode, hint, direction, mtr));
-	}
-
-	if (!fsp_reserve_free_extents(&n_ext_reserved, space_id, 2,
-				      FSP_NORMAL, mtr)) {
-
-		return(FIL_NULL);
-	}
-
-	new_page = fseg_alloc_free_page_low(buf_frame_get_space_id(seg_inode),
-					    seg_inode, hint, direction, mtr);
-
-	fil_space_release_free_extents(space_id, n_ext_reserved);
-
-	return(new_page);
-}
-
-/**************************************************************************
-Allocates a single free page from a segment. This is the variant of
-fseg_alloc_free_page_general() for callers that have not made a space
-reservation of their own: has_done_reservation is passed as FALSE. */
-
-ulint
-fseg_alloc_free_page(
-/*=================*/
-				/* out: allocated page offset, FIL_NULL if no
-				page could be allocated */
-	fseg_header_t*	seg_header,/* in: segment header */
-	ulint		hint,	/* in: hint of which page would be desirable */
-	byte		direction,/* in: if the new page is needed because
-				of an index page split, and records are
-				inserted there in order, into which
-				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr)	/* in: mtr handle */
-{
-	ulint	new_page;
-
-	new_page = fseg_alloc_free_page_general(seg_header, hint, direction,
-						FALSE, mtr);
-
-	return(new_page);
-}
-
-/**************************************************************************
-Makes sure a small single-table tablespace has room for further fragment
-pages in its first extent. If the tablespace size is at least 2 pages larger
-than the number of pages marked used in the first extent, nothing needs to be
-done. Otherwise the data file is extended with pages through
-fsp_try_extend_data_file_with_pages(), which is passed the used page count
-plus one. */
-static
-ibool
-fsp_reserve_free_pages(
-/*===================*/
-					/* out: TRUE if size was already
-					>= used pages + 2, or we were able
-					to extend */
-	ulint		space,		/* in: space id, must be != 0 */
-	fsp_header_t*	space_header,	/* in: header of that space,
-					x-latched */
-	ulint		size,		/* in: size of the tablespace in pages,
-					must be < FSP_EXTENT_SIZE / 2 */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	xdes_t*	first_extent;
-	ulint	pages_in_use;
-
-	ut_a(space != 0);
-	ut_a(size < FSP_EXTENT_SIZE / 2);
-
-	first_extent = xdes_get_descriptor_with_space_hdr(space_header, space,
-							  0, mtr);
-	pages_in_use = xdes_get_n_used(first_extent, mtr);
-
-	ut_a(pages_in_use <= size);
-
-	if (size < pages_in_use + 2) {
-		/* Fewer than 2 spare pages in the file: try to grow it */
-
-		return(fsp_try_extend_data_file_with_pages(
-			       space, pages_in_use + 1, space_header, mtr));
-	}
-
-	return(TRUE);
-}
-
-/**************************************************************************
-Reserves free pages from a tablespace. All mini-transactions which may
-use several pages from the tablespace should call this function beforehand
-and reserve enough free extents so that they certainly will be able
-to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
-
-The alloc_type below has the following meaning: FSP_NORMAL means an
-operation which will probably result in more space usage, like an
-insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
-deleting rows, then this allocation will in the long run result in
-less space usage (after a purge); FSP_CLEANING means allocation done
-in a physical record delete (like in a purge) or other cleaning operation
-which will result in less space usage in the long run. We prefer the latter
-two types of allocation: when space is scarce, FSP_NORMAL allocations
-will not succeed, but the latter two allocations will succeed, if possible.
-The purpose is to avoid dead end where the database is full but the
-user cannot free any space because these freeing operations temporarily
-reserve some space.
-
-Tablespaces whose size is < FSP_EXTENT_SIZE / 2 pages are a special case. In
-this function we would liberally reserve several whole extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies a fraction of one extent. That is why we apply different rules in
-that special case: no extents are reserved at all, and the request is handed
-to fsp_reserve_free_pages(), which only makes sure a couple of pages are
-available in the data file.
-
-If the free extent count is too low, or the reservation itself is refused, the
-data file is extended and the whole check is repeated for as long as the
-extension adds pages. */
-
-ibool
-fsp_reserve_free_extents(
-/*=====================*/
- /* out: TRUE if we were able to make the reservation */
- ulint* n_reserved,/* out: number of extents actually reserved: set to
- n_ext, except that it is set to 0 when the
- small-tablespace rules are applied (size below
- FSP_EXTENT_SIZE / 2 pages) */
- ulint space, /* in: space id */
- ulint n_ext, /* in: number of extents to reserve */
- ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr) /* in: mtr */
-{
- fsp_header_t* space_header;
- rw_lock_t* latch;
- ulint n_free_list_ext;
- ulint free_limit;
- ulint size;
- ulint n_free;
- ulint n_free_up;
- ulint reserve;
- ibool success;
- ulint n_pages_added;
-
- ut_ad(mtr);
- ut_ad(!mutex_own(&kernel_mutex)
- || mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
- *n_reserved = n_ext; /* overwritten with 0 below for small tablespaces */
-
- latch = fil_space_get_latch(space);
-
- mtr_x_lock(latch, mtr);
-
- space_header = fsp_get_space_header(space, mtr);
-try_again:
- size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr); /* re-read on every retry: an extension changes it */
-
- if (size < FSP_EXTENT_SIZE / 2) {
- /* Use different rules for small single-table tablespaces:
- nothing is reserved in terms of extents */
- *n_reserved = 0;
- return(fsp_reserve_free_pages(space, space_header, size, mtr));
- }
-
- n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
-
- free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
- MLOG_4BYTES, mtr);
-
- /* Below we play safe when counting free extents above the free limit:
- some of them will contain extent descriptor pages, and therefore
- will not be free extents. One extent is dropped outright, and then one
- more for every (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) extents. */
-
- n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
-
- if (n_free_up > 0) {
- n_free_up--;
- n_free_up = n_free_up - n_free_up
- / (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
- }
-
- n_free = n_free_list_ext + n_free_up; /* extents in the FSP_FREE list plus the estimate above the free limit */
-
- if (alloc_type == FSP_NORMAL) {
- /* We reserve 1 extent + 0.5 % of the space size to undo logs
- and 1 extent + 0.5 % to cleaning operations; NOTE: this source
- code is duplicated in the function below! */
-
- reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
-
- if (n_free <= reserve + n_ext) { /* the request would eat into the safety margin */
-
- goto try_to_extend;
- }
- } else if (alloc_type == FSP_UNDO) {
- /* We reserve 1 extent + 0.5 % of the space size to cleaning
- operations */
-
- reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200;
-
- if (n_free <= reserve + n_ext) { /* the request would eat into the safety margin */
-
- goto try_to_extend;
- }
- } else {
- ut_a(alloc_type == FSP_CLEANING); /* cleaning keeps no margin back */
- }
-
- success = fil_space_reserve_free_extents(space, n_free, n_ext); /* NOTE(review): the conditions under which this refuses are defined outside this function - confirm there */
-
- if (success) {
- return(TRUE);
- }
-try_to_extend:
- success = fsp_try_extend_data_file(&n_pages_added, space,
- space_header, mtr);
- if (success && n_pages_added > 0) { /* retry only if the file really grew */
-
- goto try_again;
- }
-
- return(FALSE);
-}
-
-/**************************************************************************
-Estimates how much new data can still be inserted before the tablespace
-runs out of space. Only whole free extents are counted, and the safety
-margin that fsp_reserve_free_extents() keeps back for FSP_NORMAL allocations
-is subtracted. The space header is read in a mini-transaction of its own,
-which is committed before the arithmetic is done. */
-
-ullint
-fsp_get_available_space_in_free_extents(
-/*====================================*/
-			/* out: available space in kB */
-	ulint	space)	/* in: space id */
-{
-	fsp_header_t*	hdr;
-	rw_lock_t*	space_latch;
-	ulint		size_in_pages;
-	ulint		free_list_len;
-	ulint		free_limit_page;
-	ulint		free_above_limit;
-	ulint		free_extents;
-	ulint		margin;
-	mtr_t		mtr;
-
-	ut_ad(!mutex_own(&kernel_mutex));
-
-	mtr_start(&mtr);
-
-	space_latch = fil_space_get_latch(space);
-
-	mtr_x_lock(space_latch, &mtr);
-
-	hdr = fsp_get_space_header(space, &mtr);
-
-	size_in_pages = mtr_read_ulint(hdr + FSP_SIZE, MLOG_4BYTES, &mtr);
-	free_list_len = flst_get_len(hdr + FSP_FREE, &mtr);
-	free_limit_page = mtr_read_ulint(hdr + FSP_FREE_LIMIT, MLOG_4BYTES,
-					 &mtr);
-
-	mtr_commit(&mtr);
-
-	if (size_in_pages < FSP_EXTENT_SIZE) {
-		/* Less than one extent in total: this must be a
-		single-table tablespace */
-		ut_a(space != 0);
-
-		/* TODO: count free frag pages and return a value based
-		on that */
-		return(0);
-	}
-
-	/* Play safe when counting free extents above the free limit:
-	some of them will contain extent descriptor pages, and therefore
-	will not be free extents */
-
-	free_above_limit = (size_in_pages - free_limit_page) / FSP_EXTENT_SIZE;
-
-	if (free_above_limit > 0) {
-		free_above_limit--;
-		free_above_limit -= free_above_limit
-			/ (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE);
-	}
-
-	free_extents = free_list_len + free_above_limit;
-
-	/* 1 extent + 0.5 % of the space size is kept for undo logs and
-	1 extent + 0.5 % for cleaning operations; NOTE: this source code is
-	duplicated in the function above! */
-
-	margin = 2 + ((size_in_pages / FSP_EXTENT_SIZE) * 2) / 200;
-
-	if (margin > free_extents) {
-
-		return(0);
-	}
-
-	return((ullint)(free_extents - margin)
-	       * FSP_EXTENT_SIZE
-	       * (UNIV_PAGE_SIZE / 1024));
-}
-
-/************************************************************************
-Marks a page used. The page must reside within the extents of the given
-segment. Besides clearing the XDES_FREE_BIT of the page, this keeps the
-segment lists in step: an extent whose first page is taken moves from
-FSEG_FREE to FSEG_NOT_FULL, an extent whose last page is taken moves from
-FSEG_NOT_FULL to FSEG_FULL, and FSEG_NOT_FULL_N_USED is adjusted so that it
-covers only pages in extents on the FSEG_NOT_FULL list. */
-static
-void
-fseg_mark_page_used(
-/*================*/
-	fseg_inode_t*	seg_inode,/* in: segment inode */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	xdes_t*	descr;
-	ulint	not_full_n_used;
-
-	ut_ad(seg_inode && mtr);
-
-	descr = xdes_get_descriptor(space, page, mtr);
-
-	/* The extent must be owned by this segment. The ids are dulints,
-	so the whole value is compared, the same way the other functions in
-	this file compare FSEG_ID with XDES_ID; reading them as 4-byte
-	values would check only part of the id. */
-	ut_ad(0 == ut_dulint_cmp(mtr_read_dulint(seg_inode + FSEG_ID, mtr),
-				 mtr_read_dulint(descr + XDES_ID, mtr)));
-
-	if (xdes_is_free(descr, mtr)) {
-		/* We move the extent from the free list to the
-		NOT_FULL list */
-		flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE,
-			    mtr);
-		flst_add_last(seg_inode + FSEG_NOT_FULL,
-			      descr + XDES_FLST_NODE, mtr);
-	}
-
-	ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
-	      == TRUE);
-	/* We mark the page as used */
-	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
-
-	not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
-					 MLOG_4BYTES, mtr);
-	not_full_n_used++;
-	mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, not_full_n_used,
-			 MLOG_4BYTES, mtr);
-	if (xdes_is_full(descr, mtr)) {
-		/* We move the extent from the NOT_FULL list to the
-		FULL list */
-		flst_remove(seg_inode + FSEG_NOT_FULL,
-			    descr + XDES_FLST_NODE, mtr);
-		flst_add_last(seg_inode + FSEG_FULL,
-			      descr + XDES_FLST_NODE, mtr);
-
-		/* The pages of this extent no longer count as used pages
-		of the NOT_FULL list */
-		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
-				 not_full_n_used - FSP_EXTENT_SIZE,
-				 MLOG_4BYTES, mtr);
-	}
-}
-
-/**************************************************************************
-Frees a single page of a segment. If the extent of the page is not in the
-XDES_FSEG state, the page is treated as a fragment page: its slot in the
-segment inode is reset to FIL_NULL and the page is freed to the space.
-Otherwise the page is marked free in its extent, the extent is moved between
-the FSEG_FULL and FSEG_NOT_FULL lists as needed, FSEG_NOT_FULL_N_USED is
-adjusted, and an extent that has become completely free is returned to the
-space. The server is crashed intentionally (ut_error) if the page is already
-marked free or if the extent is owned by a different segment. */
-static
-void
-fseg_free_page_low(
-/*===============*/
-	fseg_inode_t*	seg_inode, /* in: segment inode */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset */
-	mtr_t*		mtr)	/* in: mtr handle */
-{
-	xdes_t*	descr;
-	ulint	not_full_n_used;
-	ulint	state;
-	dulint	descr_id;
-	dulint	seg_id;
-	ulint	i;
-
-	ut_ad(seg_inode && mtr);
-	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
-	      == FSEG_MAGIC_N_VALUE);
-
-	/* Drop search system page hash index if the page is found in
-	the pool and is hashed */
-
-	btr_search_drop_page_hash_when_freed(space, page);
-
-	descr = xdes_get_descriptor(space, page, mtr);
-
-	ut_a(descr);
-	if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
-		fputs("InnoDB: Dump of the tablespace extent descriptor: ",
-		      stderr);
-		ut_print_buf(stderr, descr, 40);
-
-		fprintf(stderr, "\n"
-			"InnoDB: Serious error! InnoDB is trying to"
-			" free page %lu\n"
-			"InnoDB: though it is already marked as free"
-			" in the tablespace!\n"
-			"InnoDB: The tablespace free space info is corrupt.\n"
-			"InnoDB: You may need to dump your"
-			" InnoDB tables and recreate the whole\n"
-			"InnoDB: database!\n", (ulong) page);
-crash:
-		/* Shared tail of both fatal paths; the ownership check
-		further down jumps here */
-		fputs("InnoDB: Please refer to\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "forcing-recovery.html\n"
-		      "InnoDB: about forcing recovery.\n", stderr);
-		ut_error;
-	}
-
-	state = xdes_get_state(descr, mtr);
-
-	if (state != XDES_FSEG) {
-		/* The page is in the fragment pages of the segment */
-
-		/* NOTE(review): the search has no upper bound of its own;
-		it relies on the page being present in the fragment array -
-		confirm that fseg_get_nth_frag_page_no() guards the index */
-		for (i = 0;; i++) {
-			if (fseg_get_nth_frag_page_no(seg_inode, i, mtr)
-			    == page) {
-
-				fseg_set_nth_frag_page_no(seg_inode, i,
-							  FIL_NULL, mtr);
-				break;
-			}
-		}
-
-		fsp_free_page(space, page, mtr);
-
-		return;
-	}
-
-	/* If we get here, the page is in some extent of the segment */
-
-	descr_id = mtr_read_dulint(descr + XDES_ID, mtr);
-	seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr);
-#if 0
-	fprintf(stderr,
-		"InnoDB: InnoDB is freeing space %lu page %lu,\n"
-		"InnoDB: which belongs to descr seg %lu %lu\n"
-		"InnoDB: segment %lu %lu.\n",
-		(ulong) space, (ulong) page,
-		(ulong) ut_dulint_get_high(descr_id),
-		(ulong) ut_dulint_get_low(descr_id),
-		(ulong) ut_dulint_get_high(seg_id),
-		(ulong) ut_dulint_get_low(seg_id));
-#endif /* 0 */
-	if (0 != ut_dulint_cmp(descr_id, seg_id)) {
-		fputs("InnoDB: Dump of the tablespace extent descriptor: ",
-		      stderr);
-		ut_print_buf(stderr, descr, 40);
-		fputs("\nInnoDB: Dump of the segment inode: ", stderr);
-		ut_print_buf(stderr, seg_inode, 40);
-		putc('\n', stderr);
-
-		/* The extent descriptor names the real owner (descr_id);
-		seg_id is the segment the caller is freeing from. Print
-		them in that order: "does not belong to" seg_id, "but
-		belongs to" descr_id. */
-		fprintf(stderr,
-			"InnoDB: Serious error: InnoDB is trying to"
-			" free space %lu page %lu,\n"
-			"InnoDB: which does not belong to"
-			" segment %lu %lu but belongs\n"
-			"InnoDB: to segment %lu %lu.\n",
-			(ulong) space, (ulong) page,
-			(ulong) ut_dulint_get_high(seg_id),
-			(ulong) ut_dulint_get_low(seg_id),
-			(ulong) ut_dulint_get_high(descr_id),
-			(ulong) ut_dulint_get_low(descr_id));
-		goto crash;
-	}
-
-	not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
-					 MLOG_4BYTES, mtr);
-	if (xdes_is_full(descr, mtr)) {
-		/* The extent is full: move it to the NOT_FULL list. All its
-		pages but the one being freed now count as used pages of
-		that list */
-		flst_remove(seg_inode + FSEG_FULL,
-			    descr + XDES_FLST_NODE, mtr);
-		flst_add_last(seg_inode + FSEG_NOT_FULL,
-			      descr + XDES_FLST_NODE, mtr);
-		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
-				 not_full_n_used + FSP_EXTENT_SIZE - 1,
-				 MLOG_4BYTES, mtr);
-	} else {
-		ut_a(not_full_n_used > 0);
-		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
-				 not_full_n_used - 1, MLOG_4BYTES, mtr);
-	}
-
-	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
-	xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
-
-	if (xdes_is_free(descr, mtr)) {
-		/* The extent has become free: free it to space */
-		flst_remove(seg_inode + FSEG_NOT_FULL,
-			    descr + XDES_FLST_NODE, mtr);
-		fsp_free_extent(space, page, mtr);
-	}
-}
-
-/**************************************************************************
-Frees a single page of a segment. Takes the tablespace latch in x-mode,
-looks up the segment inode from the header and lets fseg_free_page_low()
-do the work. */
-
-void
-fseg_free_page(
-/*===========*/
-	fseg_header_t*	seg_header, /* in: segment header */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: page offset */
-	mtr_t*		mtr)	/* in: mtr handle */
-{
-	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
-				   MTR_MEMO_X_LOCK));
-
-	mtr_x_lock(fil_space_get_latch(space), mtr);
-
-	fseg_free_page_low(fseg_inode_get(seg_header, mtr), space, page, mtr);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
-	buf_page_set_file_page_was_freed(space, page);
-#endif
-}
-
-/**************************************************************************
-Frees an extent of a segment to the space free list. The extent must be in
-the XDES_FSEG state and owned by the given segment. The search system page
-hash index is dropped for every page still marked used in the extent, the
-extent is unlinked from whichever segment list it is on, and, if that list
-was FSEG_NOT_FULL, the used page count of the list is reduced
-accordingly. */
-static
-void
-fseg_free_extent(
-/*=============*/
-	fseg_inode_t*	seg_inode, /* in: segment inode */
-	ulint		space,	/* in: space id */
-	ulint		page,	/* in: a page in the extent */
-	mtr_t*		mtr)	/* in: mtr handle */
-{
-	xdes_t*	extent;
-	ulint	extent_start;
-	ulint	list_offset;
-	ibool	was_partly_used = FALSE;
-	ulint	list_n_used;
-	ulint	extent_n_used;
-	ulint	pos;
-
-	ut_ad(seg_inode && mtr);
-
-	extent = xdes_get_descriptor(space, page, mtr);
-
-	ut_a(xdes_get_state(extent, mtr) == XDES_FSEG);
-	ut_a(0 == ut_dulint_cmp(mtr_read_dulint(extent + XDES_ID, mtr),
-				mtr_read_dulint(seg_inode + FSEG_ID, mtr)));
-
-	extent_start = page - (page % FSP_EXTENT_SIZE);
-
-	for (pos = 0; pos < FSP_EXTENT_SIZE; pos++) {
-		if (xdes_get_bit(extent, XDES_FREE_BIT, pos, mtr) != FALSE) {
-			/* The page is free: nothing to drop */
-
-			continue;
-		}
-
-		/* Drop search system page hash index if the page is
-		found in the pool and is hashed */
-
-		btr_search_drop_page_hash_when_freed(space,
-						     extent_start + pos);
-	}
-
-	/* Find out which list of the segment the extent is on */
-	if (xdes_is_full(extent, mtr)) {
-		list_offset = FSEG_FULL;
-	} else if (xdes_is_free(extent, mtr)) {
-		list_offset = FSEG_FREE;
-	} else {
-		list_offset = FSEG_NOT_FULL;
-		was_partly_used = TRUE;
-	}
-
-	flst_remove(seg_inode + list_offset, extent + XDES_FLST_NODE, mtr);
-
-	if (was_partly_used) {
-		/* Take the used pages of the extent out of the used page
-		count of the NOT_FULL list */
-		list_n_used = mtr_read_ulint(
-			seg_inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr);
-
-		extent_n_used = xdes_get_n_used(extent, mtr);
-		ut_a(list_n_used >= extent_n_used);
-		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
-				 list_n_used - extent_n_used,
-				 MLOG_4BYTES, mtr);
-	}
-
-	fsp_free_extent(space, page, mtr);
-
-#ifdef UNIV_DEBUG_FILE_ACCESSES
-	for (pos = 0; pos < FSP_EXTENT_SIZE; pos++) {
-
-		buf_page_set_file_page_was_freed(space, extent_start + pos);
-	}
-#endif
-}
-
-/**************************************************************************
-Frees part of a segment. A segment is freed by calling this function
-repeatedly, each time in a different mini-transaction, because freeing
-everything in one mini-transaction might make it too big. One call frees
-either one whole extent of the segment or, when no extents are left, one
-fragment page; the segment inode is freed once no fragment page remains. */
-
-ibool
-fseg_free_step(
-/*===========*/
-				/* out: TRUE if freeing completed */
-	fseg_header_t*	header,	/* in, own: segment header; NOTE: if the header
-				resides on the first page of the frag list
-				of the segment, this pointer becomes obsolete
-				after the last freeing step */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint		slot;
-	xdes_t*		extent;
-	fseg_inode_t*	seg_inode;
-	ulint		space_id;
-
-	space_id = buf_frame_get_space_id(header);
-
-	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space_id),
-				   MTR_MEMO_X_LOCK));
-
-	mtr_x_lock(fil_space_get_latch(space_id), mtr);
-
-	extent = xdes_get_descriptor(space_id, buf_frame_get_page_no(header),
-				     mtr);
-
-	/* The page holding the header must not have been freed yet */
-
-	ut_a(extent);
-	ut_a(xdes_get_bit(extent, XDES_FREE_BIT,
-			  buf_frame_get_page_no(header) % FSP_EXTENT_SIZE,
-			  mtr) == FALSE);
-
-	seg_inode = fseg_inode_get(header, mtr);
-
-	extent = fseg_get_first_extent(seg_inode, mtr);
-
-	if (extent != NULL) {
-		/* The segment still holds an extent: free that one in
-		this step */
-		fseg_free_extent(seg_inode, space_id,
-				 xdes_get_offset(extent), mtr);
-
-		return(FALSE);
-	}
-
-	/* No extents left: go on with the fragment pages */
-	slot = fseg_find_last_used_frag_page_slot(seg_inode, mtr);
-
-	if (slot != ULINT_UNDEFINED) {
-		fseg_free_page_low(seg_inode, space_id,
-				   fseg_get_nth_frag_page_no(seg_inode, slot,
-							     mtr),
-				   mtr);
-
-		slot = fseg_find_last_used_frag_page_slot(seg_inode, mtr);
-
-		if (slot != ULINT_UNDEFINED) {
-			/* More fragment pages remain for later steps */
-
-			return(FALSE);
-		}
-	}
-
-	/* Freeing completed: free the segment inode */
-	fsp_free_seg_inode(space_id, seg_inode, mtr);
-
-	return(TRUE);
-}
-
-/**************************************************************************
-Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed: it reports completion as soon as the last
-used fragment page slot refers to the page on which the header resides. */
-
-ibool
-fseg_free_step_not_header(
-/*======================*/
-				/* out: TRUE if freeing completed, except the
-				header page */
-	fseg_header_t*	header,	/* in: segment header which must reside on
-				the first fragment page of the segment */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint		slot;
-	xdes_t*		extent;
-	fseg_inode_t*	seg_inode;
-	ulint		space_id;
-	ulint		frag_page_no;
-
-	space_id = buf_frame_get_space_id(header);
-
-	ut_ad(!mutex_own(&kernel_mutex)
-	      || mtr_memo_contains(mtr, fil_space_get_latch(space_id),
-				   MTR_MEMO_X_LOCK));
-
-	mtr_x_lock(fil_space_get_latch(space_id), mtr);
-
-	seg_inode = fseg_inode_get(header, mtr);
-
-	extent = fseg_get_first_extent(seg_inode, mtr);
-
-	if (extent != NULL) {
-		/* The segment still holds an extent: free that one in
-		this step */
-		fseg_free_extent(seg_inode, space_id,
-				 xdes_get_offset(extent), mtr);
-
-		return(FALSE);
-	}
-
-	/* No extents left: go on with the fragment pages */
-
-	slot = fseg_find_last_used_frag_page_slot(seg_inode, mtr);
-
-	if (slot == ULINT_UNDEFINED) {
-		/* At least the header page should still be there */
-		ut_error;
-	}
-
-	frag_page_no = fseg_get_nth_frag_page_no(seg_inode, slot, mtr);
-
-	if (frag_page_no != buf_frame_get_page_no(header)) {
-		fseg_free_page_low(seg_inode, space_id, frag_page_no, mtr);
-
-		return(FALSE);
-	}
-
-	/* Only the header page is left: it is kept */
-
-	return(TRUE);
-}
-
-/***********************************************************************
-Frees a segment. The work is split into freeing steps, each run in a
-mini-transaction of its own, so that there is no danger of bufferfixing
-too many buffer pages. The segment header is looked up again at the start
-of every step. */
-
-void
-fseg_free(
-/*======*/
-	ulint	space,	/* in: space id */
-	ulint	page_no,/* in: page number where the segment header is
-			placed */
-	ulint	offset) /* in: byte offset of the segment header on that
-			page */
-{
-	mtr_t		mtr;
-	ibool		done;
-	fseg_header_t*	seg_header;
-	fil_addr_t	header_addr;
-
-	header_addr.page = page_no;
-	header_addr.boffset = offset;
-
-	do {
-		mtr_start(&mtr);
-
-		seg_header = fut_get_ptr(space, header_addr, RW_X_LATCH,
-					 &mtr);
-
-		done = fseg_free_step(seg_header, &mtr);
-
-		mtr_commit(&mtr);
-	} while (!done);
-}
-
-/**************************************************************************
-Returns the first extent descriptor for a segment. The extent lists of the
-segment are looked at in the order FSEG_FULL, FSEG_NOT_FULL, FSEG_FREE, and
-the first node of the first non-empty list is the answer. */
-static
-xdes_t*
-fseg_get_first_extent(
-/*==================*/
-				/* out: the first extent descriptor, or NULL if
-				none */
-	fseg_inode_t*	inode,	/* in: segment inode */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	fil_addr_t	head;
-	ulint		space_id;
-	fseg_inode_t*	list_base;
-
-	ut_ad(inode && mtr);
-
-	space_id = buf_frame_get_space_id(inode);
-
-	/* Pick the first list that has any extents */
-	if (flst_get_len(inode + FSEG_FULL, mtr) > 0) {
-		list_base = inode + FSEG_FULL;
-	} else if (flst_get_len(inode + FSEG_NOT_FULL, mtr) > 0) {
-		list_base = inode + FSEG_NOT_FULL;
-	} else if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
-		list_base = inode + FSEG_FREE;
-	} else {
-		list_base = NULL;
-	}
-
-	if (list_base != NULL) {
-		head = flst_get_first(list_base, mtr);
-	} else {
-		head = fil_addr_null;
-	}
-
-	if (head.page == FIL_NULL) {
-
-		return(NULL);
-	}
-
-	return(xdes_lst_get_descriptor(space_id, head, mtr));
-}
-
-/***********************************************************************
-Validates a segment. */
-static
-ibool
-fseg_validate_low(
-/*==============*/
- /* out: TRUE if ok */
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr2) /* in: mtr */
-{
- ulint space;
- dulint seg_id;
- mtr_t mtr;
- xdes_t* descr;
- fil_addr_t node_addr;
- ulint n_used = 0;
- ulint n_used2 = 0;
-
- ut_ad(mtr_memo_contains(mtr2, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- space = buf_frame_get_space_id(inode);
-
- seg_id = mtr_read_dulint(inode + FSEG_ID, mtr2);
- n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr2);
- flst_validate(inode + FSEG_FREE, mtr2);
- flst_validate(inode + FSEG_NOT_FULL, mtr2);
- flst_validate(inode + FSEG_FULL, mtr2);
-
- /* Validate FSEG_FREE list */
- node_addr = flst_get_first(inode + FSEG_FREE, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == 0);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(!ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, &mtr),
- seg_id));
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSEG_NOT_FULL list */
-
- node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) > 0);
- ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(!ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, &mtr),
- seg_id));
-
- n_used2 += xdes_get_n_used(descr, &mtr);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSEG_FULL list */
-
- node_addr = flst_get_first(inode + FSEG_FULL, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(!ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, &mtr),
- seg_id));
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- ut_a(n_used == n_used2);
-
- return(TRUE);
-}
-
-/***********************************************************************
-Validates a segment. */
-
-ibool
-fseg_validate(
-/*==========*/
- /* out: TRUE if ok */
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr2) /* in: mtr */
-{
- fseg_inode_t* inode;
- ibool ret;
- ulint space;
-
- space = buf_frame_get_space_id(header);
-
- mtr_x_lock(fil_space_get_latch(space), mtr2);
-
- inode = fseg_inode_get(header, mtr2);
-
- ret = fseg_validate_low(inode, mtr2);
-
- return(ret);
-}
-
-/***********************************************************************
-Writes info of a segment. */
-static
-void
-fseg_print_low(
-/*===========*/
- fseg_inode_t* inode, /* in: segment inode */
- mtr_t* mtr) /* in: mtr */
-{
- ulint space;
- ulint seg_id_low;
- ulint seg_id_high;
- ulint n_used;
- ulint n_frag;
- ulint n_free;
- ulint n_not_full;
- ulint n_full;
- ulint reserved;
- ulint used;
- ulint page_no;
- dulint d_var;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(inode),
- MTR_MEMO_PAGE_X_FIX));
- space = buf_frame_get_space_id(inode);
- page_no = buf_frame_get_page_no(inode);
-
- reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
-
- d_var = mtr_read_dulint(inode + FSEG_ID, mtr);
-
- seg_id_low = ut_dulint_get_low(d_var);
- seg_id_high = ut_dulint_get_high(d_var);
-
- n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr);
- n_frag = fseg_get_n_frag_pages(inode, mtr);
- n_free = flst_get_len(inode + FSEG_FREE, mtr);
- n_not_full = flst_get_len(inode + FSEG_NOT_FULL, mtr);
- n_full = flst_get_len(inode + FSEG_FULL, mtr);
-
- fprintf(stderr,
- "SEGMENT id %lu %lu space %lu; page %lu;"
- " res %lu used %lu; full ext %lu\n"
- "fragm pages %lu; free extents %lu;"
- " not full extents %lu: pages %lu\n",
- (ulong) seg_id_high, (ulong) seg_id_low,
- (ulong) space, (ulong) page_no,
- (ulong) reserved, (ulong) used, (ulong) n_full,
- (ulong) n_frag, (ulong) n_free, (ulong) n_not_full,
- (ulong) n_used);
-}
-
-/***********************************************************************
-Writes info of a segment. */
-
-void
-fseg_print(
-/*=======*/
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr) /* in: mtr */
-{
- fseg_inode_t* inode;
- ulint space;
-
- space = buf_frame_get_space_id(header);
-
- mtr_x_lock(fil_space_get_latch(space), mtr);
-
- inode = fseg_inode_get(header, mtr);
-
- fseg_print_low(inode, mtr);
-}
-
-/***********************************************************************
-Validates the file space system and its segments. */
-
-ibool
-fsp_validate(
-/*=========*/
- /* out: TRUE if ok */
- ulint space) /* in: space id */
-{
- fsp_header_t* header;
- fseg_inode_t* seg_inode;
- page_t* seg_inode_page;
- ulint size;
- ulint free_limit;
- ulint frag_n_used;
- mtr_t mtr;
- mtr_t mtr2;
- xdes_t* descr;
- fil_addr_t node_addr;
- fil_addr_t next_node_addr;
- ulint descr_count = 0;
- ulint n_used = 0;
- ulint n_used2 = 0;
- ulint n_full_frag_pages;
- ulint n;
- ulint seg_inode_len_free;
- ulint seg_inode_len_full;
-
- /* Start first a mini-transaction mtr2 to lock out all other threads
- from the fsp system */
- mtr_start(&mtr2);
- mtr_x_lock(fil_space_get_latch(space), &mtr2);
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
- free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
- MLOG_4BYTES, &mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
- MLOG_4BYTES, &mtr);
-
- n_full_frag_pages = FSP_EXTENT_SIZE
- * flst_get_len(header + FSP_FULL_FRAG, &mtr);
-
- if (UNIV_UNLIKELY(free_limit > size)) {
-
- ut_a(space != 0);
- ut_a(size < FSP_EXTENT_SIZE);
- }
-
- flst_validate(header + FSP_FREE, &mtr);
- flst_validate(header + FSP_FREE_FRAG, &mtr);
- flst_validate(header + FSP_FULL_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- /* Validate FSP_FREE list */
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
- node_addr = flst_get_first(header + FSP_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == 0);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSP_FREE_FRAG list */
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
- node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) > 0);
- ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG);
-
- n_used += xdes_get_n_used(descr, &mtr);
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
-
- mtr_commit(&mtr);
- }
-
- /* Validate FSP_FULL_FRAG list */
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
- node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate segments */
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
-
- seg_inode_len_full = flst_get_len(header + FSP_SEG_INODES_FULL, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, &mtr);
- ut_a(ut_dulint_cmp(
- mach_read_from_8(seg_inode + FSEG_ID),
- ut_dulint_zero) != 0);
- fseg_validate_low(seg_inode, &mtr);
-
- descr_count += flst_get_len(seg_inode + FSEG_FREE,
- &mtr);
- descr_count += flst_get_len(seg_inode + FSEG_FULL,
- &mtr);
- descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL,
- &mtr);
-
- n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr);
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- node_addr = next_node_addr;
- }
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
-
- seg_inode_len_free = flst_get_len(header + FSP_SEG_INODES_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, &mtr);
- if (ut_dulint_cmp(
- mach_read_from_8(seg_inode + FSEG_ID),
- ut_dulint_zero) != 0) {
- fseg_validate_low(seg_inode, &mtr);
-
- descr_count += flst_get_len(
- seg_inode + FSEG_FREE, &mtr);
- descr_count += flst_get_len(
- seg_inode + FSEG_FULL, &mtr);
- descr_count += flst_get_len(
- seg_inode + FSEG_NOT_FULL, &mtr);
- n_used2 += fseg_get_n_frag_pages(
- seg_inode, &mtr);
- }
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- node_addr = next_node_addr;
- }
-
- ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
- ut_a(n_used + n_full_frag_pages
- == n_used2 + 2* ((free_limit + XDES_DESCRIBED_PER_PAGE - 1)
- / XDES_DESCRIBED_PER_PAGE)
- + seg_inode_len_full + seg_inode_len_free);
- ut_a(frag_n_used == n_used);
-
- mtr_commit(&mtr2);
-
- return(TRUE);
-}
-
-/***********************************************************************
-Prints info of a file space. */
-
-void
-fsp_print(
-/*======*/
- ulint space) /* in: space id */
-{
- fsp_header_t* header;
- fseg_inode_t* seg_inode;
- page_t* seg_inode_page;
- ulint size;
- ulint free_limit;
- ulint frag_n_used;
- fil_addr_t node_addr;
- fil_addr_t next_node_addr;
- ulint n_free;
- ulint n_free_frag;
- ulint n_full_frag;
- ulint seg_id_low;
- ulint seg_id_high;
- ulint n;
- ulint n_segs = 0;
- dulint d_var;
- mtr_t mtr;
- mtr_t mtr2;
-
- /* Start first a mini-transaction mtr2 to lock out all other threads
- from the fsp system */
-
- mtr_start(&mtr2);
-
- mtr_x_lock(fil_space_get_latch(space), &mtr2);
-
- mtr_start(&mtr);
-
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
- free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES,
- &mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- &mtr);
- n_free = flst_get_len(header + FSP_FREE, &mtr);
- n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr);
- n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr);
-
- d_var = mtr_read_dulint(header + FSP_SEG_ID, &mtr);
-
- seg_id_low = ut_dulint_get_low(d_var);
- seg_id_high = ut_dulint_get_high(d_var);
-
- fprintf(stderr,
- "FILE SPACE INFO: id %lu\n"
- "size %lu, free limit %lu, free extents %lu\n"
- "not full frag extents %lu: used pages %lu,"
- " full frag extents %lu\n"
- "first seg id not used %lu %lu\n",
- (long) space,
- (ulong) size, (ulong) free_limit, (ulong) n_free,
- (ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag,
- (ulong) seg_id_high, (ulong) seg_id_low);
-
- mtr_commit(&mtr);
-
- /* Print segments */
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, &mtr);
- ut_a(ut_dulint_cmp(
- mach_read_from_8(seg_inode + FSEG_ID),
- ut_dulint_zero) != 0);
- fseg_print_low(seg_inode, &mtr);
-
- n_segs++;
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- node_addr = next_node_addr;
- }
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header = fsp_get_space_header(space, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) {
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, &mtr);
- if (ut_dulint_cmp(
- mach_read_from_8(seg_inode + FSEG_ID),
- ut_dulint_zero) != 0) {
-
- fseg_print_low(seg_inode, &mtr);
- n_segs++;
- }
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- node_addr = next_node_addr;
- }
-
- mtr_commit(&mtr2);
-
- fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs);
-}
diff --git a/storage/innobase/fut/fut0fut.c b/storage/innobase/fut/fut0fut.c
deleted file mode 100644
index 7f7a8fa39e7..00000000000
--- a/storage/innobase/fut/fut0fut.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/**********************************************************************
-File-based utilities
-
-(c) 1995 Innobase Oy
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0fut.h"
-
-#ifdef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
-
diff --git a/storage/innobase/fut/fut0lst.c b/storage/innobase/fut/fut0lst.c
deleted file mode 100644
index 75fa8bf5552..00000000000
--- a/storage/innobase/fut/fut0lst.c
+++ /dev/null
@@ -1,518 +0,0 @@
-/**********************************************************************
-File-based list utilities
-
-(c) 1995 Innobase Oy
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0lst.h"
-
-#ifdef UNIV_NONINL
-#include "fut0lst.ic"
-#endif
-
-#include "buf0buf.h"
-
-
-/************************************************************************
-Adds a node to an empty list. */
-static
-void
-flst_add_to_empty(
-/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of
- empty list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node_addr;
- ulint len;
-
- ut_ad(mtr && base && node);
- ut_ad(base != node);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
- MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
- ut_a(len == 0);
-
- buf_ptr_get_fsp_addr(node, &space, &node_addr);
-
- /* Update first and last fields of base node */
- flst_write_addr(base + FLST_FIRST, node_addr, mtr);
- flst_write_addr(base + FLST_LAST, node_addr, mtr);
-
- /* Set prev and next fields of node to add */
- flst_write_addr(node + FLST_PREV, fil_addr_null, mtr);
- flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);
-
- /* Update len of base node */
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
-}
-
-/************************************************************************
-Adds a node as the last node in a list. */
-
-void
-flst_add_last(
-/*==========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node_addr;
- ulint len;
- fil_addr_t last_addr;
- flst_node_t* last_node;
-
- ut_ad(mtr && base && node);
- ut_ad(base != node);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
- MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
- last_addr = flst_get_last(base, mtr);
-
- buf_ptr_get_fsp_addr(node, &space, &node_addr);
-
- /* If the list is not empty, call flst_insert_after */
- if (len != 0) {
- if (last_addr.page == node_addr.page) {
- last_node = buf_frame_align(node) + last_addr.boffset;
- } else {
- last_node = fut_get_ptr(space, last_addr, RW_X_LATCH,
- mtr);
- }
-
- flst_insert_after(base, last_node, node, mtr);
- } else {
- /* else call flst_add_to_empty */
- flst_add_to_empty(base, node, mtr);
- }
-}
-
-/************************************************************************
-Adds a node as the first node in a list. */
-
-void
-flst_add_first(
-/*===========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node_addr;
- ulint len;
- fil_addr_t first_addr;
- flst_node_t* first_node;
-
- ut_ad(mtr && base && node);
- ut_ad(base != node);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
- MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
- first_addr = flst_get_first(base, mtr);
-
- buf_ptr_get_fsp_addr(node, &space, &node_addr);
-
- /* If the list is not empty, call flst_insert_before */
- if (len != 0) {
- if (first_addr.page == node_addr.page) {
- first_node = buf_frame_align(node)
- + first_addr.boffset;
- } else {
- first_node = fut_get_ptr(space, first_addr,
- RW_X_LATCH, mtr);
- }
-
- flst_insert_before(base, node, first_node, mtr);
- } else {
- /* else call flst_add_to_empty */
- flst_add_to_empty(base, node, mtr);
- }
-}
-
-/************************************************************************
-Inserts a node after another in a list. */
-
-void
-flst_insert_after(
-/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node1, /* in: node to insert after */
- flst_node_t* node2, /* in: node to add */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- flst_node_t* node3;
- fil_addr_t node3_addr;
- ulint len;
-
- ut_ad(mtr && node1 && node2 && base);
- ut_ad(base != node1);
- ut_ad(base != node2);
- ut_ad(node2 != node1);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node1),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
-
- buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
-
- node3_addr = flst_get_next_addr(node1, mtr);
-
- /* Set prev and next fields of node2 */
- flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
- flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
-
- if (!fil_addr_is_null(node3_addr)) {
- /* Update prev field of node3 */
- node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH, mtr);
- flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
- } else {
- /* node1 was last in list: update last field in base */
- flst_write_addr(base + FLST_LAST, node2_addr, mtr);
- }
-
- /* Set next field of node1 */
- flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
-}
-
-/************************************************************************
-Inserts a node before another in a list. */
-
-void
-flst_insert_before(
-/*===============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: node to insert */
- flst_node_t* node3, /* in: node to insert before */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint space;
- flst_node_t* node1;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- fil_addr_t node3_addr;
- ulint len;
-
- ut_ad(mtr && node2 && node3 && base);
- ut_ad(base != node2);
- ut_ad(base != node3);
- ut_ad(node2 != node3);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node3),
- MTR_MEMO_PAGE_X_FIX));
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
- buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
-
- node1_addr = flst_get_prev_addr(node3, mtr);
-
- /* Set prev and next fields of node2 */
- flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
- flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
-
- if (!fil_addr_is_null(node1_addr)) {
- /* Update next field of node1 */
- node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH, mtr);
- flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
- } else {
- /* node3 was first in list: update first field in base */
- flst_write_addr(base + FLST_FIRST, node2_addr, mtr);
- }
-
- /* Set prev field of node3 */
- flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
-}
-
-/************************************************************************
-Removes a node. */
-
-void
-flst_remove(
-/*========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: node to remove */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint space;
- flst_node_t* node1;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- flst_node_t* node3;
- fil_addr_t node3_addr;
- ulint len;
-
- ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
-
- node1_addr = flst_get_prev_addr(node2, mtr);
- node3_addr = flst_get_next_addr(node2, mtr);
-
- if (!fil_addr_is_null(node1_addr)) {
-
- /* Update next field of node1 */
-
- if (node1_addr.page == node2_addr.page) {
-
- node1 = buf_frame_align(node2) + node1_addr.boffset;
- } else {
- node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
- mtr);
- }
-
- ut_ad(node1 != node2);
-
- flst_write_addr(node1 + FLST_NEXT, node3_addr, mtr);
- } else {
- /* node2 was first in list: update first field in base */
- flst_write_addr(base + FLST_FIRST, node3_addr, mtr);
- }
-
- if (!fil_addr_is_null(node3_addr)) {
- /* Update prev field of node3 */
-
- if (node3_addr.page == node2_addr.page) {
-
- node3 = buf_frame_align(node2) + node3_addr.boffset;
- } else {
- node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH,
- mtr);
- }
-
- ut_ad(node2 != node3);
-
- flst_write_addr(node3 + FLST_PREV, node1_addr, mtr);
- } else {
- /* node2 was last in list: update last field in base */
- flst_write_addr(base + FLST_LAST, node1_addr, mtr);
- }
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- ut_ad(len > 0);
-
- mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
-}
-
-/************************************************************************
-Cuts off the tail of the list, including the node given. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-
-void
-flst_cut_end(
-/*=========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: first node to remove */
- ulint n_nodes,/* in: number of nodes to remove,
- must be >= 1 */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint space;
- flst_node_t* node1;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- ulint len;
-
- ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(n_nodes > 0);
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
-
- node1_addr = flst_get_prev_addr(node2, mtr);
-
- if (!fil_addr_is_null(node1_addr)) {
-
- /* Update next field of node1 */
-
- if (node1_addr.page == node2_addr.page) {
-
- node1 = buf_frame_align(node2) + node1_addr.boffset;
- } else {
- node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
- mtr);
- }
-
- flst_write_addr(node1 + FLST_NEXT, fil_addr_null, mtr);
- } else {
- /* node2 was first in list: update the field in base */
- flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
- }
-
- flst_write_addr(base + FLST_LAST, node1_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- ut_ad(len >= n_nodes);
-
- mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
-}
-
-/************************************************************************
-Cuts off the tail of the list, not including the given node. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-
-void
-flst_truncate_end(
-/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: first node not to remove */
- ulint n_nodes,/* in: number of nodes to remove */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- fil_addr_t node2_addr;
- ulint len;
- ulint space;
-
- ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
- MTR_MEMO_PAGE_X_FIX));
- if (n_nodes == 0) {
-
- ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
-
- return;
- }
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
-
- /* Update next field of node2 */
- flst_write_addr(node2 + FLST_NEXT, fil_addr_null, mtr);
-
- flst_write_addr(base + FLST_LAST, node2_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- ut_ad(len >= n_nodes);
-
- mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
-}
-
-/************************************************************************
-Validates a file-based list. */
-
-ibool
-flst_validate(
-/*==========*/
- /* out: TRUE if ok */
- flst_base_node_t* base, /* in: pointer to base node of list */
- mtr_t* mtr1) /* in: mtr */
-{
- ulint space;
- flst_node_t* node;
- fil_addr_t node_addr;
- fil_addr_t base_addr;
- ulint len;
- ulint i;
- mtr_t mtr2;
-
- ut_ad(base);
- ut_ad(mtr_memo_contains(mtr1, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
-
- /* We use two mini-transaction handles: the first is used to
- lock the base node, and prevent other threads from modifying the
- list. The second is used to traverse the list. We cannot run the
- second mtr without committing it at times, because if the list
- is long, then the x-locked pages could fill the buffer resulting
- in a deadlock. */
-
- /* Find out the space id */
- buf_ptr_get_fsp_addr(base, &space, &base_addr);
-
- len = flst_get_len(base, mtr1);
- node_addr = flst_get_first(base, mtr1);
-
- for (i = 0; i < len; i++) {
- mtr_start(&mtr2);
-
- node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
- node_addr = flst_get_next_addr(node, &mtr2);
-
- mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
- becoming full */
- }
-
- ut_a(fil_addr_is_null(node_addr));
-
- node_addr = flst_get_last(base, mtr1);
-
- for (i = 0; i < len; i++) {
- mtr_start(&mtr2);
-
- node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
- node_addr = flst_get_prev_addr(node, &mtr2);
-
- mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
- becoming full */
- }
-
- ut_a(fil_addr_is_null(node_addr));
-
- return(TRUE);
-}
-
-/************************************************************************
-Prints info of a file-based list. */
-
-void
-flst_print(
-/*=======*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- mtr_t* mtr) /* in: mtr */
-{
- buf_frame_t* frame;
- ulint len;
-
- ut_ad(base && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- frame = buf_frame_align(base);
-
- len = flst_get_len(base, mtr);
-
- fprintf(stderr,
- "FILE-BASED LIST:\n"
- "Base node in space %lu page %lu byte offset %lu; len %lu\n",
- (ulong) buf_frame_get_space_id(frame),
- (ulong) buf_frame_get_page_no(frame),
- (ulong) (base - frame), (ulong) len);
-}
diff --git a/storage/innobase/ha/ha0ha.c b/storage/innobase/ha/ha0ha.c
deleted file mode 100644
index 077497493b4..00000000000
--- a/storage/innobase/ha/ha0ha.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/************************************************************************
-The hash table with external chains
-
-(c) 1994-1997 Innobase Oy
-
-Created 8/22/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ha0ha.h"
-#ifdef UNIV_NONINL
-#include "ha0ha.ic"
-#endif
-
-#include "buf0buf.h"
-
-/*****************************************************************
-Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n. */
-
-hash_table_t*
-ha_create_func(
-/*===========*/
- /* out, own: created table */
- ibool in_btr_search, /* in: TRUE if the hash table is used in
- the btr_search module */
- ulint n, /* in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
- ulint mutex_level, /* in: level of the mutexes in the latching
- order: this is used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes) /* in: number of mutexes to protect the
- hash table: must be a power of 2, or 0 */
-{
- hash_table_t* table;
- ulint i;
-
- table = hash_create(n);
-
- if (in_btr_search) {
- table->adaptive = TRUE;
- } else {
- table->adaptive = FALSE;
- }
-
- /* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail,
- but in practise it never should in this case, hence the asserts. */
-
- if (n_mutexes == 0) {
- if (in_btr_search) {
- table->heap = mem_heap_create_in_btr_search(4096);
- ut_a(table->heap);
- } else {
- table->heap = mem_heap_create_in_buffer(4096);
- }
-
- return(table);
- }
-
- hash_create_mutexes(table, n_mutexes, mutex_level);
-
- table->heaps = mem_alloc(n_mutexes * sizeof(void*));
-
- for (i = 0; i < n_mutexes; i++) {
- if (in_btr_search) {
- table->heaps[i] = mem_heap_create_in_btr_search(4096);
- ut_a(table->heaps[i]);
- } else {
- table->heaps[i] = mem_heap_create_in_buffer(4096);
- }
- }
-
- return(table);
-}
-
-/*****************************************************************
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted. */
-
-ibool
-ha_insert_for_fold(
-/*===============*/
- /* out: TRUE if succeed, FALSE if no more
- memory could be allocated */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of data; if a node with
- the same fold value already exists, it is
- updated to point to the same data, and no new
- node is created! */
- void* data) /* in: data, must not be NULL */
-{
- hash_cell_t* cell;
- ha_node_t* node;
- ha_node_t* prev_node;
- buf_block_t* prev_block;
- ulint hash;
-
- ut_ad(table && data);
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
- hash = hash_calc_hash(fold, table);
-
- cell = hash_get_nth_cell(table, hash);
-
- prev_node = cell->node;
-
- while (prev_node != NULL) {
- if (prev_node->fold == fold) {
- if (table->adaptive) {
- prev_block = buf_block_align(prev_node->data);
- ut_a(prev_block->n_pointers > 0);
- prev_block->n_pointers--;
- buf_block_align(data)->n_pointers++;
- }
-
- prev_node->data = data;
-
- return(TRUE);
- }
-
- prev_node = prev_node->next;
- }
-
- /* We have to allocate a new chain node */
-
- node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
-
- if (node == NULL) {
- /* It was a btr search type memory heap and at the moment
- no more memory could be allocated: return */
-
- ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
-
- return(FALSE);
- }
-
- ha_node_set_data(node, data);
-
- if (table->adaptive) {
- buf_block_align(data)->n_pointers++;
- }
-
- node->fold = fold;
-
- node->next = NULL;
-
- prev_node = cell->node;
-
- if (prev_node == NULL) {
-
- cell->node = node;
-
- return(TRUE);
- }
-
- while (prev_node->next != NULL) {
-
- prev_node = prev_node->next;
- }
-
- prev_node->next = node;
-
- return(TRUE);
-}
-
-/***************************************************************
-Deletes a hash node. */
-
-void
-ha_delete_hash_node(
-/*================*/
- hash_table_t* table, /* in: hash table */
- ha_node_t* del_node) /* in: node to be deleted */
-{
- if (table->adaptive) {
- ut_a(buf_block_align(del_node->data)->n_pointers > 0);
- buf_block_align(del_node->data)->n_pointers--;
- }
-
- HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
-}
-
-/*****************************************************************
-Deletes an entry from a hash table. */
-
-void
-ha_delete(
-/*======*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of data */
- void* data) /* in: data, must not be NULL and must exist
- in the hash table */
-{
- ha_node_t* node;
-
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
- node = ha_search_with_data(table, fold, data);
-
- ut_a(node);
-
- ha_delete_hash_node(table, node);
-}
-
-/*************************************************************
-Looks for an element when we know the pointer to the data, and updates
-the pointer to data, if found. */
-
-void
-ha_search_and_update_if_found(
-/*==========================*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data, /* in: pointer to the data */
- void* new_data)/* in: new pointer to the data */
-{
- ha_node_t* node;
-
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
- node = ha_search_with_data(table, fold, data);
-
- if (node) {
- if (table->adaptive) {
- ut_a(buf_block_align(node->data)->n_pointers > 0);
- buf_block_align(node->data)->n_pointers--;
- buf_block_align(new_data)->n_pointers++;
- }
-
- node->data = new_data;
- }
-}
-
-/*********************************************************************
-Removes from the chain determined by fold all nodes whose data pointer
-points to the page given. */
-
-void
-ha_remove_all_nodes_to_page(
-/*========================*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: fold value */
- page_t* page) /* in: buffer page */
-{
- ha_node_t* node;
-
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (buf_frame_align(ha_node_get_data(node)) == page) {
-
- /* Remove the hash node */
-
- ha_delete_hash_node(table, node);
-
- /* Start again from the first node in the chain
- because the deletion may compact the heap of
- nodes and move other nodes! */
-
- node = ha_chain_get_first(table, fold);
- } else {
- node = ha_chain_get_next(node);
- }
- }
-#ifdef UNIV_DEBUG
- /* Check that all nodes really got deleted */
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- ut_a(buf_frame_align(ha_node_get_data(node)) != page);
-
- node = ha_chain_get_next(node);
- }
-#endif
-}
-
-/*****************************************************************
-Validates a given range of the cells in hash table. */
-
-ibool
-ha_validate(
-/*========*/
- /* out: TRUE if ok */
- hash_table_t* table, /* in: hash table */
- ulint start_index, /* in: start index */
- ulint end_index) /* in: end index */
-{
- hash_cell_t* cell;
- ha_node_t* node;
- ibool ok = TRUE;
- ulint i;
-
- ut_a(start_index <= end_index);
- ut_a(start_index < hash_get_n_cells(table));
- ut_a(end_index < hash_get_n_cells(table));
-
- for (i = start_index; i <= end_index; i++) {
-
- cell = hash_get_nth_cell(table, i);
-
- node = cell->node;
-
- while (node) {
- if (hash_calc_hash(node->fold, table) != i) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Error: hash table node"
- " fold value %lu does not\n"
- "InnoDB: match the cell number %lu.\n",
- (ulong) node->fold, (ulong) i);
-
- ok = FALSE;
- }
-
- node = node->next;
- }
- }
-
- return(ok);
-}
-
-/*****************************************************************
-Prints info of a hash table. */
-
-void
-ha_print_info(
-/*==========*/
- FILE* file, /* in: file where to print */
- hash_table_t* table) /* in: hash table */
-{
-#ifdef UNIV_DEBUG
-/* Some of the code here is disabled for performance reasons in production
-builds, see http://bugs.mysql.com/36941 */
-#define PRINT_USED_CELLS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_USED_CELLS
- hash_cell_t* cell;
- ulint cells = 0;
- ulint i;
-#endif /* PRINT_USED_CELLS */
- ulint n_bufs;
-
-#ifdef PRINT_USED_CELLS
- for (i = 0; i < hash_get_n_cells(table); i++) {
-
- cell = hash_get_nth_cell(table, i);
-
- if (cell->node) {
-
- cells++;
- }
- }
-#endif /* PRINT_USED_CELLS */
-
- fprintf(file, "Hash table size %lu",
- (ulong) hash_get_n_cells(table));
-
-#ifdef PRINT_USED_CELLS
- fprintf(file, ", used cells %lu", (ulong) cells);
-#endif /* PRINT_USED_CELLS */
-
- if (table->heaps == NULL && table->heap != NULL) {
-
- /* This calculation is intended for the adaptive hash
- index: how many buffer frames we have reserved? */
-
- n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
-
- if (table->heap->free_block) {
- n_bufs++;
- }
-
- fprintf(file, ", node heap has %lu buffer(s)\n",
- (ulong) n_bufs);
- }
-}
diff --git a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c
deleted file mode 100644
index 4807015eee5..00000000000
--- a/storage/innobase/ha/hash0hash.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/******************************************************
-The simple hash table utility
-
-(c) 1997 Innobase Oy
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "hash0hash.h"
-#ifdef UNIV_NONINL
-#include "hash0hash.ic"
-#endif
-
-#include "mem0mem.h"
-
-/****************************************************************
-Reserves the mutex for a fold value in a hash table. */
-
-void
-hash_mutex_enter(
-/*=============*/
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
-{
- mutex_enter(hash_get_mutex(table, fold));
-}
-
-/****************************************************************
-Releases the mutex for a fold value in a hash table. */
-
-void
-hash_mutex_exit(
-/*============*/
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
-{
- mutex_exit(hash_get_mutex(table, fold));
-}
-
-/****************************************************************
-Reserves all the mutexes of a hash table, in an ascending order. */
-
-void
-hash_mutex_enter_all(
-/*=================*/
- hash_table_t* table) /* in: hash table */
-{
- ulint i;
-
- for (i = 0; i < table->n_mutexes; i++) {
-
- mutex_enter(table->mutexes + i);
- }
-}
-
-/****************************************************************
-Releases all the mutexes of a hash table. */
-
-void
-hash_mutex_exit_all(
-/*================*/
- hash_table_t* table) /* in: hash table */
-{
- ulint i;
-
- for (i = 0; i < table->n_mutexes; i++) {
-
- mutex_exit(table->mutexes + i);
- }
-}
-
-/*****************************************************************
-Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n. */
-
-hash_table_t*
-hash_create(
-/*========*/
- /* out, own: created table */
- ulint n) /* in: number of array cells */
-{
- hash_cell_t* array;
- ulint prime;
- hash_table_t* table;
- ulint i;
- hash_cell_t* cell;
-
- prime = ut_find_prime(n);
-
- table = mem_alloc(sizeof(hash_table_t));
-
- array = ut_malloc(sizeof(hash_cell_t) * prime);
-
- table->adaptive = FALSE;
- table->array = array;
- table->n_cells = prime;
- table->n_mutexes = 0;
- table->mutexes = NULL;
- table->heaps = NULL;
- table->heap = NULL;
- table->magic_n = HASH_TABLE_MAGIC_N;
-
- /* Initialize the cell array */
-
- for (i = 0; i < prime; i++) {
-
- cell = hash_get_nth_cell(table, i);
- cell->node = NULL;
- }
-
- return(table);
-}
-
-/*****************************************************************
-Frees a hash table. */
-
-void
-hash_table_free(
-/*============*/
- hash_table_t* table) /* in, own: hash table */
-{
- ut_a(table->mutexes == NULL);
-
- ut_free(table->array);
- mem_free(table);
-}
-
-/*****************************************************************
-Creates a mutex array to protect a hash table. */
-
-void
-hash_create_mutexes_func(
-/*=====================*/
- hash_table_t* table, /* in: hash table */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level, /* in: latching order level of the
- mutexes: used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes) /* in: number of mutexes, must be a
- power of 2 */
-{
- ulint i;
-
- ut_a(n_mutexes == ut_2_power_up(n_mutexes));
-
- table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
-
- for (i = 0; i < n_mutexes; i++) {
- mutex_create(table->mutexes + i, sync_level);
- }
-
- table->n_mutexes = n_mutexes;
-}
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
deleted file mode 100644
index 828dcdb843d..00000000000
--- a/storage/innobase/handler/ha_innodb.cc
+++ /dev/null
@@ -1,8534 +0,0 @@
-/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
-/* This file defines the InnoDB handler: the interface between MySQL and InnoDB
-NOTE: You can only use noninlined InnoDB functions in this file, because we
-have disabled the InnoDB inlining in this file. */
-
-/* TODO list for the InnoDB handler in 5.0:
- - Remove the flag trx->active_trans and look at trx->conc_state
- - fix savepoint functions to use savepoint storage area
- - Find out what kind of problems the OS X case-insensitivity causes to
- table and database names; should we 'normalize' the names like we do
- in Windows?
-*/
-
-#ifdef USE_PRAGMA_IMPLEMENTATION
-#pragma implementation // gcc: Class implementation
-#endif
-
-#include <mysql_priv.h>
-#include <mysqld_error.h>
-
-#include <m_ctype.h>
-#include <hash.h>
-#include <myisampack.h>
-#include <mysys_err.h>
-#include <my_sys.h>
-#include "ha_innodb.h"
-#include <mysql/plugin.h>
-
-#ifndef MYSQL_SERVER
-/* This is needed because of Bug #3596. Let us hope that pthread_mutex_t
-is defined the same in both builds: the MySQL server and the InnoDB plugin. */
-extern pthread_mutex_t LOCK_thread_count;
-#endif /* MYSQL_SERVER */
-
-/** to protect innobase_open_files */
-static pthread_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static pthread_mutex_t prepare_commit_mutex;
-static ulong commit_threads = 0;
-static pthread_mutex_t commit_threads_m;
-static pthread_cond_t commit_cond;
-static pthread_mutex_t commit_cond_m;
-static bool innodb_inited = 0;
-
-/*
- This needs to exist until the query cache callback is removed
- or learns to pass hton.
-*/
-static handlerton *innodb_hton_ptr;
-
-#define INSIDE_HA_INNOBASE_CC
-
-/* Include necessary InnoDB headers */
-extern "C" {
-#include "../storage/innobase/include/univ.i"
-#include "../storage/innobase/include/os0file.h"
-#include "../storage/innobase/include/os0thread.h"
-#include "../storage/innobase/include/srv0start.h"
-#include "../storage/innobase/include/srv0srv.h"
-#include "../storage/innobase/include/trx0roll.h"
-#include "../storage/innobase/include/trx0trx.h"
-#include "../storage/innobase/include/trx0sys.h"
-#include "../storage/innobase/include/mtr0mtr.h"
-#include "../storage/innobase/include/row0ins.h"
-#include "../storage/innobase/include/row0mysql.h"
-#include "../storage/innobase/include/row0sel.h"
-#include "../storage/innobase/include/row0upd.h"
-#include "../storage/innobase/include/log0log.h"
-#include "../storage/innobase/include/lock0lock.h"
-#include "../storage/innobase/include/dict0crea.h"
-#include "../storage/innobase/include/btr0cur.h"
-#include "../storage/innobase/include/btr0btr.h"
-#include "../storage/innobase/include/fsp0fsp.h"
-#include "../storage/innobase/include/sync0sync.h"
-#include "../storage/innobase/include/fil0fil.h"
-#include "../storage/innobase/include/trx0xa.h"
-#include "../storage/innobase/include/thr0loc.h"
-#include "../storage/innobase/include/ha_prototypes.h"
-}
-
-static const long AUTOINC_OLD_STYLE_LOCKING = 0;
-static const long AUTOINC_NEW_STYLE_LOCKING = 1;
-static const long AUTOINC_NO_LOCKING = 2;
-
-static long innobase_mirrored_log_groups, innobase_log_files_in_group,
- innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb,
- innobase_additional_mem_pool_size,
- innobase_lock_wait_timeout, innobase_force_recovery,
- innobase_open_files, innobase_autoinc_lock_mode;
-
-static long long innobase_buffer_pool_size, innobase_log_file_size;
-
-/* The default values for the following char* start-up parameters
-are determined in innobase_init below: */
-
-static char* innobase_data_home_dir = NULL;
-static char* innobase_data_file_path = NULL;
-static char* innobase_log_group_home_dir = NULL;
-/* The following has a misleading name: starting from 4.0.5, this also
-affects Windows: */
-static char* innobase_unix_file_flush_method = NULL;
-
-/* Below we have boolean-valued start-up parameters, and their default
-values */
-
-static ulong innobase_fast_shutdown = 1;
-#ifdef UNIV_LOG_ARCHIVE
-static my_bool innobase_log_archive = FALSE;
-static char* innobase_log_arch_dir = NULL;
-#endif /* UNIV_LOG_ARCHIVE */
-static my_bool innobase_use_doublewrite = TRUE;
-static my_bool innobase_use_checksums = TRUE;
-static my_bool innobase_file_per_table = FALSE;
-static my_bool innobase_locks_unsafe_for_binlog = FALSE;
-static my_bool innobase_rollback_on_timeout = FALSE;
-static my_bool innobase_create_status_file = FALSE;
-static my_bool innobase_stats_on_metadata = TRUE;
-static my_bool innobase_adaptive_hash_index = TRUE;
-
-static char* internal_innobase_data_file_path = NULL;
-
-/* Default number of IO per second supported by server. Tunes background
- IO rate. */
-static long innobase_io_capacity = 100;
-
-/* Write dirty pages when pct dirty is less than max pct dirty */
-static my_bool innobase_extra_dirty_writes = TRUE;
-
-/* Max number of IO requests merged to perform large IO in background
- IO threads.
-*/
-long innobase_max_merged_io = 64;
-
-/* Number of background IO threads for read and write. */
-long innobase_read_io_threads, innobase_write_io_threads;
-
-/* Use timer based InnoDB concurrency throttling flag */
-static my_bool innobase_thread_concurrency_timer_based;
-
-/* The following counter is used to convey information to InnoDB
-about server activity: in selects it is not sensible to call
-srv_active_wake_master_thread after each fetch or search, we only do
-it every INNOBASE_WAKE_INTERVAL'th step. */
-
-#define INNOBASE_WAKE_INTERVAL 32
-static ulong innobase_active_counter = 0;
-
-static HASH innobase_open_tables;
-
-#ifdef __NETWARE__ /* some special cleanup for NetWare */
-bool nw_panic = FALSE;
-#endif
-
-static uchar* innobase_get_key(INNOBASE_SHARE *share, size_t *length,
- my_bool not_used __attribute__((unused)));
-static INNOBASE_SHARE *get_share(const char *table_name);
-static void free_share(INNOBASE_SHARE *share);
-static int innobase_close_connection(handlerton *hton, THD* thd);
-static int innobase_commit(handlerton *hton, THD* thd, bool all);
-static int innobase_rollback(handlerton *hton, THD* thd, bool all);
-static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
- void *savepoint);
-static int innobase_savepoint(handlerton *hton, THD* thd, void *savepoint);
-static int innobase_release_savepoint(handlerton *hton, THD* thd,
- void *savepoint);
-static handler *innobase_create_handler(handlerton *hton,
- TABLE_SHARE *table,
- MEM_ROOT *mem_root);
-
-static const char innobase_hton_name[]= "InnoDB";
-
-
-static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB support for the XA two-phase commit",
- /* check_func */ NULL, /* update_func */ NULL,
- /* default */ TRUE);
-
-static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB locking in LOCK TABLES",
- /* check_func */ NULL, /* update_func */ NULL,
- /* default */ TRUE);
-
-static handler *innobase_create_handler(handlerton *hton,
- TABLE_SHARE *table,
- MEM_ROOT *mem_root)
-{
- return new (mem_root) ha_innobase(hton, table);
-}
-
-/***********************************************************************
-This function is used to prepare X/Open XA distributed transaction */
-static
-int
-innobase_xa_prepare(
-/*================*/
- /* out: 0 or error number */
- handlerton* hton,
- THD* thd, /* in: handle to the MySQL thread of the user
- whose XA transaction should be prepared */
- bool all); /* in: TRUE - commit transaction
- FALSE - the current SQL statement ended */
-/***********************************************************************
-This function is used to recover X/Open XA distributed transactions */
-static
-int
-innobase_xa_recover(
-/*================*/
- /* out: number of prepared transactions
- stored in xid_list */
- handlerton* hton,
- XID* xid_list, /* in/out: prepared transactions */
- uint len); /* in: number of slots in xid_list */
-/***********************************************************************
-This function is used to commit one X/Open XA distributed transaction
-which is in the prepared state */
-static
-int
-innobase_commit_by_xid(
-/*===================*/
- /* out: 0 or error number */
- handlerton* hton,
- XID* xid); /* in: X/Open XA transaction identification */
-/***********************************************************************
-This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state */
-static
-int
-innobase_rollback_by_xid(
-/*=====================*/
- /* out: 0 or error number */
- handlerton* hton,
- XID *xid); /* in: X/Open XA transaction identification */
-/***********************************************************************
-Create a consistent view for a cursor based on current transaction
-which is created if the corresponding MySQL thread still lacks one.
-This consistent view is then used inside of MySQL when accessing records
-using a cursor. */
-static
-void*
-innobase_create_cursor_view(
-/*========================*/
- /* out: pointer to cursor view or NULL */
- handlerton* hton, /* in: innobase hton */
- THD* thd); /* in: user thread handle */
-/***********************************************************************
-Set the given consistent cursor view to a transaction which is created
-if the corresponding MySQL thread still lacks one. If the given
-consistent cursor view is NULL global read view of a transaction is
-restored to a transaction read view. */
-static
-void
-innobase_set_cursor_view(
-/*=====================*/
- handlerton* hton,
- THD* thd, /* in: user thread handle */
- void* curview);/* in: Consistent cursor view to be set */
-/***********************************************************************
-Close the given consistent cursor view of a transaction and restore
-global read view to a transaction read view. Transaction is created if the
-corresponding MySQL thread still lacks one. */
-static
-void
-innobase_close_cursor_view(
-/*=======================*/
- handlerton* hton,
- THD* thd, /* in: user thread handle */
- void* curview);/* in: Consistent read view to be closed */
-/*********************************************************************
-Removes all tables in the named database inside InnoDB. */
-static
-void
-innobase_drop_database(
-/*===================*/
- /* out: error number */
- handlerton* hton, /* in: handlerton of Innodb */
- char* path); /* in: database path; inside InnoDB the name
- of the last directory in the path is used as
- the database name: for example, in 'mysql/data/test'
- the database name is 'test' */
-/***********************************************************************
-Closes an InnoDB database. */
-static
-int
-innobase_end(handlerton *hton, ha_panic_function type);
-
-/*********************************************************************
-Creates an InnoDB transaction struct for the thd if it does not yet have one.
-Starts a new InnoDB transaction if a transaction is not yet started. And
-assigns a new snapshot for a consistent read if the transaction does not yet
-have one. */
-static
-int
-innobase_start_trx_and_assign_read_view(
-/*====================================*/
- /* out: 0 */
- handlerton* hton, /* in: Innodb handlerton */
- THD* thd); /* in: MySQL thread handle of the user for whom
- the transaction should be committed */
-/********************************************************************
-Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint. */
-static
-bool
-innobase_flush_logs(
-/*================*/
- /* out: TRUE if error */
- handlerton* hton); /* in: InnoDB handlerton */
-
-/****************************************************************************
-Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
-Monitor to the client. */
-static
-bool
-innodb_show_status(
-/*===============*/
- handlerton* hton, /* in: the innodb handlerton */
- THD* thd, /* in: the MySQL query thread of the caller */
- stat_print_fn *stat_print);
-static
-bool innobase_show_status(handlerton *hton, THD* thd,
- stat_print_fn* stat_print,
- enum ha_stat_type stat_type);
-
-/*********************************************************************
-Commits a transaction in an InnoDB database. */
-static
-void
-innobase_commit_low(
-/*================*/
- trx_t* trx); /* in: transaction handle */
-
-static SHOW_VAR innodb_status_variables[]= {
- {"buffer_pool_pages_data",
- (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
- {"buffer_pool_pages_dirty",
- (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
- {"buffer_pool_pages_flushed",
- (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
- {"buffer_pool_pages_free",
- (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG},
-#ifdef UNIV_DEBUG
- {"buffer_pool_pages_latched",
- (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG},
-#endif /* UNIV_DEBUG */
- {"buffer_pool_pages_misc",
- (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
- {"buffer_pool_pages_total",
- (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
- {"buffer_pool_read_ahead_rnd",
- (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
- {"buffer_pool_read_ahead_seq",
- (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG},
- {"buffer_pool_read_requests",
- (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG},
- {"buffer_pool_reads",
- (char*) &export_vars.innodb_buffer_pool_reads, SHOW_LONG},
- {"buffer_pool_wait_free",
- (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG},
- {"buffer_pool_write_requests",
- (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
- {"data_fsyncs",
- (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG},
- {"data_pending_fsyncs",
- (char*) &export_vars.innodb_data_pending_fsyncs, SHOW_LONG},
- {"data_pending_reads",
- (char*) &export_vars.innodb_data_pending_reads, SHOW_LONG},
- {"data_pending_writes",
- (char*) &export_vars.innodb_data_pending_writes, SHOW_LONG},
- {"data_read",
- (char*) &export_vars.innodb_data_read, SHOW_LONG},
- {"data_reads",
- (char*) &export_vars.innodb_data_reads, SHOW_LONG},
- {"data_writes",
- (char*) &export_vars.innodb_data_writes, SHOW_LONG},
- {"data_written",
- (char*) &export_vars.innodb_data_written, SHOW_LONG},
- {"dblwr_pages_written",
- (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
- {"dblwr_writes",
- (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
- {"have_sync_atomic",
- (char*) &export_vars.innodb_have_sync_atomic, SHOW_BOOL},
- {"heap_enabled",
- (char*) &export_vars.innodb_heap_enabled, SHOW_BOOL},
- {"log_waits",
- (char*) &export_vars.innodb_log_waits, SHOW_LONG},
- {"log_write_requests",
- (char*) &export_vars.innodb_log_write_requests, SHOW_LONG},
- {"log_writes",
- (char*) &export_vars.innodb_log_writes, SHOW_LONG},
- {"os_log_fsyncs",
- (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG},
- {"os_log_pending_fsyncs",
- (char*) &export_vars.innodb_os_log_pending_fsyncs, SHOW_LONG},
- {"os_log_pending_writes",
- (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG},
- {"os_log_written",
- (char*) &export_vars.innodb_os_log_written, SHOW_LONG},
- {"page_size",
- (char*) &export_vars.innodb_page_size, SHOW_LONG},
- {"pages_created",
- (char*) &export_vars.innodb_pages_created, SHOW_LONG},
- {"pages_read",
- (char*) &export_vars.innodb_pages_read, SHOW_LONG},
- {"pages_written",
- (char*) &export_vars.innodb_pages_written, SHOW_LONG},
- {"row_lock_current_waits",
- (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG},
- {"row_lock_time",
- (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG},
- {"row_lock_time_avg",
- (char*) &export_vars.innodb_row_lock_time_avg, SHOW_LONG},
- {"row_lock_time_max",
- (char*) &export_vars.innodb_row_lock_time_max, SHOW_LONG},
- {"row_lock_waits",
- (char*) &export_vars.innodb_row_lock_waits, SHOW_LONG},
- {"rows_deleted",
- (char*) &export_vars.innodb_rows_deleted, SHOW_LONG},
- {"rows_inserted",
- (char*) &export_vars.innodb_rows_inserted, SHOW_LONG},
- {"rows_read",
- (char*) &export_vars.innodb_rows_read, SHOW_LONG},
- {"rows_updated",
- (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
- {"wake_ups",
- (char*) &export_vars.innodb_wake_ups, SHOW_LONG},
- {NullS, NullS, SHOW_LONG}
-};
-
-/* General functions */
-
-/**********************************************************************
-Returns true if the thread is the replication thread on the slave
-server. Used in srv_conc_enter_innodb() to determine if the thread
-should be allowed to enter InnoDB - the replication thread is treated
-differently than other threads. Also used in
-srv_conc_force_exit_innodb(). */
-extern "C"
-ibool
-thd_is_replication_slave_thread(
-/*============================*/
- /* out: true if thd is the replication thread */
- void* thd) /* in: thread handle (THD*) */
-{
- return((ibool) thd_slave_thread((THD*) thd));
-}
-
-/**********************************************************************
-Save some CPU by testing the value of srv_thread_concurrency in inline
-functions. */
-inline
-void
-innodb_srv_conc_enter_innodb(
-/*=========================*/
- trx_t* trx) /* in: transaction handle */
-{
- if (UNIV_LIKELY(!srv_thread_concurrency)) {
-
- return;
- }
-
- srv_conc_enter_innodb(trx);
-}
-
-/**********************************************************************
-Save some CPU by testing the value of srv_thread_concurrency in inline
-functions. */
-inline
-void
-innodb_srv_conc_exit_innodb(
-/*========================*/
- trx_t* trx) /* in: transaction handle */
-{
- if (UNIV_LIKELY(!trx->declared_to_be_inside_innodb)) {
-
- return;
- }
-
- srv_conc_exit_innodb(trx);
-}
-
-/**********************************************************************
-Releases possible search latch and InnoDB thread FIFO ticket. These should
-be released at each SQL statement end, and also when mysqld passes the
-control to the client. It does no harm to release these also in the middle
-of an SQL statement. */
-inline
-void
-innobase_release_stat_resources(
-/*============================*/
- trx_t* trx) /* in: transaction object */
-{
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- if (trx->declared_to_be_inside_innodb) {
- /* Release our possible ticket in the FIFO */
-
- srv_conc_force_exit_innodb(trx);
- }
-}
-
-/**********************************************************************
-Tells whether the transaction that this thread is processing has edited
-non-transactional tables. The deadlock detector uses this when choosing
-the transaction to roll back: we try to avoid rolling back transactions
-that have edited non-transactional tables. */
-extern "C"
-ibool
-thd_has_edited_nontrans_tables(
-/*===========================*/
-			/* out: true if non-transactional tables have
-			been edited */
-	void*	thd)	/* in: thread handle (THD*) */
-{
-	THD*	mysql_thd = (THD*) thd;
-
-	return((ibool) thd_non_transactional_update(mysql_thd));
-}
-
-/**********************************************************************
-Tells whether the SQL command of the thread is SQLCOM_SELECT. */
-extern "C"
-ibool
-thd_is_select(
-/*==========*/
-				/* out: true if thd is executing SELECT */
-	const void*	thd)	/* in: thread handle (THD*) */
-{
-	const THD*	mysql_thd = (const THD*) thd;
-
-	return(thd_sql_command(mysql_thd) == SQLCOM_SELECT);
-}
-
-/************************************************************************
-Gives access to the InnoDB transaction pointer that is stored in the
-handlerton data slot (thd_ha_data) of a MySQL thread. */
-inline
-trx_t*&
-thd_to_trx(
-/*=======*/
-			/* out: reference to transaction pointer */
-	THD*	thd)	/* in: MySQL thread */
-{
-	trx_t**	slot = (trx_t**) thd_ha_data(thd, innodb_hton_ptr);
-
-	return(*slot);
-}
-
-/************************************************************************
-To be called when mysqld passes control to the client, in order to avoid
-deadlocks on the adaptive hash S-latch possibly held by thd. Does nothing
-if InnoDB has not been initialized or if thd has no InnoDB transaction.
-For more documentation, see handler.cc. */
-static
-int
-innobase_release_temporary_latches(
-/*===============================*/
-				/* out: 0 */
-	handlerton*	hton,	/* in: handlerton */
-	THD*		thd)	/* in: MySQL thread */
-{
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	if (innodb_inited) {
-		trx_t*	trx = thd_to_trx(thd);
-
-		if (trx != NULL) {
-			innobase_release_stat_resources(trx);
-		}
-	}
-
-	return 0;
-}
-
-/************************************************************************
-Bumps innobase_active_counter and, each time the counter reaches a
-multiple of INNOBASE_WAKE_INTERVAL, calls srv_active_wake_master_thread.
-Use this when a single database operation may introduce a small need for
-server utility activity, like checkpointing. */
-inline
-void
-innobase_active_small(void)
-/*=======================*/
-{
-	if ((++innobase_active_counter % INNOBASE_WAKE_INTERVAL) != 0) {
-
-		return;
-	}
-
-	srv_active_wake_master_thread();
-}
-
-/************************************************************************
-Maps an InnoDB error code to the corresponding MySQL error code. For a
-deadlock, a full lock table or a lock wait timeout it also informs MySQL
-(when a thd is given) about the rollback performed inside InnoDB. */
-static
-int
-convert_error_code_to_mysql(
-/*========================*/
-			/* out: MySQL error code */
-	int	error,	/* in: InnoDB error code */
-	THD*	thd)	/* in: user thread handle or NULL */
-{
-	switch (error) {
-	case DB_SUCCESS:
-		return(0);
-
-	case (int) DB_DUPLICATE_KEY:
-		return(HA_ERR_FOUND_DUPP_KEY);
-
-	case (int) DB_FOREIGN_DUPLICATE_KEY:
-		return(HA_ERR_FOREIGN_DUPLICATE_KEY);
-
-	case (int) DB_RECORD_NOT_FOUND:
-		return(HA_ERR_NO_ACTIVE_RECORD);
-
-	case (int) DB_ERROR:
-		return(-1); /* unspecified error */
-
-	case (int) DB_DEADLOCK:
-		/* Since we rolled back the whole transaction, we must
-		tell it also to MySQL so that MySQL knows to empty the
-		cached binlog for this transaction */
-
-		if (thd) {
-			thd_mark_transaction_to_rollback(thd, TRUE);
-		}
-
-		return(HA_ERR_LOCK_DEADLOCK);
-
-	case (int) DB_LOCK_WAIT_TIMEOUT:
-		/* Starting from 5.0.13, we let MySQL just roll back the
-		latest SQL statement in a lock wait timeout. Previously, we
-		rolled back the whole transaction. */
-
-		if (thd) {
-			thd_mark_transaction_to_rollback(
-				thd, (bool)row_rollback_on_timeout);
-		}
-
-		return(HA_ERR_LOCK_WAIT_TIMEOUT);
-
-	case (int) DB_NO_REFERENCED_ROW:
-		return(HA_ERR_NO_REFERENCED_ROW);
-
-	case (int) DB_ROW_IS_REFERENCED:
-		return(HA_ERR_ROW_IS_REFERENCED);
-
-	case (int) DB_CANNOT_ADD_CONSTRAINT:
-		return(HA_ERR_CANNOT_ADD_FOREIGN);
-
-	case (int) DB_CANNOT_DROP_CONSTRAINT:
-		/* TODO: This is a bit misleading, a new MySQL error
-		code should be introduced */
-		return(HA_ERR_ROW_IS_REFERENCED);
-
-	case (int) DB_COL_APPEARS_TWICE_IN_INDEX:
-	case (int) DB_CORRUPTION:
-		return(HA_ERR_CRASHED);
-
-	case (int) DB_OUT_OF_FILE_SPACE:
-		return(HA_ERR_RECORD_FILE_FULL);
-
-	case (int) DB_TABLE_IS_BEING_USED:
-		return(HA_ERR_WRONG_COMMAND);
-
-	case (int) DB_TABLE_NOT_FOUND:
-		return(HA_ERR_NO_SUCH_TABLE);
-
-	case (int) DB_TOO_BIG_RECORD:
-		return(HA_ERR_TO_BIG_ROW);
-
-	case (int) DB_NO_SAVEPOINT:
-		return(HA_ERR_NO_SAVEPOINT);
-
-	case (int) DB_LOCK_TABLE_FULL:
-		/* Since we rolled back the whole transaction, we must
-		tell it also to MySQL so that MySQL knows to empty the
-		cached binlog for this transaction */
-
-		if (thd) {
-			thd_mark_transaction_to_rollback(thd, TRUE);
-		}
-
-		return(HA_ERR_LOCK_TABLE_FULL);
-
-	case DB_TOO_MANY_CONCURRENT_TRXS:
-		/* Once MySQL add the appropriate code to errmsg.txt then
-		we can get rid of this #ifdef. NOTE: The code checked by
-		the #ifdef is the suggested name for the error condition
-		and the actual error code name could very well be different.
-		This will require some monitoring, ie. the status
-		of this request on our part.*/
-#ifdef ER_TOO_MANY_CONCURRENT_TRXS
-		return(ER_TOO_MANY_CONCURRENT_TRXS);
-#else
-		return(HA_ERR_RECORD_FILE_FULL);
-#endif
-
-	case DB_UNSUPPORTED:
-		return(HA_ERR_UNSUPPORTED);
-
-	default:
-		return(-1); // Unknown error
-	}
-}
-
-/*****************************************************************
-Locks LOCK_thread_count. Call this before reserving the InnoDB
-kernel_mutex when you want to print a thd that is not associated with the
-current thread, to protect MySQL from setting thd->query NULL. For a thd
-of the current thread the call is not necessary, because then MySQL
-cannot modify thd->query. After releasing the kernel_mutex, call
-innobase_mysql_end_print_arbitrary_thd().
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-extern "C"
-void
-innobase_mysql_prepare_print_arbitrary_thd(void)
-/*============================================*/
-{
-	VOID(pthread_mutex_lock(&LOCK_thread_count));
-}
-
-/*****************************************************************
-Unlocks LOCK_thread_count, the mutex that was reserved by
-innobase_mysql_prepare_print_arbitrary_thd().
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-extern "C"
-void
-innobase_mysql_end_print_arbitrary_thd(void)
-/*========================================*/
-{
-	VOID(pthread_mutex_unlock(&LOCK_thread_count));
-}
-
-/*****************************************************************
-Writes the text produced by thd_security_context() for a THD object
-(== user session thread) to the given file, followed by a newline.
-NOTE that /mysql/innobase/trx/trx0trx.c must contain the prototype for
-this function! */
-extern "C"
-void
-innobase_mysql_print_thd(
-/*=====================*/
-	FILE*	f,		/* in: output stream */
-	void*	input_thd,	/* in: pointer to a MySQL THD object */
-	uint	max_query_len)	/* in: max query length to print, or 0 to
-				use the default max length */
-{
-	char		buffer[1024];
-	const char*	info;
-
-	info = thd_security_context((THD*) input_thd, buffer,
-				    sizeof(buffer), max_query_len);
-	fputs(info, f);
-	putc('\n', f);
-}
-
-/**********************************************************************
-Looks up the minimum and maximum character length, in bytes, of a
-character set in all_charsets[]. If there is no entry for the code
-(which is only accepted for code 0), both lengths are set to 0.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/include/data0type.ic! */
-extern "C"
-void
-innobase_get_cset_width(
-/*====================*/
-	ulint	cset,		/* in: MySQL charset-collation code */
-	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
-	ulint*	mbmaxlen)	/* out: maximum length of a char (in bytes) */
-{
-	CHARSET_INFO*	cs;
-
-	ut_ad(cset < 256);
-	ut_ad(mbminlen);
-	ut_ad(mbmaxlen);
-
-	cs = all_charsets[cset];
-
-	if (cs == NULL) {
-		/* Only code 0 may be missing from the table. */
-		ut_a(cset == 0);
-		*mbmaxlen = 0;
-		*mbminlen = 0;
-
-		return;
-	}
-
-	*mbminlen = cs->mbminlen;
-	*mbmaxlen = cs->mbmaxlen;
-}
-
-/**********************************************************************
-Converts an identifier from the character set of the current thread to
-the filename character set (my_charset_filename), i.e., to a table name.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
-void
-innobase_convert_from_table_id(
-/*===========================*/
-	char*		to,	/* out: converted identifier */
-	const char*	from,	/* in: identifier to convert */
-	ulint		len)	/* in: length of 'to', in bytes */
-{
-	uint		errors;	/* conversion error count; not examined */
-	CHARSET_INFO*	from_cs = thd_charset(current_thd);
-
-	strconvert(from_cs, from, &my_charset_filename,
-		   to, (uint) len, &errors);
-}
-
-/**********************************************************************
-Converts an identifier from the character set of the current thread to
-system_charset_info (UTF-8).
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
-void
-innobase_convert_from_id(
-/*=====================*/
-	char*		to,	/* out: converted identifier */
-	const char*	from,	/* in: identifier to convert */
-	ulint		len)	/* in: length of 'to', in bytes */
-{
-	uint		errors;	/* conversion error count; not examined */
-	CHARSET_INFO*	from_cs = thd_charset(current_thd);
-
-	strconvert(from_cs, from, system_charset_info,
-		   to, (uint) len, &errors);
-}
-
-/**********************************************************************
-Case-insensitive comparison of two NUL-terminated UTF-8 strings, done
-with my_strcasecmp() in system_charset_info.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
-int
-innobase_strcasecmp(
-/*================*/
-				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
-	const char*	a,	/* in: first string to compare */
-	const char*	b)	/* in: second string to compare */
-{
-	int	cmp = my_strcasecmp(system_charset_info, a, b);
-
-	return(cmp);
-}
-
-/**********************************************************************
-Converts a NUL-terminated UTF-8 string to lower case in place, using
-my_casedn_str() in system_charset_info.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
-void
-innobase_casedn_str(
-/*================*/
-	char*	a)	/* in/out: string to put in lower case */
-{
-	my_casedn_str(system_charset_info, a);
-}
-
-/**************************************************************************
-Returns the connection character set of a MySQL thread, as given by
-thd_charset().
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
-struct charset_info_st*
-innobase_get_charset(
-/*=================*/
-				/* out: connection character set */
-	void*	mysql_thd)	/* in: MySQL thread handle */
-{
-	THD*	thd = (THD*) mysql_thd;
-
-	return(thd_charset(thd));
-}
-
-/*************************************************************************
-Creates a temporary file with mysql_tmpfile("ib") and returns a dup()ed
-copy of its descriptor. The descriptor obtained from mysql_tmpfile() is
-always closed with my_close() before returning. If dup() fails, the error
-number is stored in my_errno and reported with my_error(). */
-extern "C"
-int
-innobase_mysql_tmpfile(void)
-/*========================*/
-			/* out: temporary file descriptor, or < 0 on error */
-{
-	int	fd2 = -1;
-	File	fd = mysql_tmpfile("ib");
-
-	if (fd < 0) {
-
-		return(fd2);
-	}
-
-	/* Copy the file descriptor, so that the additional resources
-	allocated by create_temp_file() can be freed by invoking
-	my_close().
-
-	Because the file descriptor returned by this function
-	will be passed to fdopen(), it will be closed by invoking
-	fclose(), which in turn will invoke close() instead of
-	my_close(). */
-	fd2 = dup(fd);
-
-	if (fd2 < 0) {
-		/* Save errno first, before any other call has a chance
-		to overwrite it, and trace the error number rather than
-		the (always negative) return value of dup(). */
-		my_errno=errno;
-		DBUG_PRINT("error",("Got error %d on dup",my_errno));
-		my_error(EE_OUT_OF_FILERESOURCES,
-			 MYF(ME_BELL+ME_WAITTANG),
-			 "ib*", my_errno);
-	}
-
-	my_close(fd, MYF(MY_WME));
-
-	return(fd2);
-}
-
-/*************************************************************************
-Thin wrapper around MySQL's copy_and_convert function; it only casts the
-arguments. See copy_and_convert for documentation. */
-extern "C"
-ulint
-innobase_convert_string(
-/*====================*/
-	void*		to,
-	ulint		to_length,
-	CHARSET_INFO*	to_cs,
-	const void*	from,
-	ulint		from_length,
-	CHARSET_INFO*	from_cs,
-	uint*		errors)
-{
-	char*		dst = (char*) to;
-	const char*	src = (const char*) from;
-
-	return(copy_and_convert(dst, (uint32) to_length, to_cs,
-				src, (uint32) from_length, from_cs,
-				errors));
-}
-
-/*************************************************************************
-Compute the next autoinc value, saturating at max_value.
-
-For MySQL replication the autoincrement values can be partitioned among
-the nodes. The offset is the start or origin of the autoincrement value
-for a particular node. For n nodes the increment will be n and the offset
-will be in the interval [1, n]. The formula tries to allocate the next
-value for a particular node.
-
-Note: This function is also called with increment set to the number of
-values we want to reserve for multi-value inserts e.g.,
-
-	INSERT INTO T VALUES(), (), ();
-
-innobase_next_autoinc() will be called with increment set to
-n * 3 where autoinc_lock_mode != TRADITIONAL because we want
-to reserve 3 values for the multi-value INSERT above. */
-static
-ulonglong
-innobase_next_autoinc(
-/*==================*/
-					/* out: the next value */
-	ulonglong	current,	/* in: Current value */
-	ulonglong	increment,	/* in: increment current by */
-	ulonglong	offset,		/* in: AUTOINC offset */
-	ulonglong	max_value)	/* in: max value for type */
-{
-	/* Start from the saturated result; it is replaced below only
-	when a smaller value can be computed without overflow. */
-	ulonglong	next_value = max_value;
-
-	/* Should never be 0. */
-	ut_a(increment > 0);
-
-	/* According to MySQL documentation, if the offset is greater than
-	the increment then the offset is ignored. */
-	if (offset > increment) {
-		offset = 0;
-	}
-
-	if (max_value <= current) {
-		/* Already at or beyond the maximum: stay saturated. */
-	} else if (offset <= 1) {
-		/* Offset 0 and 1 are the same, because there must be at
-		least one node in the system. */
-		if (max_value - current > increment) {
-			next_value = current + increment;
-		}
-	} else {
-		ulonglong	gap;
-		ulonglong	steps;
-
-		gap = (current > offset)
-			? current - offset
-			: offset - current;
-
-		steps = (gap / increment) + 1;
-
-		ut_a(steps > 0);
-
-		/* Check for multiplication overflow. */
-		if (increment <= (max_value / steps)) {
-			ulonglong	scaled = steps * increment;
-
-			ut_a(max_value >= scaled);
-
-			/* Check for overflow. */
-			if (max_value - scaled > offset) {
-				next_value = scaled + offset;
-			}
-		}
-	}
-
-	ut_a(next_value <= max_value);
-
-	return(next_value);
-}
-
-/*************************************************************************
-Returns the InnoDB transaction handle stored for a MySQL thread. If the
-thread does not have one yet, a new transaction struct is allocated and
-stored for it. On every call the foreign key and unique secondary index
-checking flags of the transaction are refreshed from the thread options. */
-static
-trx_t*
-check_trx_exists(
-/*=============*/
-			/* out: InnoDB transaction handle */
-	THD*	thd)	/* in: user thread handle */
-{
-	trx_t*&	trx = thd_to_trx(thd);
-
-	ut_ad(thd == current_thd);
-
-	if (trx != NULL) {
-		/* Existing handle: verify that it is not corrupted. */
-		if (trx->magic_n != TRX_MAGIC_N) {
-			mem_analyze_corruption(trx);
-
-			ut_error;
-		}
-	} else {
-		DBUG_ASSERT(thd != NULL);
-		trx = trx_allocate_for_mysql();
-
-		trx->mysql_thd = thd;
-		trx->mysql_query_str = thd_query(thd);
-
-		/* Update the info whether we should skip XA steps that eat
-		CPU time */
-		trx->support_xa = THDVAR(thd, support_xa);
-	}
-
-	trx->check_foreigns =
-		thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)
-		? FALSE : TRUE;
-
-	trx->check_unique_secondary =
-		thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)
-		? FALSE : TRUE;
-
-	return(trx);
-}
-
-
-/*************************************************************************
-Construct ha_innobase handler. The handlerton and the table share are
-passed on to the base class handler; the constructor body is empty, it
-only initializes the table flags of the handler and zeroes the
-start_of_scan and num_write_row members. */
-
-ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
- :handler(hton, table_arg),
- /* table capability flags of this handler */
- int_table_flags(HA_REC_NOT_IN_SEQ |
- HA_NULL_IN_KEY |
- HA_CAN_INDEX_BLOBS |
- HA_CAN_SQL_HANDLER |
- HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
- HA_PRIMARY_KEY_IN_READ_INDEX |
- HA_BINLOG_ROW_CAPABLE |
- HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
- HA_TABLE_SCAN_ON_INDEX),
- start_of_scan(0),
- num_write_row(0)
-{}
-
-/*************************************************************************
-Makes the handle use the given thd: fetches (allocating if needed) the
-InnoDB transaction of the thd, switches the prebuilt struct over to that
-transaction if it is using another one, and stores thd in user_thd. */
-inline
-int
-ha_innobase::update_thd(
-/*====================*/
-			/* out: 0 or error code */
-	THD*	thd)	/* in: thd to use the handle */
-{
-	trx_t*	thd_trx = check_trx_exists(thd);
-
-	if (prebuilt->trx != thd_trx) {
-		row_update_prebuilt_trx(prebuilt, thd_trx);
-	}
-
-	user_thd = thd;
-
-	return(0);
-}
-
-/*************************************************************************
-Tells MySQL that InnoDB takes part in the current SQL statement, so that
-MySQL knows to roll back the statement if it results in an error. This
-MUST be called for every SQL statement that may be rolled back by MySQL.
-It is allowed to register the same statement several times. */
-inline
-void
-innobase_register_stmt(
-/*===================*/
-	handlerton*	hton,	/* in: Innobase hton */
-	THD*		thd)	/* in: MySQL thd (connection) object */
-{
-	/* FALSE: statement-level registration */
-	trans_register_ha(thd, FALSE, hton);
-}
-
-/*************************************************************************
-Registers the current SQL statement and, unless the thd is in the
-autocommit mode, also the InnoDB transaction in MySQL, so that the MySQL
-XA code knows to call the InnoDB prepare and commit, or rollback for the
-transaction. This MUST be called for every transaction for which the user
-may call commit or rollback. It is allowed to register the same
-transaction several times. */
-inline
-void
-innobase_register_trx_and_stmt(
-/*===========================*/
-	handlerton*	hton,	/* in: Innobase handlerton */
-	THD*		thd)	/* in: MySQL thd (connection) object */
-{
-	/* NOTE that actually innobase_register_stmt() registers also
-	the transaction in the AUTOCOMMIT=1 mode. */
-
-	innobase_register_stmt(hton, thd);
-
-	if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
-		return;
-	}
-
-	/* No autocommit mode, register for a transaction */
-	trans_register_ha(thd, TRUE, hton);
-}
-
-/* BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
- ------------------------------------------------------------
-
-1) The use of the query cache for TBL is disabled when there is an
-uncommitted change to TBL.
-
-2) When a change to TBL commits, InnoDB stores the current value of
-its global trx id counter, let us denote it by INV_TRX_ID, to the table object
-in the InnoDB data dictionary, and does only allow such transactions whose
-id <= INV_TRX_ID to use the query cache.
-
-3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
-modification because of an ON DELETE CASCADE, we invalidate the MySQL query
-cache of TBL immediately.
-
-How this is implemented inside InnoDB:
-
-1) Since every modification always sets an IX type table lock on the InnoDB
-table, it is easy to check if there can be uncommitted modifications for a
-table: just check if there are locks in the lock list of the table.
-
-2) When a transaction inside InnoDB commits, it reads the global trx id
-counter and stores the value INV_TRX_ID to the tables on which it had a lock.
-
-3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
-InnoDB calls an invalidate method for the MySQL query cache for that table.
-
-How this is implemented inside sql_cache.cc:
-
-1) The query cache for an InnoDB table TBL is invalidated immediately at an
-INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
-invalidation to the transaction commit.
-
-2) To store or retrieve a value from the query cache of an InnoDB table TBL,
-any query must first ask InnoDB's permission. We must pass the thd as a
-parameter because InnoDB will look at the trx id, if any, associated with
-that thd.
-
-3) Use of the query cache for InnoDB tables is now allowed also when
-AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
-put restrictions on the use of the query cache.
-*/
-
-/**********************************************************************
-The MySQL query cache uses this to check from InnoDB if the query cache at
-the moment is allowed to operate on an InnoDB table. The SQL query must
-be a non-locking SELECT.
-
-The query cache is allowed to operate on certain query only if this function
-returns TRUE for all tables in the query.
-
-If thd is not in the autocommit state, this function also starts a new
-transaction for thd if there is no active trx yet, and assigns a consistent
-read view to it if there is no read view yet.
-
-Why a deadlock of threads is not possible: the query cache calls this function
-at the start of a SELECT processing. Then the calling thread cannot be
-holding any InnoDB semaphores. The calling thread is holding the
-query cache mutex, and this function will reserve the InnoDB kernel mutex.
-Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
-the InnoDB kernel mutex. */
-static
-my_bool
-innobase_query_caching_of_table_permitted(
-/*======================================*/
-				/* out: TRUE if permitted, FALSE if not;
-				note that the value FALSE does not mean
-				we should invalidate the query cache:
-				invalidation is called explicitly */
-	THD*	thd,		/* in: thd of the user who is trying to
-				store a result to the query cache or
-				retrieve it */
-	char*	full_name,	/* in: concatenation of database name,
-				the null character '\0', and the table
-				name */
-	uint	full_name_len,	/* in: length of the full name, i.e.
-				len(dbname) + len(tablename) + 1 */
-	ulonglong *unused)	/* unused for this engine */
-{
-	ibool	is_autocommit;
-	trx_t*	trx;
-	char*	separator;
-	char	norm_name[1000];
-
-	ut_a(full_name_len < 999);
-
-	trx = check_trx_exists(thd);
-
-	if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
-		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
-		plain SELECT if AUTOCOMMIT is not on. */
-
-		return((my_bool)FALSE);
-	}
-
-	if (trx->has_search_latch) {
-		sql_print_error("The calling thread is holding the adaptive "
-				"search, latch though calling "
-				"innobase_query_caching_of_table_permitted.");
-
-		mutex_enter_noninline(&kernel_mutex);
-		trx_print(stderr, trx, 1024);
-		mutex_exit_noninline(&kernel_mutex);
-	}
-
-	innobase_release_stat_resources(trx);
-
-	is_autocommit = thd_test_options(
-		thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) ? FALSE : TRUE;
-
-	if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
-		/* We are going to retrieve the query result from the query
-		cache. This cannot be a store operation to the query cache
-		because then MySQL would have locks on tables already.
-
-		TODO: if the user has used LOCK TABLES to lock the table,
-		then we open a transaction in the call of row_.. below.
-		That trx can stay open until UNLOCK TABLES. The same problem
-		exists even if we do not use the query cache. MySQL should be
-		modified so that it ALWAYS calls some cleanup function when
-		the processing of a query ends!
-
-		We can imagine we instantaneously serialize this consistent
-		read trx to the current trx id counter. If trx2 would have
-		changed the tables of a query result stored in the cache, and
-		trx2 would have already committed, making the result obsolete,
-		then trx2 would have already invalidated the cache. Thus we
-		can trust the result in the cache is ok for this query. */
-
-		return((my_bool)TRUE);
-	}
-
-	/* Normalize the table name to InnoDB format */
-
-	memcpy(norm_name, full_name, full_name_len);
-
-	/* Only the first full_name_len bytes of norm_name are initialized
-	at this point, so look for the '\0' that ends the database name
-	within those bytes only, instead of running strlen() over the
-	whole buffer. */
-	separator = (char*) memchr(norm_name, '\0', full_name_len);
-	ut_a(separator != NULL);
-
-	*separator = '/';	/* InnoDB uses '/' as the
-				separator between db and table */
-	norm_name[full_name_len] = '\0';
-#ifdef __WIN__
-	innobase_casedn_str(norm_name);
-#endif
-	/* The call of row_search_.. will start a new transaction if it is
-	not yet started */
-
-	if (trx->active_trans == 0) {
-
-		innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
-		trx->active_trans = 1;
-	}
-
-	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
-
-		/* printf("Query cache for %s permitted\n", norm_name); */
-
-		return((my_bool)TRUE);
-	}
-
-	/* printf("Query cache for %s NOT permitted\n", norm_name); */
-
-	return((my_bool)FALSE);
-}
-
-/*********************************************************************
-Asks MySQL to invalidate the query cache for a table. Compiles to an
-empty function when HAVE_QUERY_CACHE is not defined.
-NOTE that the exact prototype of this function has to be in
-/innobase/row/row0ins.c! */
-extern "C"
-void
-innobase_invalidate_query_cache(
-/*============================*/
-	trx_t*	trx,		/* in: transaction which modifies the table */
-	char*	full_name,	/* in: concatenation of database name, null
-				char '\0', table name, null char'\0';
-				NOTE that in Windows this is always
-				in LOWER CASE! */
-	ulint	full_name_len)	/* in: full name length where also the null
-				chars count */
-{
-	/* Note that the sync0sync.h rank of the query cache mutex is just
-	above the InnoDB kernel mutex. The caller of this function must not
-	have latches of a lower rank. */
-
-#ifdef HAVE_QUERY_CACHE
-	THD*		thd = (THD*) trx->mysql_thd;
-	const char*	key = (const char*) full_name;
-
-	/* Argument TRUE below means we are using transactions */
-	mysql_query_cache_invalidate4(thd, key, (uint32) full_name_len, TRUE);
-#endif
-}
-
-/*********************************************************************
-Prints an SQL identifier to a file. A table name is first decoded with
-filename_to_tablename(); if memory for the decoding cannot be allocated,
-the name is printed undecoded. The identifier is enclosed in the quote
-character chosen for it, with embedded quote characters doubled; when no
-quoting is called for (EOF), it is written as is. */
-extern "C"
-void
-innobase_print_identifier(
-/*======================*/
-	FILE*		f,	/* in: output stream */
-	trx_t*		trx,	/* in: transaction */
-	ibool		table_id,/* in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name,	/* in: name to print */
-	ulint		namelen)/* in: length of name */
-{
-	const char*	out = name;	/* text that gets printed */
-	char*		decoded = NULL;	/* decoded table name, if any */
-	int		quote;
-
-	if (table_id) {
-		/* Decode the table name. The filename_to_tablename()
-		function expects a NUL-terminated string. The input and
-		output strings buffers must not be shared. The function
-		only produces more output when the name contains other
-		characters than [0-9A-Z_a-z]. */
-		char*	terminated = (char*) my_malloc((uint) namelen + 1, MYF(MY_WME));
-		uint	decoded_size = (uint) (namelen
-				+ (1 + sizeof srv_mysql50_table_name_prefix));
-
-		if (terminated) {
-			decoded = (char*) my_malloc(decoded_size, MYF(MY_WME));
-
-			if (decoded) {
-				memcpy(terminated, name, namelen);
-				terminated[namelen] = 0;
-				out = decoded;
-				namelen = filename_to_tablename(
-					terminated, decoded, decoded_size);
-			}
-
-			my_free(terminated, MYF(0));
-		}
-	}
-
-	quote = (!trx || !trx->mysql_thd)
-		? '"'
-		: get_quote_char_for_identifier((THD*) trx->mysql_thd,
-						out, (int) namelen);
-
-	if (quote == EOF) {
-		fwrite(out, 1, namelen, f);
-	} else {
-		const char*	end = out + namelen;
-
-		putc(quote, f);
-
-		for (const char* p = out; p < end; p++) {
-			int	c = *p;
-
-			if (c == quote) {
-				/* double an embedded quote character */
-				putc(c, f);
-			}
-
-			putc(c, f);
-		}
-
-		putc(quote, f);
-	}
-
-	my_free(decoded, MYF(MY_ALLOW_ZERO_PTR));
-}
-
-/**************************************************************************
-Tells whether the MySQL thread of a transaction has been killed. A NULL
-transaction, or one without a MySQL thread, is never interrupted. */
-extern "C"
-ibool
-trx_is_interrupted(
-/*===============*/
-			/* out: TRUE if interrupted */
-	trx_t*	trx)	/* in: transaction */
-{
-	if (!trx || !trx->mysql_thd) {
-
-		return(FALSE);
-	}
-
-	return(thd_killed((THD*) trx->mysql_thd) ? TRUE : FALSE);
-}
-
-/******************************************************************
-Clears the read_just_key and keep_other_fields_on_keyread fields of a
-prebuilt struct. The template is used in fast retrieval of just those
-column values MySQL needs in its processing. */
-static
-void
-reset_template(
-/*===========*/
-	row_prebuilt_t*	prebuilt)	/* in/out: prebuilt struct */
-{
-	prebuilt->read_just_key = 0;
-	prebuilt->keep_other_fields_on_keyread = 0;
-}
-
-/*********************************************************************
-Prepares a table handle for use in HANDLER. Call this when you have opened
-a new table handle in HANDLER, before you call index_read_idx() etc.
-Actually, we can let the cursor stay open even over a transaction commit!
-Then you should call this before every operation, fetch next etc. This
-function inits the necessary things even after a transaction commit. */
-
-void
-ha_innobase::init_table_handle_for_HANDLER(void)
-/*============================================*/
-{
-	trx_t*	trx;
-
-	/* If current thd does not yet have a trx struct, create one.
-	If the current handle does not yet have a prebuilt struct, create
-	one. Update the trx pointers in the prebuilt struct. Normally
-	this operation is done in external_lock. */
-
-	update_thd(ha_thd());
-
-	/* From here on work with the transaction that update_thd()
-	left in the prebuilt struct. */
-	trx = prebuilt->trx;
-
-	/* Initialize the prebuilt struct much like it would be inited in
-	external_lock */
-
-	innobase_release_stat_resources(trx);
-
-	/* If the transaction is not started yet, start it */
-
-	trx_start_if_not_started_noninline(trx);
-
-	/* Assign a read view if the transaction does not have it yet */
-
-	trx_assign_read_view(trx);
-
-	/* Set the MySQL flag to mark that there is an active transaction */
-
-	if (trx->active_trans == 0) {
-
-		innobase_register_trx_and_stmt(ht, user_thd);
-
-		trx->active_trans = 1;
-	}
-
-	/* We did the necessary inits in this function, no need to repeat them
-	in row_search_for_mysql */
-
-	prebuilt->sql_stat_start = FALSE;
-
-	/* We let HANDLER always to do the reads as consistent reads, even
-	if the trx isolation level would have been specified as SERIALIZABLE */
-
-	prebuilt->select_lock_type = LOCK_NONE;
-	prebuilt->stored_select_lock_type = LOCK_NONE;
-
-	/* Always fetch all columns in the index record */
-
-	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
-
-	/* We want always to fetch all columns in the whole row? Or do
-	we???? */
-
-	prebuilt->used_in_HANDLER = TRUE;
-	reset_template(prebuilt);
-}
-
-/*************************************************************************
-Opens an InnoDB database. */
-static
-int
-innobase_init(
-/*==========*/
- /* out: 0 on success, error code on failure */
- void *p) /* in: InnoDB handlerton */
-{
- static char current_dir[3]; /* Set if using current lib */
- int err;
- bool ret;
- char *default_path;
-
- DBUG_ENTER("innobase_init");
- handlerton *innobase_hton= (handlerton *)p;
- innodb_hton_ptr = innobase_hton;
-
- innobase_hton->state = SHOW_OPTION_YES;
- innobase_hton->db_type= DB_TYPE_INNODB;
- innobase_hton->savepoint_offset=sizeof(trx_named_savept_t);
- innobase_hton->close_connection=innobase_close_connection;
- innobase_hton->savepoint_set=innobase_savepoint;
- innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
- innobase_hton->savepoint_release=innobase_release_savepoint;
- innobase_hton->commit=innobase_commit;
- innobase_hton->rollback=innobase_rollback;
- innobase_hton->prepare=innobase_xa_prepare;
- innobase_hton->recover=innobase_xa_recover;
- innobase_hton->commit_by_xid=innobase_commit_by_xid;
- innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
- innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
- innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
- innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
- innobase_hton->create=innobase_create_handler;
- innobase_hton->drop_database=innobase_drop_database;
- innobase_hton->panic=innobase_end;
- innobase_hton->start_consistent_snapshot=innobase_start_trx_and_assign_read_view;
- innobase_hton->flush_logs=innobase_flush_logs;
- innobase_hton->show_status=innobase_show_status;
- innobase_hton->flags=HTON_NO_FLAGS;
- innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
-
- ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
-
-#ifdef UNIV_DEBUG
- static const char test_filename[] = "-@";
- char test_tablename[sizeof test_filename
- + sizeof srv_mysql50_table_name_prefix];
- if ((sizeof test_tablename) - 1
- != filename_to_tablename(test_filename, test_tablename,
- sizeof test_tablename)
- || strncmp(test_tablename,
- srv_mysql50_table_name_prefix,
- sizeof srv_mysql50_table_name_prefix)
- || strcmp(test_tablename
- + sizeof srv_mysql50_table_name_prefix,
- test_filename)) {
- sql_print_error("tablename encoding has been changed");
- goto error;
- }
-#endif /* UNIV_DEBUG */
-
- /* Check that values don't overflow on 32-bit systems. */
- if (sizeof(ulint) == 4) {
- if (innobase_buffer_pool_size > UINT_MAX32) {
- sql_print_error(
- "innobase_buffer_pool_size can't be over 4GB"
- " on 32-bit systems");
-
- goto error;
- }
-
- if (innobase_log_file_size > UINT_MAX32) {
- sql_print_error(
- "innobase_log_file_size can't be over 4GB"
- " on 32-bit systems");
-
- goto error;
- }
- }
-
- os_innodb_umask = (ulint)my_umask;
-
- /* First calculate the default path for innodb_data_home_dir etc.,
- in case the user has not given any value.
-
- Note that when using the embedded server, the datadirectory is not
- necessarily the current directory of this program. */
-
- if (mysqld_embedded) {
- default_path = mysql_real_data_home;
- fil_path_to_mysql_datadir = mysql_real_data_home;
- } else {
- /* It's better to use current lib, to keep paths short */
- current_dir[0] = FN_CURLIB;
- current_dir[1] = FN_LIBCHAR;
- current_dir[2] = 0;
- default_path = current_dir;
- }
-
- ut_a(default_path);
-
- if (specialflag & SPECIAL_NO_PRIOR) {
- srv_set_thread_priorities = FALSE;
- } else {
- srv_set_thread_priorities = TRUE;
- srv_query_thread_priority = QUERY_PRIOR;
- }
-
- /* Set InnoDB initialization parameters according to the values
- read from MySQL .cnf file */
-
- /*--------------- Data files -------------------------*/
-
- /* The default dir for data files is the datadir of MySQL */
-
- srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
- default_path);
-
- /* Set default InnoDB data file size to 10 MB and let it be
- auto-extending. Thus users can use InnoDB in >= 4.0 without having
- to specify any startup options. */
-
- if (!innobase_data_file_path) {
- innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
- }
-
- /* Since InnoDB edits the argument in the next call, we make another
- copy of it: */
-
- internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
- MYF(MY_FAE));
-
- ret = (bool) srv_parse_data_file_paths_and_sizes(
- internal_innobase_data_file_path,
- &srv_data_file_names,
- &srv_data_file_sizes,
- &srv_data_file_is_raw_partition,
- &srv_n_data_files,
- &srv_auto_extend_last_data_file,
- &srv_last_file_size_max);
- if (ret == FALSE) {
- sql_print_error(
- "InnoDB: syntax error in innodb_data_file_path");
- my_free(internal_innobase_data_file_path,
- MYF(MY_ALLOW_ZERO_PTR));
- goto error;
- }
-
- /* -------------- Log files ---------------------------*/
-
- /* The default dir for log files is the datadir of MySQL */
-
- if (!innobase_log_group_home_dir) {
- innobase_log_group_home_dir = default_path;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Since innodb_log_arch_dir has no relevance under MySQL,
- starting from 4.0.6 we always set it the same as
- innodb_log_group_home_dir: */
-
- innobase_log_arch_dir = innobase_log_group_home_dir;
-
- srv_arch_dir = innobase_log_arch_dir;
-#endif /* UNIG_LOG_ARCHIVE */
-
- ret = (bool)
- srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
- &srv_log_group_home_dirs);
-
- if (ret == FALSE || innobase_mirrored_log_groups != 1) {
- sql_print_error("syntax error in innodb_log_group_home_dir, or a "
- "wrong number of mirrored log groups");
-
- my_free(internal_innobase_data_file_path,
- MYF(MY_ALLOW_ZERO_PTR));
- goto error;
- }
-
- /* --------------------------------------------------*/
-
- srv_file_flush_method_str = innobase_unix_file_flush_method;
-
- srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
- srv_n_log_files = (ulint) innobase_log_files_in_group;
- srv_log_file_size = (ulint) innobase_log_file_size;
-
- srv_thread_concurrency_timer_based =
- (ibool) innobase_thread_concurrency_timer_based;
-
-#ifdef UNIV_LOG_ARCHIVE
- srv_log_archive_on = (ulint) innobase_log_archive;
-#endif /* UNIV_LOG_ARCHIVE */
- srv_log_buffer_size = (ulint) innobase_log_buffer_size;
-
- srv_io_capacity = (ulint) innobase_io_capacity;
- srv_extra_dirty_writes = (ulint) innobase_extra_dirty_writes;
-
- /* We set srv_pool_size here in units of 1 kB. InnoDB internally
- changes the value so that it becomes the number of database pages. */
-
- if (innobase_buffer_pool_awe_mem_mb == 0) {
- srv_pool_size = (ulint)(innobase_buffer_pool_size / 1024);
- } else {
- srv_use_awe = TRUE;
- srv_pool_size = (ulint)
- (1024 * innobase_buffer_pool_awe_mem_mb);
- srv_awe_window_size = (ulint) innobase_buffer_pool_size;
-
- /* Note that what the user specified as
- innodb_buffer_pool_size is actually the AWE memory window
- size in this case, and the real buffer pool size is
- determined by .._awe_mem_mb. */
- }
-
- srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
-
- srv_n_read_io_threads = (ulint) innobase_read_io_threads;
- srv_n_write_io_threads = (ulint) innobase_write_io_threads;
- srv_max_merged_io = (ulint) innobase_max_merged_io;
-
- srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
- srv_force_recovery = (ulint) innobase_force_recovery;
-
- srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
- srv_use_checksums = (ibool) innobase_use_checksums;
-
-#ifdef HAVE_LARGE_PAGES
- if ((os_use_large_pages = (ibool) my_use_large_pages))
- os_large_page_size = (ulint) opt_large_page_size;
-#endif
-
- row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
-
- srv_file_per_table = (ibool) innobase_file_per_table;
- srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
-
- srv_max_n_open_files = (ulint) innobase_open_files;
- srv_innodb_status = (ibool) innobase_create_status_file;
-
- srv_use_adaptive_hash_indexes =
- (ibool) innobase_adaptive_hash_index;
-
- srv_print_verbose_log = mysqld_embedded ? 0 : 1;
-
- /* Store the default charset-collation number of this MySQL
- installation */
-
- data_mysql_default_charset_coll = (ulint)default_charset_info->number;
-
- ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
- my_charset_latin1.number);
- ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
-
- /* Store the latin1_swedish_ci character ordering table to InnoDB. For
- non-latin1_swedish_ci charsets we use the MySQL comparison functions,
- and consequently we do not need to know the ordering internally in
- InnoDB. */
-
- ut_a(0 == strcmp((char*)my_charset_latin1.name,
- (char*)"latin1_swedish_ci"));
- memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
-
- /* Since we in this module access directly the fields of a trx
- struct, and due to different headers and flags it might happen that
- mutex_t has a different size in this module and in InnoDB
- modules, we check at run time that the size is the same in
- these compilation modules. */
-
- srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);
-
- err = innobase_start_or_create_for_mysql();
-
- if (err != DB_SUCCESS) {
- my_free(internal_innobase_data_file_path,
- MYF(MY_ALLOW_ZERO_PTR));
- goto error;
- }
-
- (void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
- (hash_get_key) innobase_get_key, 0, 0);
- pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
- pthread_cond_init(&commit_cond, NULL);
- innodb_inited= 1;
-
- DBUG_RETURN(FALSE);
-error:
- DBUG_RETURN(TRUE);
-}
-
-/***********************************************************************
-Closes an InnoDB database. Apart from the optional NetWare panic flag,
-nothing is done unless innodb_inited is set. In that case innodb_inited
-is cleared, InnoDB is shut down with the configured fast-shutdown
-setting, and the open-tables hash, the copy of the data file path and
-the mutexes and condition variable of this module are released.
-Neither hton nor type is read in this function. */
-static
-int
-innobase_end(handlerton *hton, ha_panic_function type)
-/*==============*/
- /* out: 0 if success, 1 if
- innobase_shutdown_for_mysql() did not return DB_SUCCESS */
-{
- int err= 0;
-
- DBUG_ENTER("innobase_end");
-
-#ifdef __NETWARE__ /* some special cleanup for NetWare */
- if (nw_panic) {
- set_panic_flag_for_netware();
- }
-#endif
- if (innodb_inited) {
-
- srv_fast_shutdown = (ulint) innobase_fast_shutdown;
- innodb_inited = 0;
- if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
- err = 1;
- }
- hash_free(&innobase_open_tables);
- my_free(internal_innobase_data_file_path,
- MYF(MY_ALLOW_ZERO_PTR));
- pthread_mutex_destroy(&innobase_share_mutex);
- pthread_mutex_destroy(&prepare_commit_mutex);
- pthread_mutex_destroy(&commit_threads_m);
- pthread_mutex_destroy(&commit_cond_m);
- pthread_cond_destroy(&commit_cond);
- }
-
- DBUG_RETURN(err);
-}
-
-/********************************************************************
-Asks InnoDB to write its log buffer to disk by calling
-log_buffer_flush_to_disk(). As the original author remarked, a commit
-already flushes the logs, and innobase_checkpoint would be a better name
-for this function. The handlerton argument is not used. */
-static
-bool
-innobase_flush_logs(handlerton *hton)
-/*=====================*/
-				/* out: TRUE if error; this
-				implementation always reports
-				"no error" */
-{
-	DBUG_ENTER("innobase_flush_logs");
-
-	log_buffer_flush_to_disk();
-
-	DBUG_RETURN(false);
-}
-
-/*********************************************************************
-Commits the given transaction with trx_commit_for_mysql(). A transaction
-whose state is TRX_NOT_STARTED is left untouched. */
-static
-void
-innobase_commit_low(
-/*================*/
-	trx_t*	trx)	/* in: transaction handle */
-{
-	if (trx->conc_state != TRX_NOT_STARTED) {
-
-		trx_commit_for_mysql(trx);
-	}
-}
-
-/*********************************************************************
-Creates an InnoDB transaction struct for the thd if it does not yet have one.
-Starts a new InnoDB transaction if a transaction is not yet started. And
-assigns a new snapshot for a consistent read if the transaction does not yet
-have one. If the transaction is not yet flagged as active towards MySQL,
-it is registered with MySQL and flagged. */
-static
-int
-innobase_start_trx_and_assign_read_view(
-/*====================================*/
-				/* out: 0 */
-	handlerton	*hton,	/* in: Innodb handlerton */
-	THD*		thd)	/* in: MySQL thread handle of the user for
-				whom the transaction should be started */
-{
-	trx_t*	trx;
-
-	DBUG_ENTER("innobase_start_trx_and_assign_read_view");
-
-	/* Create a new trx struct for thd, if it does not yet have one */
-
-	trx = check_trx_exists(thd);
-
-	/* This is just to play safe: release a possible FIFO ticket and
-	search latch. Since we will reserve the kernel mutex, we have to
-	release the search system latch first to obey the latching order. */
-
-	innobase_release_stat_resources(trx);
-
-	/* If the transaction is not started yet, start it */
-
-	trx_start_if_not_started_noninline(trx);
-
-	/* Assign a read view if the transaction does not have it yet */
-
-	trx_assign_read_view(trx);
-
-	/* Set the MySQL flag to mark that there is an active transaction.
-	The registration is done for the same thd whose trx struct was
-	looked up above, so that the trx and the registration can never
-	refer to different connections. */
-
-	if (trx->active_trans == 0) {
-		innobase_register_trx_and_stmt(hton, thd);
-		trx->active_trans = 1;
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*********************************************************************
-Commits a transaction in an InnoDB database or marks an SQL statement
-ended. The whole transaction is committed when all is TRUE, or when
-thd_test_options() reports neither OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN
-for thd; otherwise only the end of the SQL statement is marked. In both
-cases trx->n_autoinc_rows is reset, a possible ticket in the concurrency
-FIFO is released and the master thread is woken up.
-NOTE(review): on a full commit prepare_commit_mutex is unlocked when
-trx->active_trans == 2; presumably the XA prepare step locked it and set
-that value - confirm against innobase_xa_prepare(). */
-static
-int
-innobase_commit(
-/*============*/
- /* out: 0 */
- handlerton *hton, /* in: Innodb handlerton */
- THD* thd, /* in: MySQL thread handle of the user for whom
- the transaction should be committed */
- bool all) /* in: TRUE - commit transaction
- FALSE - the current SQL statement ended */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_commit");
- DBUG_PRINT("trans", ("ending transaction"));
-
- trx = check_trx_exists(thd);
-
- /* Update the info whether we should skip XA steps that eat CPU time */
- trx->support_xa = THDVAR(thd, support_xa);
-
- /* Since we will reserve the kernel mutex, we have to release
- the search system latch first to obey the latching order. */
-
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- /* The flag trx->active_trans is set to 1 in
-
- 1. ::external_lock(),
- 2. ::start_stmt(),
- 3. innobase_query_caching_of_table_permitted(),
- 4. innobase_savepoint(),
- 5. ::init_table_handle_for_HANDLER(),
- 6. innobase_start_trx_and_assign_read_view(),
- 7. ::transactional_table_lock()
-
- and it is only set to 0 in a commit or a rollback. If it is 0 we know
- there cannot be resources to be freed and we could return immediately.
- For the time being, we play safe and do the cleanup though there should
- be nothing to clean up. */
-
- if (trx->active_trans == 0
- && trx->conc_state != TRX_NOT_STARTED) {
-
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
- }
- if (all
- || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex.
-
- When srv_commit_concurrency > 0, commit_threads counts the
- threads that are between the increment below and the matching
- decrement after innobase_commit_low(). A thread that would push
- the count above the limit takes its increment back, waits on
- commit_cond, and then starts over from the retry label. */
-retry:
- if (srv_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > srv_commit_concurrency) {
- commit_threads--;
- pthread_cond_wait(&commit_cond,
- &commit_cond_m);
- pthread_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- pthread_mutex_unlock(&commit_cond_m);
- }
- }
-
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos();
-
- innobase_commit_low(trx);
-
- if (srv_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads--;
- pthread_cond_signal(&commit_cond);
- pthread_mutex_unlock(&commit_cond_m);
- }
-
- if (trx->active_trans == 2) {
-
- pthread_mutex_unlock(&prepare_commit_mutex);
- }
-
- trx->active_trans = 0;
-
- } else {
- /* We just mark the SQL statement ended and do not do a
- transaction commit */
-
- /* If we had reserved the auto-inc lock for some
- table in this SQL statement we release it now */
-
- row_unlock_table_autoinc_for_mysql(trx);
-
- /* Store the current undo_no of the transaction so that we
- know where to roll back if we have to roll back the next
- SQL statement */
-
- trx_mark_sql_stat_end(trx);
- }
-
- trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */
-
- if (trx->declared_to_be_inside_innodb) {
- /* Release our possible ticket in the FIFO */
-
- srv_conc_force_exit_innodb(trx);
- }
-
- /* Tell the InnoDB server that there might be work for utility
- threads: */
- srv_active_wake_master_thread();
-
- DBUG_RETURN(0);
-}
-
-/*********************************************************************
-Rolls back the whole transaction of thd, or only the work of its latest
-SQL statement. */
-static
-int
-innobase_rollback(
-/*==============*/
-				/* out: 0 or error number */
-	handlerton	*hton,	/* in: Innodb handlerton */
-	THD*		thd,	/* in: handle to the MySQL thread of the
-				user whose transaction should be rolled
-				back */
-	bool		all)	/* in: TRUE - roll back the whole
-				transaction; FALSE - roll back only the
-				latest SQL statement, unless neither
-				OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN is
-				set for thd, in which case the whole
-				transaction is rolled back as well */
-{
-	int	rollback_err = 0;
-	trx_t*	trx;
-
-	DBUG_ENTER("innobase_rollback");
-	DBUG_PRINT("trans", ("aborting transaction"));
-
-	trx = check_trx_exists(thd);
-
-	/* Refresh the flag that tells whether the CPU-consuming XA steps
-	may be skipped */
-	trx->support_xa = THDVAR(thd, support_xa);
-
-	/* The kernel mutex will be reserved, so a possible FIFO ticket
-	and the search system latch have to be given up first in order
-	to obey the latching order. */
-
-	innobase_release_stat_resources(trx);
-
-	/* A possibly reserved auto-inc lock (when only the latest SQL
-	statement is rolled back) is released before the rollback,
-	which may take long. */
-
-	row_unlock_table_autoinc_for_mysql(trx);
-
-	if (!all
-	    && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
-		rollback_err = trx_rollback_last_sql_stat_for_mysql(trx);
-	} else {
-		rollback_err = trx_rollback_for_mysql(trx);
-		trx->active_trans = 0;
-	}
-
-	DBUG_RETURN(convert_error_code_to_mysql(rollback_err, NULL));
-}
-
-/*********************************************************************
-Rolls back the given transaction as a whole and converts the InnoDB
-result to a MySQL error number. */
-static
-int
-innobase_rollback_trx(
-/*==================*/
-			/* out: 0 or error number */
-	trx_t*	trx)	/* in: transaction */
-{
-	int	rollback_err = 0;
-
-	DBUG_ENTER("innobase_rollback_trx");
-	DBUG_PRINT("trans", ("aborting transaction"));
-
-	/* The kernel mutex will be reserved, so a possible FIFO ticket
-	and the search system latch have to be given up first in order
-	to obey the latching order. */
-
-	innobase_release_stat_resources(trx);
-
-	/* Give up a possibly reserved auto-inc lock before the rollback,
-	which may take long. */
-
-	row_unlock_table_autoinc_for_mysql(trx);
-
-	rollback_err = trx_rollback_for_mysql(trx);
-
-	DBUG_RETURN(convert_error_code_to_mysql(rollback_err, NULL));
-}
-
-/*********************************************************************
-Rolls back a transaction to a savepoint. The savepoint name handed to
-InnoDB is the text that longlong2str() produces from the address of the
-savepoint data area. */
-static
-int
-innobase_rollback_to_savepoint(
-/*===========================*/
-				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
-				no savepoint with the given name */
-	handlerton	*hton,	/* in: Innodb handlerton */
-	THD*		thd,	/* in: handle to the MySQL thread of the
-				user whose transaction should be rolled
-				back */
-	void*		savepoint) /* in: savepoint data */
-{
-	ib_longlong	binlog_cache_pos; /* set by InnoDB, not read here */
-	char		savept_name[64];
-	trx_t*		trx;
-	int		savept_err = 0;
-
-	DBUG_ENTER("innobase_rollback_to_savepoint");
-
-	trx = check_trx_exists(thd);
-
-	/* The kernel mutex will be reserved, so a possible FIFO ticket
-	and the search system latch have to be given up first in order
-	to obey the latching order. */
-
-	innobase_release_stat_resources(trx);
-
-	/* TODO: use provided savepoint data area to store savepoint data */
-
-	longlong2str((ulint)savepoint, savept_name, 36);
-
-	savept_err = (int) trx_rollback_to_savepoint_for_mysql(
-		trx, savept_name, &binlog_cache_pos);
-
-	DBUG_RETURN(convert_error_code_to_mysql(savept_err, NULL));
-}
-
-/*********************************************************************
-Release transaction savepoint name. The name is derived from the address
-of the savepoint data area in the same way as when the savepoint was
-set. */
-static
-int
-innobase_release_savepoint(
-/*=======================*/
-				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
-				no savepoint with the given name */
-	handlerton*	hton,	/* in: handlerton for Innodb */
-	THD*		thd,	/* in: handle to the MySQL thread of the
-				user whose savepoint should be released */
-	void*		savepoint) /* in: savepoint data */
-{
-	char	savept_name[64];
-	trx_t*	trx;
-	int	savept_err = 0;
-
-	DBUG_ENTER("innobase_release_savepoint");
-
-	trx = check_trx_exists(thd);
-
-	/* TODO: use provided savepoint data area to store savepoint data */
-
-	longlong2str((ulint)savepoint, savept_name, 36);
-
-	savept_err = (int) trx_release_savepoint_for_mysql(trx, savept_name);
-
-	DBUG_RETURN(convert_error_code_to_mysql(savept_err, NULL));
-}
-
-/*********************************************************************
-Sets a transaction savepoint. The savepoint is named after the address
-of the savepoint data area, rendered as text by longlong2str(). */
-static
-int
-innobase_savepoint(
-/*===============*/
-				/* out: the MySQL error code converted from
-				the result of trx_savepoint_for_mysql() */
-	handlerton*	hton,	/* in: handle to the Innodb handlerton */
-	THD*		thd,	/* in: handle to the MySQL thread */
-	void*		savepoint) /* in: savepoint data */
-{
-	int	savept_err = 0;
-	trx_t*	trx;
-
-	DBUG_ENTER("innobase_savepoint");
-
-	/* Setting a savepoint makes no sense in the autocommit mode
-	(except inside a sub-statement), and the SQL layer makes sure
-	that this function is not called in that situation. */
-#ifdef MYSQL_SERVER /* plugins cannot access thd->in_sub_stmt */
-	DBUG_ASSERT(thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) ||
-		thd->in_sub_stmt);
-#endif /* MYSQL_SERVER */
-
-	trx = check_trx_exists(thd);
-
-	/* The kernel mutex will be reserved, so a possible FIFO ticket
-	and the search system latch have to be given up first in order
-	to obey the latching order. */
-
-	innobase_release_stat_resources(trx);
-
-	/* A savepoint can only be set inside a transaction */
-	DBUG_ASSERT(trx->active_trans);
-
-	/* TODO: use provided savepoint data area to store savepoint data */
-	char	savept_name[64];
-	longlong2str((ulint)savepoint, savept_name, 36);
-
-	savept_err = (int) trx_savepoint_for_mysql(trx, savept_name,
-						   (ib_longlong)0);
-
-	DBUG_RETURN(convert_error_code_to_mysql(savept_err, NULL));
-}
-
-/*********************************************************************
-Frees a possible InnoDB trx object associated with the current THD.
-The trx object is required to exist (ut_a). It is rolled back with
-innobase_rollback_trx() before it is freed; if the transaction is still
-started and log_warnings is set, a warning with the low word of
-trx->undo_no is written to the error log first. */
-static
-int
-innobase_close_connection(
-/*======================*/
- /* out: 0; the result of the rollback
- is not propagated to the caller */
- handlerton* hton, /* in: innobase handlerton */
- THD* thd) /* in: handle to the MySQL thread of the user
- whose resources should be free'd */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_close_connection");
- DBUG_ASSERT(hton == innodb_hton_ptr);
- trx = thd_to_trx(thd);
-
- ut_a(trx);
-
- if (trx->active_trans == 0
- && trx->conc_state != TRX_NOT_STARTED) {
-
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
- }
-
-
- if (trx->conc_state != TRX_NOT_STARTED &&
- global_system_variables.log_warnings) {
- sql_print_warning(
- "MySQL is closing a connection that has an active "
- "InnoDB transaction. %lu row modifications will "
- "roll back.",
- (ulong) trx->undo_no.low);
- }
-
- innobase_rollback_trx(trx);
-
- thr_local_free(trx->mysql_thread_id);
- trx_free_for_mysql(trx);
-
- DBUG_RETURN(0);
-}
-
-
-/*****************************************************************************
-** InnoDB database tables
-*****************************************************************************/
-
-/********************************************************************
-Get the record format from the data dictionary. Without a prebuilt
-struct or without a table in it, a debug assertion fires and
-ROW_TYPE_NOT_USED is returned. */
-enum row_type
-ha_innobase::get_row_type() const
-/*=============================*/
-			/* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT */
-{
-	if (!prebuilt || !prebuilt->table) {
-		ut_ad(0);
-		return(ROW_TYPE_NOT_USED);
-	}
-
-	return(dict_table_is_comp_noninline(prebuilt->table)
-	       ? ROW_TYPE_COMPACT
-	       : ROW_TYPE_REDUNDANT);
-}
-
-
-
-/********************************************************************
-Get the table flags to use for the statement. HA_BINLOG_STMT_CAPABLE is
-added to int_table_flags only when the isolation level of the current
-connection is above ISO_READ_COMMITTED. */
-handler::Table_flags
-ha_innobase::table_flags() const
-{
-	/* The isolation level is taken from the THD, because this
-	method is (also) called before prebuilt is inited. */
-	ulong const	tx_isolation = thd_tx_isolation(current_thd);
-
-	if (tx_isolation > ISO_READ_COMMITTED) {
-		return int_table_flags | HA_BINLOG_STMT_CAPABLE;
-	}
-
-	return int_table_flags;
-}
-
-/********************************************************************
-File extensions of an InnoDB single-table tablespace: a NullS-terminated
-list whose only entry is ".ibd". */
-static const char*	ha_innobase_exts[] = { ".ibd", NullS };
-
-/********************************************************************
-Gives the file extension list defined above. */
-const char**
-ha_innobase::bas_ext() const
-/*========================*/
-			/* out: NullS-terminated array of file
-			extension strings */
-{
-	return(ha_innobase_exts);
-}
-
-
-/*********************************************************************
-Normalizes a table name string. A normalized name consists of the
-database name catenated to '/' and table name. An example:
-test/mytable. On Windows normalization puts both the database name and the
-table name always to lower case.
-The input is scanned backwards for path separators ('/' or a backslash):
-what follows the last separator is taken as the table name, and what lies
-between the last two separators (or between the start of the string and
-the last separator) as the database name.
-NOTE(review): no buffer length is passed in, so norm_name must be able to
-hold the copied part of name including the terminating null; the caller
-visible here, ha_innobase::open(), passes a 1000-byte buffer. */
-static
-void
-normalize_table_name(
-/*=================*/
- char* norm_name, /* out: normalized name as a
- null-terminated string */
- const char* name) /* in: table name string */
-{
- char* name_ptr;
- char* db_ptr;
- char* ptr;
-
- /* Scan name from the end */
-
- ptr = strend(name)-1;
-
- while (ptr >= name && *ptr != '\\' && *ptr != '/') {
- ptr--;
- }
-
- name_ptr = ptr + 1;
-
- DBUG_ASSERT(ptr > name);
-
- ptr--;
-
- while (ptr >= name && *ptr != '\\' && *ptr != '/') {
- ptr--;
- }
-
- db_ptr = ptr + 1;
-
- memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));
-
- norm_name[name_ptr - db_ptr - 1] = '/';
-
-#ifdef __WIN__
- innobase_casedn_str(norm_name);
-#endif
-}
-
-/************************************************************************
-Set the autoinc column max value. This should only be called once from
-ha_innobase::open(). Therefore there's no need for a covering lock.
-The maximum value of the auto-increment column is read through the index
-that MySQL names as next_number_index, and the autoinc counter of the
-table is initialized to that maximum plus one. If the read fails, the
-error is printed to stderr and the counter is left untouched. */
-
-ulong
-ha_innobase::innobase_initialize_autoinc()
-/*======================================*/
-				/* out: DB_SUCCESS, or the error code
-				returned by row_search_max_autoinc() */
-{
-	dict_index_t*	index;
-	ulonglong	auto_inc;
-	const char*	col_name;
-	ulint		error = DB_SUCCESS;
-	dict_table_t*	innodb_table = prebuilt->table;
-
-	col_name = table->found_next_number_field->field_name;
-	index = innobase_get_index(table->s->next_number_index);
-
-	/* Execute SELECT MAX(col_name) FROM TABLE; */
-	error = row_search_max_autoinc(index, col_name, &auto_inc);
-
-	if (error == DB_SUCCESS) {
-
-		/* At this stage we do not know the increment or the
-		offset, so use the default increment of 1. */
-		++auto_inc;
-
-		dict_table_autoinc_initialize(innodb_table, auto_inc);
-
-	} else {
-		/* ulint is not unsigned long on every platform, so the
-		value is cast to match the %lu conversion, as is done
-		for the other %lu arguments in this file. */
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: Error: (%lu) Couldn't read "
-			"the MAX(%s) autoinc value from the "
-			"index (%s).\n", (ulong) error, col_name,
-			index->name);
-	}
-
-	return(ulong(error));
-}
-
-/*********************************************************************
-Creates and opens a handle to a table which already exists in an InnoDB
-database. The steps are: release temporary latches of the thread,
-normalize the table name, obtain the share and the update/key value
-buffers, look the table up in the InnoDB dictionary cache (with retries
-for partitioned tables), create the prebuilt struct, determine the row
-reference length, and initialize the autoinc counter if it is still 0. */
-
-int
-ha_innobase::open(
-/*==============*/
-					/* out: 0 if success; 1 if the
-					share or the buffers could not be
-					obtained; HA_ERR_NO_SUCH_TABLE if
-					the table is not found in the
-					InnoDB dictionary or its .ibd file
-					is missing */
-	const char*	name,		/* in: table name */
-	int		mode,		/* in: not used */
-	uint		test_if_locked)	/* in: not used */
-{
-	dict_table_t*	ib_table;
-	char		norm_name[1000];
-	THD*		thd;
-	ulint		retries = 0;
-	char*		is_part = NULL;
-
-	DBUG_ENTER("ha_innobase::open");
-
-	UT_NOT_USED(mode);
-	UT_NOT_USED(test_if_locked);
-
-	thd = ha_thd();
-
-	/* Under some cases MySQL seems to call this function while
-	holding btr_search_latch. This breaks the latching order as
-	we acquire dict_sys->mutex below and leads to a deadlock. */
-	if (thd != NULL) {
-		innobase_release_temporary_latches(ht, thd);
-	}
-
-	normalize_table_name(norm_name, name);
-
-	user_thd = NULL;
-
-	if (!(share=get_share(name))) {
-
-		DBUG_RETURN(1);
-	}
-
-	/* Create buffers for packing the fields of a record. Why
-	table->reclength did not work here? Obviously, because char
-	fields when packed actually became 1 byte longer, when we also
-	stored the string length as the first byte. */
-
-	upd_and_key_val_buff_len =
-				table->s->reclength + table->s->max_key_length
-							+ MAX_REF_PARTS * 3;
-
-	/* Both buffers are requested with one call; only upd_buff is
-	passed to my_free() on the error paths below and in close(). */
-	if (!(uchar*) my_multi_malloc(MYF(MY_WME),
-			&upd_buff, upd_and_key_val_buff_len,
-			&key_val_buff, upd_and_key_val_buff_len,
-			NullS)) {
-		free_share(share);
-
-		DBUG_RETURN(1);
-	}
-
-	/* We look for pattern #P# to see if the table is partitioned
-	MySQL table. The retry logic for partitioned tables is a
-	workaround for http://bugs.mysql.com/bug.php?id=33349. Look
-	at support issue https://support.mysql.com/view.php?id=21080
-	for more details. */
-	is_part = strstr(norm_name, "#P#");
-retry:
-	/* Get pointer to a table object in InnoDB dictionary cache */
-	ib_table = dict_table_get(norm_name, TRUE);
-
-	if (NULL == ib_table) {
-		if (is_part && retries < 10) {
-			++retries;
-			os_thread_sleep(100000);
-			goto retry;
-		}
-
-		if (is_part) {
-			/* retries is a ulint; cast it so that the
-			argument matches the %lu conversion */
-			sql_print_error("Failed to open table %s after "
-					"%lu attempts.\n", norm_name,
-					(ulong) retries);
-		}
-
-		sql_print_error("Cannot find or open table %s from\n"
-				"the internal data dictionary of InnoDB "
-				"though the .frm file for the\n"
-				"table exists. Maybe you have deleted and "
-				"recreated InnoDB data\n"
-				"files but have forgotten to delete the "
-				"corresponding .frm files\n"
-				"of InnoDB tables, or you have moved .frm "
-				"files to another database?\n"
-				"or, the table contains indexes that this "
-				"version of the engine\n"
-				"doesn't support.\n"
-				"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
-				"how you can resolve the problem.\n",
-				norm_name);
-		free_share(share);
-		my_free(upd_buff, MYF(0));
-		my_errno = ENOENT;
-
-		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
-	}
-
-	if (ib_table->ibd_file_missing && !thd_tablespace_op(thd)) {
-		sql_print_error("MySQL is trying to open a table handle but "
-				"the .ibd file for\ntable %s does not exist.\n"
-				"Have you deleted the .ibd file from the "
-				"database directory under\nthe MySQL datadir, "
-				"or have you used DISCARD TABLESPACE?\n"
-				"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
-				"how you can resolve the problem.\n",
-				norm_name);
-		free_share(share);
-		my_free(upd_buff, MYF(0));
-		my_errno = ENOENT;
-
-		/* Give back the handle that dict_table_get() counted */
-		dict_table_decrement_handle_count(ib_table);
-		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
-	}
-
-	prebuilt = row_create_prebuilt(ib_table);
-
-	prebuilt->mysql_row_len = table->s->reclength;
-	prebuilt->default_rec = table->s->default_values;
-	ut_ad(prebuilt->default_rec);
-
-	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */
-
-	primary_key = table->s->primary_key;
-	key_used_on_scan = primary_key;
-
-	/* Allocate a buffer for a 'row reference'. A row reference is
-	a string of bytes of length ref_length which uniquely specifies
-	a row in our table. Note that MySQL may also compare two row
-	references for equality by doing a simple memcmp on the strings
-	of length ref_length! */
-
-	if (!row_table_got_default_clust_index(ib_table)) {
-		if (primary_key >= MAX_KEY) {
-			sql_print_error("Table %s has a primary key in InnoDB data "
-					"dictionary, but not in MySQL!", name);
-		}
-
-		prebuilt->clust_index_was_generated = FALSE;
-
-		/* MySQL allocates the buffer for ref. key_info->key_length
-		includes space for all key columns + one byte for each column
-		that may be NULL. ref_length must be as exact as possible to
-		save space, because all row reference buffers are allocated
-		based on ref_length.
-
-		NOTE(review): when the mismatch reported just above occurs
-		(primary_key >= MAX_KEY), the index used below looks like it
-		may lie outside key_info[] - confirm and handle that case. */
-
-		ref_length = table->key_info[primary_key].key_length;
-	} else {
-		if (primary_key != MAX_KEY) {
-			sql_print_error("Table %s has no primary key in InnoDB data "
-					"dictionary, but has one in MySQL! If you "
-					"created the table with a MySQL version < "
-					"3.23.54 and did not define a primary key, "
-					"but defined a unique key with all non-NULL "
-					"columns, then MySQL internally treats that "
-					"key as the primary key. You can fix this "
-					"error by dump + DROP + CREATE + reimport "
-					"of the table.", name);
-		}
-
-		prebuilt->clust_index_was_generated = TRUE;
-
-		ref_length = DATA_ROW_ID_LEN;
-
-		/* If we automatically created the clustered index, then
-		MySQL does not know about it, and MySQL must NOT be aware
-		of the index used on scan, to make it avoid checking if we
-		update the column of the index. That is why we check below
-		that key_used_on_scan is the undefined value MAX_KEY.
-		The column is the row id in the automatic generation case,
-		and it will never be updated anyway. */
-
-		if (key_used_on_scan != MAX_KEY) {
-			sql_print_warning(
-				"Table %s key_used_on_scan is %lu even "
-				"though there is no primary key inside "
-				"InnoDB.", name, (ulong) key_used_on_scan);
-		}
-	}
-
-	stats.block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
-					in query optimization */
-
-	/* Init table lock structure */
-	thr_lock_data_init(&share->lock,&lock,(void*) 0);
-
-	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
-
-	/* Only if the table has an AUTOINC column. */
-	if (prebuilt->table != NULL && table->found_next_number_field != NULL) {
-		ulint	error;
-
-		dict_table_autoinc_lock(prebuilt->table);
-
-		/* Since a table can already be "open" in InnoDB's internal
-		data dictionary, we only init the autoinc counter once, the
-		first time the table is loaded. We can safely reuse the
-		autoinc value from a previous MySQL open. */
-		if (dict_table_autoinc_read(prebuilt->table) == 0) {
-
-			error = innobase_initialize_autoinc();
-			/* Should always succeed! */
-			ut_a(error == DB_SUCCESS);
-		}
-
-		dict_table_autoinc_unlock(prebuilt->table);
-	}
-
-	DBUG_RETURN(0);
-}
-
-/* Gives the longest key part length that this handler reports as
-supported: one less than DICT_MAX_INDEX_COL_LEN. */
-uint
-ha_innobase::max_supported_key_part_length() const
-{
-	const uint	max_part_len = DICT_MAX_INDEX_COL_LEN - 1;
-
-	return(max_part_len);
-}
-
-/**********************************************************************
-Closes a handle to an InnoDB table: releases temporary latches of the
-current thread, frees the prebuilt struct, the update buffer and the
-share, and wakes up the InnoDB master thread. */
-
-int
-ha_innobase::close(void)
-/*====================*/
-				/* out: 0 */
-{
-	THD*	cur_thd;
-
-	DBUG_ENTER("ha_innobase::close");
-
-	/* current_thd may be slow, so it is evaluated only once */
-	cur_thd = current_thd;
-
-	if (cur_thd != NULL) {
-		innobase_release_temporary_latches(ht, cur_thd);
-	}
-
-	row_prebuilt_free(prebuilt);
-
-	my_free(upd_buff, MYF(0));
-	free_share(share);
-
-	/* There might now be work for the utility threads of the
-	InnoDB server: */
-
-	srv_active_wake_master_thread();
-
-	DBUG_RETURN(0);
-}
-
-/* The following accessor functions should really be inside MySQL code! */
-
-/******************************************************************
-Computes the distance of a field's data pointer from the start of
-table->record[0]. */
-inline
-uint
-get_field_offset(
-/*=============*/
-			/* out: field->ptr - table->record[0],
-			cast to uint */
-	TABLE*	table,	/* in: MySQL table object */
-	Field*	field)	/* in: MySQL field object */
-{
-	const uint	offset = (uint) (field->ptr - table->record[0]);
-
-	return(offset);
-}
-
-/******************************************************************
-Tells whether a field is SQL NULL in the given record. The position of
-the null byte is derived from the distance between field->null_ptr and
-table->record[0], and is then applied to 'record'. A field without a
-null_ptr is never reported as NULL. */
-static inline
-uint
-field_in_record_is_null(
-/*====================*/
-			/* out: 1 if NULL, 0 otherwise */
-	TABLE*	table,	/* in: MySQL table object */
-	Field*	field,	/* in: MySQL field object */
-	char*	record)	/* in: a row in MySQL format */
-{
-	if (field->null_ptr == NULL) {
-		/* No null byte is associated with this field */
-
-		return(0);
-	}
-
-	const int	null_byte_pos = (uint) ((char*) field->null_ptr
-					- (char*) table->record[0]);
-
-	return((record[null_byte_pos] & field->null_bit) ? 1 : 0);
-}
-
-/******************************************************************
-Marks a field as SQL NULL in the given record by setting the field's
-null bit. The null byte position is the distance between
-field->null_ptr and table->record[0]. */
-inline
-void
-set_field_in_record_to_null(
-/*========================*/
-	TABLE*	table,	/* in: MySQL table object */
-	Field*	field,	/* in: MySQL field object */
-	char*	record)	/* in: a row in MySQL format */
-{
-	const int	null_byte_pos = (uint) ((char*) field->null_ptr
-					- (char*) table->record[0]);
-
-	record[null_byte_pos] |= field->null_bit;
-}
-
-extern "C" {
-/*****************************************************************
-Compares two data fields whose data type requires the MySQL collation
-code for the comparison. NOTE that the prototype of this function is in
-rem0cmp.c in InnoDB source code! If you change this function, remember
-to update the prototype there! */
-
-int
-innobase_mysql_cmp(
-/*===============*/
-					/* out: 1, 0, -1, if a is greater,
-					equal, less than b, respectively */
-	int		mysql_type,	/* in: MySQL type */
-	uint		charset_number,	/* in: number of the charset */
-	unsigned char*	a,		/* in: data field */
-	unsigned int	a_length,	/* in: data field length,
-					not UNIV_SQL_NULL */
-	unsigned char*	b,		/* in: data field */
-	unsigned int	b_length)	/* in: data field length,
-					not UNIV_SQL_NULL */
-{
-	CHARSET_INFO*	cs;
-	int		cmp;
-
-	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
-	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
-
-	switch ((enum_field_types) mysql_type) {
-
-	case MYSQL_TYPE_BIT:
-	case MYSQL_TYPE_STRING:
-	case MYSQL_TYPE_VAR_STRING:
-	case MYSQL_TYPE_TINY_BLOB:
-	case MYSQL_TYPE_MEDIUM_BLOB:
-	case MYSQL_TYPE_BLOB:
-	case MYSQL_TYPE_LONG_BLOB:
-	case MYSQL_TYPE_VARCHAR:
-		/* Pick the charset struct that matches the charset number.
-		The MySQL function get_charset may be slow (it involves a
-		mutex operation), so the two common charsets are tried
-		directly before falling back to it. */
-
-		if (charset_number == default_charset_info->number) {
-			cs = default_charset_info;
-		} else if (charset_number == my_charset_latin1.number) {
-			cs = &my_charset_latin1;
-		} else {
-			cs = get_charset(charset_number, MYF(MY_WME));
-
-			if (cs == NULL) {
-				sql_print_error("InnoDB needs charset %lu for doing "
-						"a comparison, but MySQL cannot "
-						"find that charset.",
-						(ulong) charset_number);
-				ut_a(0);
-			}
-		}
-
-		/* Starting from 4.1.3, strnncollsp() is used in comparisons
-		of non-latin1_swedish_ci strings. NOTE that the collation
-		order changes then: 'b\0\0...' is ordered BEFORE 'b ...'.
-		Users having indexes on such data need to rebuild their
-		tables! */
-
-		cmp = cs->coll->strnncollsp(cs, a, a_length, b, b_length, 0);
-
-		/* Normalize the collation result to -1, 0 or 1 */
-		return(cmp < 0 ? -1 : (cmp > 0 ? 1 : 0));
-	default:
-		assert(0);
-	}
-
-	return(0);
-}
-}
-
-/******************************************************************
-Maps the type of a MySQL field to an InnoDB 'mtype'. InnoDB tells MySQL's
-old <= 4.1 VARCHAR apart from the new true VARCHAR in >= 5.0.3 by the
-'prtype', not by the value returned here. */
-inline
-ulint
-get_innobase_type_from_mysql_type(
-/*==============================*/
-				/* out: DATA_BINARY, DATA_VARCHAR, ... */
-	ulint*	unsigned_flag,	/* out: DATA_UNSIGNED if an 'unsigned type';
-				at least ENUM and SET, and unsigned integer
-				types are 'unsigned types' */
-	Field*	field)		/* in: MySQL field */
-{
-	/* The MySQL type code must fit in 8 bits: this is relied on in
-	ibuf and also when DATA_NOT_NULL is ORed to the type. The asserts
-	below try to check that. */
-
-	DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
-	DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
-	DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
-	DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
-	DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
-
-	*unsigned_flag = (field->flags & UNSIGNED_FLAG) ? DATA_UNSIGNED : 0;
-
-	switch (field->real_type()) {
-	case MYSQL_TYPE_ENUM:
-	case MYSQL_TYPE_SET:
-		/* field->type() is a string type for these in MySQL, but
-		the data is internally stored as an unsigned integer code.
-		MySQL's own unsigned flag is zero for them, so the flag is
-		forced here. */
-
-		*unsigned_flag = DATA_UNSIGNED;
-
-		return(DATA_INT);
-	default:
-		break;
-	}
-
-	switch (field->type()) {
-		/* NOTE that we only allow string types in DATA_MYSQL and
-		DATA_VARMYSQL */
-	case MYSQL_TYPE_VAR_STRING:	/* old <= 4.1 VARCHAR */
-	case MYSQL_TYPE_VARCHAR:	/* new >= 5.0.3 true VARCHAR */
-		if (field->binary()) {
-
-			return(DATA_BINARY);
-		}
-
-		if (strcmp(field->charset()->name,
-			   "latin1_swedish_ci") == 0) {
-
-			return(DATA_VARCHAR);
-		}
-
-		return(DATA_VARMYSQL);
-	case MYSQL_TYPE_BIT:
-	case MYSQL_TYPE_STRING:
-		if (field->binary()) {
-
-			return(DATA_FIXBINARY);
-		}
-
-		if (strcmp(field->charset()->name,
-			   "latin1_swedish_ci") == 0) {
-
-			return(DATA_CHAR);
-		}
-
-		return(DATA_MYSQL);
-	case MYSQL_TYPE_NEWDECIMAL:
-		return(DATA_FIXBINARY);
-	case MYSQL_TYPE_LONG:
-	case MYSQL_TYPE_LONGLONG:
-	case MYSQL_TYPE_TINY:
-	case MYSQL_TYPE_SHORT:
-	case MYSQL_TYPE_INT24:
-	case MYSQL_TYPE_DATE:
-	case MYSQL_TYPE_DATETIME:
-	case MYSQL_TYPE_YEAR:
-	case MYSQL_TYPE_NEWDATE:
-	case MYSQL_TYPE_TIME:
-	case MYSQL_TYPE_TIMESTAMP:
-		return(DATA_INT);
-	case MYSQL_TYPE_FLOAT:
-		return(DATA_FLOAT);
-	case MYSQL_TYPE_DOUBLE:
-		return(DATA_DOUBLE);
-	case MYSQL_TYPE_DECIMAL:
-		return(DATA_DECIMAL);
-	case MYSQL_TYPE_GEOMETRY:
-	case MYSQL_TYPE_TINY_BLOB:
-	case MYSQL_TYPE_MEDIUM_BLOB:
-	case MYSQL_TYPE_BLOB:
-	case MYSQL_TYPE_LONG_BLOB:
-		return(DATA_BLOB);
-	default:
-		assert(0);
-	}
-
-	return(0);
-}
-
-/***********************************************************************
-Stores an unsigned integer value < 64k into 2 bytes, low byte first
-(the little-endian storage format). */
-inline
-void
-innobase_write_to_2_little_endian(
-/*==============================*/
-	byte*	buf,	/* in: where to store */
-	ulint	val)	/* in: value to write, must be < 64k */
-{
-	ut_a(val < 0x10000);
-
-	const byte	low_byte = (byte)(val & 0xFF);
-	const byte	high_byte = (byte)(val >> 8);
-
-	buf[0] = low_byte;
-	buf[1] = high_byte;
-}
-
-/***********************************************************************
-Decodes an unsigned integer value < 64k from 2 bytes stored low byte
-first (the little-endian storage format). */
-inline
-uint
-innobase_read_from_2_little_endian(
-/*===============================*/
-				/* out: value */
-	const uchar*	buf)	/* in: from where to read */
-{
-	const ulint	low_byte = (ulint)(buf[0]);
-	const ulint	high_byte = (ulint)(buf[1]);
-
-	return((uint) (low_byte + 256 * high_byte));
-}
-
-/***********************************************************************
-Stores a key value for a row to a buffer. The first buff_len bytes of the
-buffer are zero-filled, and then every key part of key number keynr is
-appended to it in the MySQL key value format that is described in the
-comment at the start of the function body. Note that the assertion that
-the write position has not passed buff + buff_len is made only after all
-the key parts have been copied. */
-
-uint
-ha_innobase::store_key_val_for_row(
-/*===============================*/
- /* out: key value length as stored in buff */
- uint keynr, /* in: key number */
- char* buff, /* in/out: buffer for the key value (in MySQL
- format) */
- uint buff_len,/* in: buffer length */
- const uchar* record)/* in: row in MySQL format */
-{
- KEY* key_info = table->key_info + keynr;
- KEY_PART_INFO* key_part = key_info->key_part;
- KEY_PART_INFO* end = key_part + key_info->key_parts;
- char* buff_start = buff;
- enum_field_types mysql_type;
- Field* field;
- ibool is_null;
-
- DBUG_ENTER("store_key_val_for_row");
-
- /* The format for storing a key field in MySQL is the following:
-
- 1. If the column can be NULL, then in the first byte we put 1 if the
- field value is NULL, 0 otherwise.
-
- 2. If the column is of a BLOB type (it must be a column prefix field
- in this case), then we put the length of the data in the field to the
- next 2 bytes, in the little-endian format. If the field is SQL NULL,
- then these 2 bytes are set to 0. Note that the length of data in the
- field is <= column prefix length.
-
- 3. In a column prefix field, prefix_len next bytes are reserved for
- data. In a normal field the max field length next bytes are reserved
- for data. For a VARCHAR(n) the max field length is n. If the stored
- value is the SQL NULL then these data bytes are set to 0.
-
- 4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
- in the MySQL row format, the length is stored in 1 or 2 bytes,
- depending on the maximum allowed length. But in the MySQL key value
- format, the length always takes 2 bytes.
-
- We have to zero-fill the buffer so that MySQL is able to use a
- simple memcmp to compare two key values to determine if they are
- equal. MySQL does this to compare contents of two 'ref' values. */
-
- bzero(buff, buff_len);
-
- for (; key_part != end; key_part++) {
- is_null = FALSE;
-
- if (key_part->null_bit) {
- if (record[key_part->null_offset]
- & key_part->null_bit) {
- *buff = 1;
- is_null = TRUE;
- } else {
- *buff = 0;
- }
- buff++;
- }
-
- field = key_part->field;
- mysql_type = field->type();
-
- if (mysql_type == MYSQL_TYPE_VARCHAR) {
- /* >= 5.0.3 true VARCHAR */
- ulint lenlen;
- ulint len;
- byte* data;
- ulint key_len;
- ulint true_len;
- CHARSET_INFO* cs;
- int error=0;
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len + 2;
- /* SQL NULL: the 2 length bytes and the key_len data bytes
- are skipped; they stay zero from the bzero() call above */
- continue;
- }
- cs = field->charset();
-
- lenlen = (ulint)
- (((Field_varstring*)field)->length_bytes);
-
- data = row_mysql_read_true_varchar(&len,
- (byte*) (record
- + (ulint)get_field_offset(table, field)),
- lenlen);
-
- true_len = len;
-
- /* For multi byte character sets we need to calculate
- the true length of the key */
-
- if (len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
- (const char *) data,
- (const char *) data + len,
- (uint) (key_len /
- cs->mbmaxlen),
- &error);
- }
-
- /* In a column prefix index, we may need to truncate
- the stored value: */
-
- if (true_len > key_len) {
- true_len = key_len;
- }
-
- /* The length in a key value is always stored in 2
- bytes */
-
- row_mysql_store_true_var_len((byte*)buff, true_len, 2);
- buff += 2;
-
- memcpy(buff, data, true_len);
-
- /* Note that we always reserve the maximum possible
- length of the true VARCHAR in the key value, though
- only true_len first bytes after the 2 length bytes
- contain actual data. The rest of the space was reset to
- zero in the bzero() call above. */
-
- buff += key_len;
-
- } else if (mysql_type == MYSQL_TYPE_TINY_BLOB
- || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
- || mysql_type == MYSQL_TYPE_BLOB
- || mysql_type == MYSQL_TYPE_LONG_BLOB) {
-
- CHARSET_INFO* cs;
- ulint key_len;
- ulint true_len;
- int error=0;
- ulint blob_len;
- byte* blob_data;
-
- ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len + 2;
- /* SQL NULL: the 2 length bytes and the key_len data bytes
- are skipped; they stay zero from the bzero() call above */
- continue;
- }
-
- cs = field->charset();
-
- blob_data = row_mysql_read_blob_ref(&blob_len,
- (byte*) (record
- + (ulint)get_field_offset(table, field)),
- (ulint) field->pack_length());
-
- true_len = blob_len;
-
- ut_a(get_field_offset(table, field)
- == key_part->offset);
-
- /* For multi byte character sets we need to calculate
- the true length of the key */
-
- if (blob_len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
- (const char *) blob_data,
- (const char *) blob_data
- + blob_len,
- (uint) (key_len /
- cs->mbmaxlen),
- &error);
- }
-
- /* All indexes on BLOB and TEXT are column prefix
- indexes, and we may need to truncate the data to be
- stored in the key value: */
-
- if (true_len > key_len) {
- true_len = key_len;
- }
-
- /* MySQL reserves 2 bytes for the length and the
- storage of the number is little-endian */
-
- innobase_write_to_2_little_endian(
- (byte*)buff, true_len);
- buff += 2;
-
- memcpy(buff, blob_data, true_len);
-
- /* Note that we always reserve the maximum possible
- length of the BLOB prefix in the key value. */
-
- buff += key_len;
- } else {
- /* Here we handle all other data types except the
- true VARCHAR, BLOB and TEXT. Note that the column
- value we store may be also in a column prefix
- index. */
-
- CHARSET_INFO* cs;
- ulint true_len;
- ulint key_len;
- const uchar* src_start;
- int error=0;
- enum_field_types real_type;
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len;
- /* SQL NULL: no length bytes in this format; the key_len
- data bytes stay zero from the bzero() call above */
- continue;
- }
-
- src_start = record + key_part->offset;
- real_type = field->real_type();
- true_len = key_len;
-
- /* Character set for the field is defined only
- to fields whose type is string and real field
- type is not enum or set. For these fields check
- if character set is multi byte. */
-
- if (real_type != MYSQL_TYPE_ENUM
- && real_type != MYSQL_TYPE_SET
- && ( mysql_type == MYSQL_TYPE_VAR_STRING
- || mysql_type == MYSQL_TYPE_STRING)) {
-
- cs = field->charset();
-
- /* For multi byte character sets we need to
- calculate the true length of the key */
-
- if (key_len > 0 && cs->mbmaxlen > 1) {
-
- true_len = (ulint)
- cs->cset->well_formed_len(cs,
- (const char *)src_start,
- (const char *)src_start
- + key_len,
- (uint) (key_len /
- cs->mbmaxlen),
- &error);
- }
- }
-
- memcpy(buff, src_start, true_len);
- buff += true_len;
-
- /* Pad the unused space with spaces. Note that no
- padding is ever needed for UCS-2 because in MySQL,
- all UCS2 characters are 2 bytes, as MySQL does not
- support surrogate pairs, which are needed to represent
- characters in the range U+10000 to U+10FFFF. */
-
- if (true_len < key_len) {
- ulint pad_len = key_len - true_len;
- memset(buff, ' ', pad_len);
- buff += pad_len;
- }
- }
- }
- /* Verify, after the copying is done, that the write position did
- not pass the end of the buffer given by the caller */
- ut_a(buff <= buff_start + buff_len);
-
- DBUG_RETURN((uint)(buff - buff_start));
-}
-
-/******************************************************************
-Builds a 'template' to the prebuilt struct. The template is used in fast
-retrieval of just those column values MySQL needs in its processing.
-The template array prebuilt->mysql_template is allocated here on the first
-call (one entry per table column) and is reused by later calls. On return
-prebuilt->n_template holds the number of entries filled in, and
-prebuilt->mysql_prefix_len the largest end offset (column offset + column
-length in the MySQL row) among the included columns.
-NOTE(review): the parameter thd is not referenced anywhere in the body of
-this function. */
-static
-void
-build_template(
-/*===========*/
- row_prebuilt_t* prebuilt, /* in/out: prebuilt struct */
- THD* thd, /* in: current user thread, used
- only if templ_type is
- ROW_MYSQL_REC_FIELDS */
- TABLE* table, /* in: MySQL table */
- uint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or
- ROW_MYSQL_REC_FIELDS */
-{
- dict_index_t* index;
- dict_index_t* clust_index;
- mysql_row_templ_t* templ;
- Field* field;
- ulint n_fields;
- ulint n_requested_fields = 0;
- ibool fetch_all_in_key = FALSE;
- ibool fetch_primary_key_cols = FALSE;
- ulint i;
- /* byte offset of the end of last requested column */
- ulint mysql_prefix_len = 0;
-
- if (prebuilt->select_lock_type == LOCK_X) {
- /* We always retrieve the whole clustered index record if we
- use exclusive row level locks, for example, if the read is
- done in an UPDATE statement. */
-
- templ_type = ROW_MYSQL_WHOLE_ROW;
- }
-
- if (templ_type == ROW_MYSQL_REC_FIELDS) {
- if (prebuilt->hint_need_to_fetch_extra_cols
- == ROW_RETRIEVE_ALL_COLS) {
-
- /* We know we must at least fetch all columns in the
- key, or all columns in the table */
-
- if (prebuilt->read_just_key) {
- /* MySQL has instructed us that it is enough
- to fetch the columns in the key; looks like
- MySQL can set this flag also when there is
- only a prefix of the column in the key: in
- that case we retrieve the whole column from
- the clustered index */
-
- fetch_all_in_key = TRUE;
- } else {
- templ_type = ROW_MYSQL_WHOLE_ROW;
- }
- } else if (prebuilt->hint_need_to_fetch_extra_cols
- == ROW_RETRIEVE_PRIMARY_KEY) {
- /* We must at least fetch all primary key cols. Note
- that if the clustered index was internally generated
- by InnoDB on the row id (no primary key was
- defined), then row_search_for_mysql() will always
- retrieve the row id to a special buffer in the
- prebuilt struct. */
-
- fetch_primary_key_cols = TRUE;
- }
- }
-
- clust_index = dict_table_get_first_index_noninline(prebuilt->table);
- /* A whole-row template is always built against the clustered
- index; a field template against the index chosen in prebuilt */
- if (templ_type == ROW_MYSQL_REC_FIELDS) {
- index = prebuilt->index;
- } else {
- index = clust_index;
- }
-
- if (index == clust_index) {
- prebuilt->need_to_access_clustered = TRUE;
- } else {
- prebuilt->need_to_access_clustered = FALSE;
- /* Below we check column by column if we need to access
- the clustered index */
- }
-
- n_fields = (ulint)table->s->fields; /* number of columns */
-
- if (!prebuilt->mysql_template) {
- prebuilt->mysql_template = (mysql_row_templ_t*)
- mem_alloc_noninline(
- n_fields * sizeof(mysql_row_templ_t));
- }
-
- prebuilt->template_type = templ_type;
- prebuilt->null_bitmap_len = table->s->null_bytes;
-
- prebuilt->templ_contains_blob = FALSE;
-
- /* Note that in InnoDB, i is the column number. MySQL calls columns
- 'fields'. The template entries are filled densely: templ points to
- slot n_requested_fields, which advances only for included columns. */
- for (i = 0; i < n_fields; i++) {
- templ = prebuilt->mysql_template + n_requested_fields;
- field = table->field[i];
-
- if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) {
- /* Decide which columns we should fetch
- and which we can skip. */
- register const ibool index_contains_field =
- dict_index_contains_col_or_prefix(index, i);
-
- if (!index_contains_field && prebuilt->read_just_key) {
- /* If this is a 'key read', we do not need
- columns that are not in the key */
-
- goto skip_field;
- }
-
- if (index_contains_field && fetch_all_in_key) {
- /* This field is needed in the query */
-
- goto include_field;
- }
-
- if (bitmap_is_set(table->read_set, i) ||
- bitmap_is_set(table->write_set, i)) {
- /* This field is needed in the query */
-
- goto include_field;
- }
-
- if (fetch_primary_key_cols
- && dict_table_col_in_clustered_key(
- index->table, i)) {
- /* This field is needed in the query */
-
- goto include_field;
- }
-
- /* This field is not needed in the query, skip it */
-
- goto skip_field;
- }
-include_field:
- n_requested_fields++;
-
- templ->col_no = i;
-
- if (index == clust_index) {
- templ->rec_field_no = dict_col_get_clust_pos_noninline(
- &index->table->cols[i], index);
- } else {
- templ->rec_field_no = dict_index_get_nth_col_pos(
- index, i);
- }
- /* NOTE(review): presumably ULINT_UNDEFINED means that the column
- has no position in 'index' -- confirm in the dict0dict code */
- if (templ->rec_field_no == ULINT_UNDEFINED) {
- prebuilt->need_to_access_clustered = TRUE;
- }
-
- if (field->null_ptr) {
- templ->mysql_null_byte_offset =
- (ulint) ((char*) field->null_ptr
- - (char*) table->record[0]);
-
- templ->mysql_null_bit_mask = (ulint) field->null_bit;
- } else {
- templ->mysql_null_bit_mask = 0;
- }
-
- templ->mysql_col_offset = (ulint)
- get_field_offset(table, field);
-
- templ->mysql_col_len = (ulint) field->pack_length();
- if (mysql_prefix_len < templ->mysql_col_offset
- + templ->mysql_col_len) {
- mysql_prefix_len = templ->mysql_col_offset
- + templ->mysql_col_len;
- }
- templ->type = index->table->cols[i].mtype;
- templ->mysql_type = (ulint)field->type();
-
- if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
- templ->mysql_length_bytes = (ulint)
- (((Field_varstring*)field)->length_bytes);
- }
-
- templ->charset = dtype_get_charset_coll_noninline(
- index->table->cols[i].prtype);
- templ->mbminlen = index->table->cols[i].mbminlen;
- templ->mbmaxlen = index->table->cols[i].mbmaxlen;
- templ->is_unsigned = index->table->cols[i].prtype
- & DATA_UNSIGNED;
- if (templ->type == DATA_BLOB) {
- prebuilt->templ_contains_blob = TRUE;
- }
-skip_field:
- ;
- }
-
- prebuilt->n_template = n_requested_fields;
- prebuilt->mysql_prefix_len = mysql_prefix_len;
-
- if (index != clust_index && prebuilt->need_to_access_clustered) {
- /* Change rec_field_no's to correspond to the clustered index
- record */
- for (i = 0; i < n_requested_fields; i++) {
- templ = prebuilt->mysql_template + i;
-
- templ->rec_field_no = dict_col_get_clust_pos_noninline(
- &index->table->cols[templ->col_no],
- clust_index);
- }
- }
-}
-
-/************************************************************************
-Returns the upper limit of a MySQL integral or floating-point column type,
-selected by the key type of the field. A key type that is not listed here
-triggers ut_error. */
-
-ulonglong
-ha_innobase::innobase_get_int_col_max_value(
-/*========================================*/
-					/* out: upper limit of the type */
-	const Field*	field)		/* in: MySQL field */
-{
-	switch (field->key_type()) {
-	/* TINY */
-	case HA_KEYTYPE_BINARY:
-		return(0xFFULL);
-	case HA_KEYTYPE_INT8:
-		return(0x7FULL);
-	/* SHORT */
-	case HA_KEYTYPE_USHORT_INT:
-		return(0xFFFFULL);
-	case HA_KEYTYPE_SHORT_INT:
-		return(0x7FFFULL);
-	/* MEDIUM */
-	case HA_KEYTYPE_UINT24:
-		return(0xFFFFFFULL);
-	case HA_KEYTYPE_INT24:
-		return(0x7FFFFFULL);
-	/* LONG */
-	case HA_KEYTYPE_ULONG_INT:
-		return(0xFFFFFFFFULL);
-	case HA_KEYTYPE_LONG_INT:
-		return(0x7FFFFFFFULL);
-	/* BIG */
-	case HA_KEYTYPE_ULONGLONG:
-		return(0xFFFFFFFFFFFFFFFFULL);
-	case HA_KEYTYPE_LONGLONG:
-		return(0x7FFFFFFFFFFFFFFFULL);
-	case HA_KEYTYPE_FLOAT:
-		/* The maximum as per IEEE754-2008 standard, 2^24 */
-		return(0x1000000ULL);
-	case HA_KEYTYPE_DOUBLE:
-		/* The maximum as per IEEE754-2008 standard, 2^53 */
-		return(0x20000000000000ULL);
-	default:
-		ut_error;
-	}
-
-	return(0);
-}
-
-/************************************************************************
-This special handling is really to overcome the limitations of MySQL's
-binlogging. We need to eliminate the non-determinism that will arise in
-INSERT ... SELECT type of statements, since MySQL binlog only stores the
-min value of the autoinc interval. Once that is fixed we can get rid of
-the special lock handling.
-
-What is acquired depends on innobase_autoinc_lock_mode:
-AUTOINC_NO_LOCKING: only the AUTOINC mutex of prebuilt->table.
-AUTOINC_NEW_STYLE_LOCKING: for SQLCOM_INSERT and SQLCOM_REPLACE only the
-AUTOINC mutex, unless the table has waiting or granted AUTO_INC locks; in
-that case, and for all other commands, the old style path is taken.
-AUTOINC_OLD_STYLE_LOCKING: row_lock_table_autoinc_for_mysql() is called
-first and the AUTOINC mutex is acquired only if that succeeds.
-
-Hence when DB_SUCCESS is returned the AUTOINC mutex is held, and the
-callers in this file release it with dict_table_autoinc_unlock(). Any
-other value of the lock mode triggers ut_error. */
-
-ulong
-ha_innobase::innobase_lock_autoinc(void)
-/*====================================*/
- /* out: DB_SUCCESS if all OK else
- error code */
-{
- ulint error = DB_SUCCESS;
-
- switch (innobase_autoinc_lock_mode) {
- case AUTOINC_NO_LOCKING:
- /* Acquire only the AUTOINC mutex. */
- dict_table_autoinc_lock(prebuilt->table);
- break;
-
- case AUTOINC_NEW_STYLE_LOCKING:
- /* For simple (single/multi) row INSERTs, we fallback to the
- old style only if another transaction has already acquired
- the AUTOINC lock on behalf of a LOAD FILE or INSERT ... SELECT
- etc. type of statement. */
- if (thd_sql_command(user_thd) == SQLCOM_INSERT
- || thd_sql_command(user_thd) == SQLCOM_REPLACE) {
- dict_table_t* table = prebuilt->table;
-
- /* Acquire the AUTOINC mutex. */
- dict_table_autoinc_lock(table);
-
- /* We need to check that another transaction isn't
- already holding the AUTOINC lock on the table. */
- if (table->n_waiting_or_granted_auto_inc_locks) {
- /* Release the mutex to avoid deadlocks. */
- dict_table_autoinc_unlock(table);
- } else {
- break;
- }
- }
- /* Fall through to old style locking. */
-
- case AUTOINC_OLD_STYLE_LOCKING:
- error = row_lock_table_autoinc_for_mysql(prebuilt);
-
- if (error == DB_SUCCESS) {
-
- /* Acquire the AUTOINC mutex. */
- dict_table_autoinc_lock(prebuilt->table);
- }
- break;
-
- default:
- ut_error;
- }
-
- return(ulong(error));
-}
-
-/************************************************************************
-Resets the autoinc value in the table. The value is stored with
-dict_table_autoinc_initialize() only if innobase_lock_autoinc()
-succeeds; the autoinc mutex is released again before returning. */
-
-ulong
-ha_innobase::innobase_reset_autoinc(
-/*================================*/
-					/* out: DB_SUCCESS if all went well
-					else error code */
-	ulonglong	autoinc)	/* in: value to store */
-{
-	const ulint	lock_err = innobase_lock_autoinc();
-
-	if (lock_err != DB_SUCCESS) {
-		/* Locking failed: leave the counter untouched */
-
-		return(ulong(lock_err));
-	}
-
-	dict_table_autoinc_initialize(prebuilt->table, autoinc);
-	dict_table_autoinc_unlock(prebuilt->table);
-
-	return(ulong(lock_err));
-}
-
-/************************************************************************
-Stores the autoinc value in the table via
-dict_table_autoinc_update_if_greater(), i.e. the value is only set if
-it is greater than the existing autoinc value in the table. Nothing is
-stored if innobase_lock_autoinc() fails. */
-
-ulong
-ha_innobase::innobase_set_max_autoinc(
-/*==================================*/
-					/* out: DB_SUCCESS if all went well
-					else error code */
-	ulonglong	auto_inc)	/* in: value to store */
-{
-	const ulint	lock_err = innobase_lock_autoinc();
-
-	if (lock_err != DB_SUCCESS) {
-		/* Locking failed: leave the counter untouched */
-
-		return(ulong(lock_err));
-	}
-
-	dict_table_autoinc_update_if_greater(prebuilt->table, auto_inc);
-	dict_table_autoinc_unlock(prebuilt->table);
-
-	return(ulong(lock_err));
-}
-
-/************************************************************************
-Stores a row in an InnoDB database, to the table specified in this
-handle. In outline the function:
-1. asserts that the transaction of the handle is the transaction of the
-current thread;
-2. for ALTER TABLE, OPTIMIZE, CREATE INDEX and DROP INDEX, commits the
-transaction once num_write_row has reached 10000 and re-acquires the
-needed table locks;
-3. calls update_auto_increment() if the table has an auto-increment
-column and record is table->record[0];
-4. builds a whole-row template if needed and inserts the row with
-row_insert_for_mysql();
-5. if an auto-increment value was used, updates the table autoinc counter
-depending on the outcome of the insert;
-6. converts the InnoDB error code with convert_error_code_to_mysql(),
-except when update_auto_increment() failed without an InnoDB autoinc
-error: that error is returned as such. */
-
-int
-ha_innobase::write_row(
-/*===================*/
- /* out: error code */
- uchar* record) /* in: a row in MySQL format */
-{
- ulint error = 0;
- int error_result= 0;
- ibool auto_inc_used= FALSE;
- ulint sql_command;
- trx_t* trx = thd_to_trx(user_thd);
-
- DBUG_ENTER("ha_innobase::write_row");
-
- if (prebuilt->trx != trx) {
- sql_print_error("The transaction object for the table handle is at "
- "%p, but for the current thread it is at %p",
- prebuilt->trx, trx);
-
- fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
- ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
- fputs("\n"
- "InnoDB: Dump of 200 bytes around ha_data: ",
- stderr);
- ut_print_buf(stderr, ((const byte*) trx) - 100, 200);
- putc('\n', stderr);
- ut_error;
- }
-
- ha_statistic_increment(&SSV::ha_write_count);
-
- if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
- table->timestamp_field->set_time();
-
- sql_command = thd_sql_command(user_thd);
-
- if ((sql_command == SQLCOM_ALTER_TABLE
- || sql_command == SQLCOM_OPTIMIZE
- || sql_command == SQLCOM_CREATE_INDEX
- || sql_command == SQLCOM_DROP_INDEX)
- && num_write_row >= 10000) {
- /* ALTER TABLE is COMMITted at every 10000 copied rows.
- The IX table lock for the original table has to be re-issued.
- As this method will be called on a temporary table where the
- contents of the original table is being copied to, it is
- a bit tricky to determine the source table. The cursor
- position in the source table need not be adjusted after the
- intermediate COMMIT, since writes by other transactions are
- being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
-
- dict_table_t* src_table;
- ulint mode;
-
- num_write_row = 0;
-
- /* Commit the transaction. This will release the table
- locks, so they have to be acquired again. */
-
- /* Altering an InnoDB table */
- /* Get the source table. */
- src_table = lock_get_src_table(
- prebuilt->trx, prebuilt->table, &mode);
- if (!src_table) {
-no_commit:
- /* Unknown situation: do not commit */
- /*
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ALTER TABLE is holding lock"
- " on %lu tables!\n",
- prebuilt->trx->mysql_n_tables_locked);
- */
- ;
- } else if (src_table == prebuilt->table) {
- /* Source table is not in InnoDB format:
- no need to re-acquire locks on it. */
-
- /* Altering to InnoDB format */
- innobase_commit(ht, user_thd, 1);
- /* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
- /* We will need an IX lock on the destination table. */
- prebuilt->sql_stat_start = TRUE;
- } else {
- /* Ensure that there are no other table locks than
- LOCK_IX and LOCK_AUTO_INC on the destination table. */
-
- if (!lock_is_table_exclusive(prebuilt->table,
- prebuilt->trx)) {
- goto no_commit;
- }
-
- /* Commit the transaction. This will release the table
- locks, so they have to be acquired again. */
- innobase_commit(ht, user_thd, 1);
- /* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
- /* Re-acquire the table lock on the source table. */
- row_lock_table_for_mysql(prebuilt, src_table, mode);
- /* We will need an IX lock on the destination table. */
- prebuilt->sql_stat_start = TRUE;
- }
- }
-
- num_write_row++;
-
- /* This is the case where the table has an auto-increment column */
- if (table->next_number_field && record == table->record[0]) {
-
- /* Reset the error code before calling
- innobase_get_auto_increment(). */
- prebuilt->autoinc_error = DB_SUCCESS;
-
- if ((error = update_auto_increment())) {
-
- /* We don't want to mask autoinc overflow errors. */
- if (prebuilt->autoinc_error != DB_SUCCESS) {
- error = (int) prebuilt->autoinc_error;
-
- goto report_error;
- }
-
- /* MySQL errors are passed straight back. */
- error_result = (int) error;
- goto func_exit;
- }
-
- auto_inc_used = TRUE;
- }
-
- if (prebuilt->mysql_template == NULL
- || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
-
- /* Build the template used in converting quickly between
- the two database formats */
-
- build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
- }
-
- innodb_srv_conc_enter_innodb(prebuilt->trx);
-
- error = row_insert_for_mysql((byte*) record, prebuilt);
-
- /* Handle duplicate key errors */
- if (auto_inc_used) {
- ulint err;
- ulonglong auto_inc;
- ulonglong col_max_value;
-
- /* Note the number of rows processed for this statement, used
- by get_auto_increment() to determine the number of AUTO-INC
- values to reserve. This is only useful for a multi-value INSERT
- and is a statement level counter.*/
- if (trx->n_autoinc_rows > 0) {
- --trx->n_autoinc_rows;
- }
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
-
- /* Get the value that MySQL attempted to store in the table.*/
- auto_inc = table->next_number_field->val_int();
-
- switch (error) {
- case DB_DUPLICATE_KEY:
-
- /* A REPLACE command and LOAD DATA INFILE REPLACE
- handle a duplicate key error themselves, but we
- must update the autoinc counter if we are performing
- those statements. */
-
- switch (sql_command) {
- case SQLCOM_LOAD:
- if ((trx->duplicates
- & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))) {
-
- goto set_max_autoinc;
- }
- break;
-
- case SQLCOM_REPLACE:
- case SQLCOM_INSERT_SELECT:
- case SQLCOM_REPLACE_SELECT:
- goto set_max_autoinc;
- break;
-
- default:
- break;
- }
-
- break;
-
- case DB_SUCCESS:
- /* If the actual value inserted is greater than
- the upper limit of the interval, then we try and
- update the table upper limit. Note: last_value
- will be 0 if get_auto_increment() was not called.*/
-
- if (auto_inc <= col_max_value
- && auto_inc >= prebuilt->autoinc_last_value) {
-set_max_autoinc:
- ut_a(prebuilt->autoinc_increment > 0);
-
- ulonglong need;
- ulonglong offset;
-
- offset = prebuilt->autoinc_offset;
- need = prebuilt->autoinc_increment;
-
- auto_inc = innobase_next_autoinc(
- auto_inc, need, offset, col_max_value);
-
- err = innobase_set_max_autoinc(auto_inc);
-
- if (err != DB_SUCCESS) {
- error = err;
- }
- }
- break;
- }
- }
-
- innodb_srv_conc_exit_innodb(prebuilt->trx);
- /* The autoinc error path jumps here directly, i.e. without having
- called innodb_srv_conc_enter_innodb() */
-report_error:
- error_result = convert_error_code_to_mysql((int) error, user_thd);
-
-func_exit:
- innobase_active_small();
-
- DBUG_RETURN(error_result);
-}
-
-/**************************************************************************
-Checks which fields have changed in a row and stores information
-of them to an update vector. Every column of the MySQL table is visited:
-the old and the new value are compared and, for each column that differs,
-the new value is converted to the InnoDB format into upd_buff and an entry
-is appended to uvect->fields. On return uvect->n_fields is the number of
-changed columns and uvect->info_bits is 0. After the loop the function
-asserts that the conversions did not write past upd_buff + buff_len.
-The thd parameter is not used by this function. */
-static
-int
-calc_row_difference(
-/*================*/
- /* out: error number or 0; the code below
- always returns 0 */
- upd_t* uvect, /* in/out: update vector */
- uchar* old_row, /* in: old row in MySQL format */
- uchar* new_row, /* in: new row in MySQL format */
- struct st_table* table, /* in: table in MySQL data
- dictionary */
- uchar* upd_buff, /* in: buffer to use */
- ulint buff_len, /* in: buffer length */
- row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */
- THD* thd) /* in: user thread */
-{
- uchar* original_upd_buff = upd_buff;
- Field* field;
- enum_field_types field_mysql_type;
- uint n_fields;
- ulint o_len;
- ulint n_len;
- ulint col_pack_len;
- byte* new_mysql_row_col;
- byte* o_ptr;
- byte* n_ptr;
- byte* buf;
- upd_field_t* ufield;
- ulint col_type;
- ulint n_changed = 0;
- dfield_t dfield;
- dict_index_t* clust_index;
- uint i;
-
- n_fields = table->s->fields;
- clust_index = dict_table_get_first_index_noninline(prebuilt->table);
-
- /* We use upd_buff to convert changed fields; buf is advanced by each
- conversion so that consecutive changed fields are stored one after
- another */
- buf = (byte*) upd_buff;
-
- for (i = 0; i < n_fields; i++) {
- field = table->field[i];
-
- o_ptr = (byte*) old_row + get_field_offset(table, field);
- n_ptr = (byte*) new_row + get_field_offset(table, field);
-
- /* Save the unmodified column pointer and pack length in
- new_mysql_row_col and col_pack_len: the switch below may move
- n_ptr and change n_len, but the conversion to the InnoDB format
- further down is given the raw MySQL column */
-
- new_mysql_row_col = n_ptr;
- col_pack_len = field->pack_length();
-
- o_len = col_pack_len;
- n_len = col_pack_len;
-
- /* We use o_ptr and n_ptr to dig up the actual data for
- comparison. */
-
- field_mysql_type = field->type();
-
- col_type = prebuilt->table->cols[i].mtype;
-
- switch (col_type) {
-
- case DATA_BLOB:
- o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
- n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
-
- break;
-
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_VARMYSQL:
- if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
- /* This is a >= 5.0.3 type true VARCHAR where
- the real payload data length is stored in
- 1 or 2 bytes */
-
- o_ptr = row_mysql_read_true_varchar(
- &o_len, o_ptr,
- (ulint)
- (((Field_varstring*)field)->length_bytes));
-
- n_ptr = row_mysql_read_true_varchar(
- &n_len, n_ptr,
- (ulint)
- (((Field_varstring*)field)->length_bytes));
- }
-
- break;
- default:
- ;
- }
-
- if (field->null_ptr) {
- if (field_in_record_is_null(table, field,
- (char*) old_row)) {
- o_len = UNIV_SQL_NULL;
- }
-
- if (field_in_record_is_null(table, field,
- (char*) new_row)) {
- n_len = UNIV_SQL_NULL;
- }
- }
-
- if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
- 0 != memcmp(o_ptr, n_ptr, o_len))) {
- /* The field has changed: either the lengths differ (this
- also covers a change between NULL and non-NULL) or the
- bytes of two equally long non-NULL values differ */
-
- ufield = uvect->fields + n_changed;
-
- /* Let us use a dummy dfield to make the conversion
- from the MySQL column format to the InnoDB format */
-
- dict_col_copy_type_noninline(prebuilt->table->cols + i,
- &dfield.type);
-
- if (n_len != UNIV_SQL_NULL) {
- buf = row_mysql_store_col_in_innobase_format(
- &dfield,
- (byte*)buf,
- TRUE,
- new_mysql_row_col,
- col_pack_len,
- dict_table_is_comp_noninline(
- prebuilt->table));
- ufield->new_val.data = dfield.data;
- ufield->new_val.len = dfield.len;
- } else {
- ufield->new_val.data = NULL;
- ufield->new_val.len = UNIV_SQL_NULL;
- }
-
- ufield->exp = NULL;
- ufield->field_no = dict_col_get_clust_pos_noninline(
- &prebuilt->table->cols[i], clust_index);
- n_changed++;
- }
- }
-
- uvect->n_fields = n_changed;
- uvect->info_bits = 0;
-
- ut_a(buf <= (byte*)original_upd_buff + buff_len);
-
- return(0);
-}
-
-/**************************************************************************
-Updates a row given as a parameter to a new value. Note that we are given
-whole rows, not just the fields which are updated: this incurs some
-overhead for CPU when we check which fields are actually updated.
-The update vector is built with calc_row_difference() and the update is
-performed with row_update_for_mysql(). If the update succeeds but no
-column was found to differ, HA_ERR_RECORD_IS_THE_SAME is returned instead
-of 0.
-TODO: currently InnoDB does not prevent the 'Halloween problem':
-in a searched update a single row can get updated several times
-if its index columns are updated! */
-
-int
-ha_innobase::update_row(
-/*====================*/
- /* out: error number or 0 */
- const uchar* old_row, /* in: old row in MySQL format */
- uchar* new_row) /* in: new row in MySQL format */
-{
- upd_t* uvect;
- int error = 0;
- trx_t* trx = thd_to_trx(user_thd);
-
- DBUG_ENTER("ha_innobase::update_row");
-
- ut_a(prebuilt->trx == trx);
-
- ha_statistic_increment(&SSV::ha_update_count);
-
- if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
- table->timestamp_field->set_time();
-
- if (prebuilt->upd_node) {
- uvect = prebuilt->upd_node->update;
- } else {
- uvect = row_get_prebuilt_update_vector(prebuilt);
- }
-
- /* Build an update vector from the modified fields in the rows
- (uses upd_buff of the handle). The return value is not checked
- here. */
-
- calc_row_difference(uvect, (uchar*) old_row, new_row, table,
- upd_buff, (ulint)upd_and_key_val_buff_len,
- prebuilt, user_thd);
-
- /* This is not a delete */
- prebuilt->upd_node->is_delete = FALSE;
-
- assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
-
- innodb_srv_conc_enter_innodb(trx);
-
- error = row_update_for_mysql((byte*) old_row, prebuilt);
-
- /* We need to do some special AUTOINC handling for the following case:
-
- INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ...
-
- We need to use the AUTOINC counter that was actually used by
- MySQL in the UPDATE statement, which can be different from the
- value used in the INSERT statement. Note that in this branch the
- result of innobase_set_max_autoinc() replaces the (successful)
- result of the row update in 'error'. */
-
- if (error == DB_SUCCESS
- && table->next_number_field
- && new_row == table->record[0]
- && thd_sql_command(user_thd) == SQLCOM_INSERT
- && (trx->duplicates & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))
- == TRX_DUP_IGNORE) {
-
- ulonglong auto_inc;
- ulonglong col_max_value;
-
- auto_inc = table->next_number_field->val_int();
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
-
- if (auto_inc <= col_max_value && auto_inc != 0) {
-
- ulonglong need;
- ulonglong offset;
-
- offset = prebuilt->autoinc_offset;
- need = prebuilt->autoinc_increment;
-
- auto_inc = innobase_next_autoinc(
- auto_inc, need, offset, col_max_value);
-
- error = innobase_set_max_autoinc(auto_inc);
- }
- }
-
- innodb_srv_conc_exit_innodb(trx);
-
- error = convert_error_code_to_mysql(error, user_thd);
-
- if (error == 0 /* success */
- && uvect->n_fields == 0 /* no columns were updated */) {
-
- /* This is the same as success, but instructs
- MySQL that the row is not really updated and it
- should not increase the count of updated rows.
- This is the fix for http://bugs.mysql.com/29157 */
- error = HA_ERR_RECORD_IS_THE_SAME;
- }
-
- /* Tell InnoDB server that there might be work for
- utility threads: */
-
- innobase_active_small();
-
- DBUG_RETURN(error);
-}
-
-/**************************************************************************
-Deletes the row given as the parameter. The delete is executed through the
-prebuilt update node with its is_delete flag set. */
-
-int
-ha_innobase::delete_row(
-/*====================*/
-					/* out: error number or 0 */
-	const uchar*	record)		/* in: a row in MySQL format */
-{
-	trx_t*	user_trx = thd_to_trx(user_thd);
-	int	rc = 0;
-
-	DBUG_ENTER("ha_innobase::delete_row");
-
-	ut_a(prebuilt->trx == user_trx);
-
-	ha_statistic_increment(&SSV::ha_delete_count);
-
-	/* Make sure the update node exists before we flag it */
-	if (prebuilt->upd_node == NULL) {
-		row_get_prebuilt_update_vector(prebuilt);
-	}
-
-	/* Mark the node: this is a delete, not an update */
-	prebuilt->upd_node->is_delete = TRUE;
-
-	innodb_srv_conc_enter_innodb(user_trx);
-	rc = row_update_for_mysql((byte*) record, prebuilt);
-	innodb_srv_conc_exit_innodb(user_trx);
-
-	rc = convert_error_code_to_mysql(rc, user_thd);
-
-	/* Let the InnoDB server know that its utility threads
-	may have work to do */
-	innobase_active_small();
-
-	DBUG_RETURN(rc);
-}
-
-/**************************************************************************
-Removes a new lock set on a row, if it was not read optimistically. This can
-be called after a row has been read in the processing of an UPDATE or a DELETE
-query, if the option innodb_locks_unsafe_for_binlog is set. The code below
-also releases the lock when the transaction isolation level is
-READ COMMITTED. */
-
-void
-ha_innobase::unlock_row(void)
-/*=========================*/
-{
-	DBUG_ENTER("ha_innobase::unlock_row");
-
-	/* A consistent read takes no locks, so there is nothing
-	to release in that case. */
-
-	if (prebuilt->select_lock_type != LOCK_NONE) {
-
-		if (prebuilt->row_read_type
-		    == ROW_READ_DID_SEMI_CONSISTENT) {
-
-			/* Re-arm the semi-consistent read for
-			the next row */
-			prebuilt->row_read_type
-				= ROW_READ_TRY_SEMI_CONSISTENT;
-
-		} else if (prebuilt->row_read_type
-			   == ROW_READ_TRY_SEMI_CONSISTENT
-			   || (prebuilt->row_read_type
-			       == ROW_READ_WITH_LOCKS
-			       && (srv_locks_unsafe_for_binlog
-				   || prebuilt->trx->isolation_level
-				   == TRX_ISO_READ_COMMITTED))) {
-
-			row_unlock_for_mysql(prebuilt, FALSE);
-		}
-	}
-
-	DBUG_VOID_RETURN;
-}
-
-/* Tells whether the latest row read was a semi-consistent read, i.e.
-whether prebuilt->row_read_type is ROW_READ_DID_SEMI_CONSISTENT.
-See handler.h and row0mysql.h for docs on this function. */
-bool
-ha_innobase::was_semi_consistent_read(void)
-/*=======================================*/
-{
-	const bool	did_semi_consistent
-		= (prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
-
-	return(did_semi_consistent);
-}
-
-/* Enables or disables semi-consistent reads for this handle.
-See handler.h and row0mysql.h for docs on this function. */
-void
-ha_innobase::try_semi_consistent_read(bool yes)
-/*===========================================*/
-{
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	/* A semi-consistent read is attempted only when MySQL asked for
-	it AND either the innodb_locks_unsafe_for_binlog option is in use
-	or this session runs at the READ COMMITTED isolation level.
-	Otherwise rows are read with locks. */
-
-	if (!yes) {
-		prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
-		return;
-	}
-
-	if (srv_locks_unsafe_for_binlog
-	    || prebuilt->trx->isolation_level == TRX_ISO_READ_COMMITTED) {
-		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
-		return;
-	}
-
-	prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
-}
-
-/**********************************************************************
-Initializes a handle to use an index by making keynr the active index.
-The 'sorted' argument is not used by this implementation. */
-
-int
-ha_innobase::index_init(
-/*====================*/
-			/* out: 0 or error number */
-	uint	keynr,	/* in: key (index) number */
-	bool	sorted)	/* in: 1 if result MUST be sorted according to index */
-{
-	DBUG_ENTER("index_init");
-
-	const int	rc = change_active_index(keynr);
-
-	DBUG_RETURN(rc);
-}
-
-/**********************************************************************
-Ends the use of an index: resets active_index to MAX_KEY and
-returns 0. */
-
-int
-ha_innobase::index_end(void)
-/*========================*/
-{
-	DBUG_ENTER("index_end");
-
-	const int	rc = 0;
-
-	active_index = MAX_KEY;
-
-	DBUG_RETURN(rc);
-}
-
-/*************************************************************************
-Converts a search mode flag understood by MySQL to a flag understood
-by InnoDB.
-
-In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
-pass a complete-field prefix of a key value as the search
-tuple. I.e., it is not allowed that the last field would
-just contain n first bytes of the full field value.
-MySQL uses a 'padding' trick to convert LIKE 'abc%'
-type queries so that it can use as a search tuple
-a complete-field-prefix of a key value. Thus, the InnoDB
-search mode PAGE_CUR_LE_OR_EXTENDS is never used.
-TODO: when/if MySQL starts to use also partial-field
-prefixes, we have to deal with stripping of spaces
-and comparison of non-latin1 char type fields in
-innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
-work correctly. */
-inline
-ulint
-convert_search_mode_to_innobase(
-/*============================*/
-					/* out: InnoDB PAGE_CUR_... search
-					mode, or PAGE_CUR_UNSUPP */
-	enum ha_rkey_function	find_flag)	/* in: MySQL search flag */
-{
-	/* There is deliberately no "default:" label, so that gcc warns
-	about an enumeration value that is not handled in the switch
-	(if -Wswitch or -Wall is used) */
-
-	switch (find_flag) {
-	case HA_READ_KEY_EXACT:
-		/* an exact match does not require the index to be UNIQUE */
-	case HA_READ_KEY_OR_NEXT:
-	case HA_READ_PREFIX:
-		return(PAGE_CUR_GE);
-
-	case HA_READ_KEY_OR_PREV:
-	case HA_READ_PREFIX_LAST:
-	case HA_READ_PREFIX_LAST_OR_PREV:
-		return(PAGE_CUR_LE);
-
-	case HA_READ_AFTER_KEY:
-		return(PAGE_CUR_G);
-
-	case HA_READ_BEFORE_KEY:
-		return(PAGE_CUR_L);
-
-	case HA_READ_MBR_CONTAIN:
-	case HA_READ_MBR_INTERSECT:
-	case HA_READ_MBR_WITHIN:
-	case HA_READ_MBR_DISJOINT:
-	case HA_READ_MBR_EQUAL:
-		/* the MBR search modes are reported as unsupported
-		without raising an error here */
-		return(PAGE_CUR_UNSUPP);
-	}
-
-	/* A flag value outside the cases above: report it */
-	my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality");
-
-	return(PAGE_CUR_UNSUPP);
-}
-
-/*
- BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
- ---------------------------------------------------
-The following does not cover all the details, but explains how we determine
-the start of a new SQL statement, and what is associated with it.
-
-For each table in the database the MySQL interpreter may have several
-table handle instances in use, also in a single SQL query. For each table
-handle instance there is an InnoDB 'prebuilt' struct which contains most
-of the InnoDB data associated with this table handle instance.
-
- A) if the user has not explicitly set any MySQL table level locks:
-
- 1) MySQL calls ::external_lock to set an 'intention' table level lock on
-the table of the handle instance. There we set
-prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
-true if we are taking this table handle instance to use in a new SQL
-statement issued by the user. We also increment trx->n_mysql_tables_in_use.
-
- 2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
-instructions to prebuilt->template of the table handle instance in
-::index_read. The template is used to save CPU time in large joins.
-
- 3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
-allocate a new consistent read view for the trx if it does not yet have one,
-or in the case of a locking read, set an InnoDB 'intention' table level
-lock on the table.
-
- 4) We do the SELECT. MySQL may repeatedly call ::index_read for the
-same table handle instance, if it is a join.
-
- 5) When the SELECT ends, MySQL removes its intention table level locks
-in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
- (a) we execute a COMMIT there if the autocommit is on,
- (b) we also release possible 'SQL statement level resources' InnoDB may
-have for this SQL statement. The MySQL interpreter does NOT execute
-autocommit for pure read transactions, though it should. That is why the
-table handler in that case has to execute the COMMIT in ::external_lock.
-
- B) If the user has explicitly set MySQL table level locks, then MySQL
-does NOT call ::external_lock at the start of the statement. To determine
-when we are at the start of a new SQL statement we at the start of
-::index_read also compare the query id to the latest query id where the
-table handle instance was used. If it has changed, we know we are at the
-start of a new SQL statement. Since the query id can theoretically
-wrap around, we use this test only as a secondary way of determining the
-start of a new SQL statement. */
-
-
-/**************************************************************************
-Positions an index cursor to the index specified in the handle, using the
-given key value and search flag, and fetches the row, if any, to buf.
-Sets table->status and remembers the match mode in last_match_mode. */
-
-int
-ha_innobase::index_read(
-/*====================*/
-					/* out: 0, HA_ERR_KEY_NOT_FOUND,
-					or error number */
-	uchar*		buf,		/* in/out: buffer for the returned
-					row */
-	const uchar*	key_ptr,	/* in: key value; if this is NULL
-					we position the cursor at the
-					start or end of index; this can
-					also contain an InnoDB row id, in
-					which case key_len is the InnoDB
-					row id length; the key value can
-					also be a prefix of a full key value,
-					and the last column can be a prefix
-					of a full column */
-	uint		key_len,	/* in: key value length */
-	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
-{
-	DBUG_ENTER("index_read");
-
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	ha_statistic_increment(&SSV::ha_read_key_count);
-
-	dict_index_t*	search_index = prebuilt->index;
-
-	/* Note that the index for which the search template is built is
-	not necessarily prebuilt->index; it can also be the clustered
-	index */
-
-	if (prebuilt->sql_stat_start) {
-		build_template(prebuilt, user_thd, table,
-			       ROW_MYSQL_REC_FIELDS);
-	}
-
-	if (key_ptr == NULL) {
-		/* No key: the cursor is positioned to the first or
-		the last entry in the index */
-
-		dtuple_set_n_fields(prebuilt->search_tuple, 0);
-	} else {
-		/* Convert the search key value to the InnoDB format
-		into prebuilt->search_tuple */
-
-		row_sel_convert_mysql_key_to_innobase(
-			prebuilt->search_tuple,
-			(byte*) key_val_buff,
-			(ulint) upd_and_key_val_buff_len,
-			search_index,
-			(byte*) key_ptr,
-			(ulint) key_len, prebuilt->trx);
-	}
-
-	const ulint	search_mode
-		= convert_search_mode_to_innobase(find_flag);
-
-	ulint		exactness;
-
-	switch (find_flag) {
-	case HA_READ_KEY_EXACT:
-		exactness = ROW_SEL_EXACT;
-		break;
-	case HA_READ_PREFIX:
-	case HA_READ_PREFIX_LAST:
-		exactness = ROW_SEL_EXACT_PREFIX;
-		break;
-	default:
-		exactness = 0;
-	}
-
-	last_match_mode = (uint) exactness;
-
-	ulint		ret = DB_UNSUPPORTED;
-
-	if (search_mode != PAGE_CUR_UNSUPP) {
-
-		innodb_srv_conc_enter_innodb(prebuilt->trx);
-
-		ret = row_search_for_mysql((byte*) buf, search_mode,
-					   prebuilt, exactness, 0);
-
-		innodb_srv_conc_exit_innodb(prebuilt->trx);
-	}
-
-	int		result;
-
-	switch (ret) {
-	case DB_SUCCESS:
-		result = 0;
-		table->status = 0;
-		break;
-
-	case DB_RECORD_NOT_FOUND:
-	case DB_END_OF_INDEX:
-		result = HA_ERR_KEY_NOT_FOUND;
-		table->status = STATUS_NOT_FOUND;
-		break;
-
-	default:
-		result = convert_error_code_to_mysql((int) ret, user_thd);
-		table->status = STATUS_NOT_FOUND;
-	}
-
-	DBUG_RETURN(result);
-}
-
-/***********************************************************************
-Works like index_read, but finds the last row with the current key
-value or prefix: it calls index_read with the HA_READ_PREFIX_LAST
-search flag. */
-
-int
-ha_innobase::index_read_last(
-/*=========================*/
-				/* out: 0, HA_ERR_KEY_NOT_FOUND, or an
-				error code */
-	uchar*		buf,	/* out: fetched row */
-	const uchar*	key_ptr,/* in: key value, or a prefix of a full
-				key value */
-	uint		key_len)/* in: length of the key val or prefix
-				in bytes */
-{
-	const int	rc = index_read(buf, key_ptr, key_len,
-					HA_READ_PREFIX_LAST);
-
-	return(rc);
-}
-
-/************************************************************************
-Gets the InnoDB index for a handle. Does not change the active index.
-An error is printed to the log if no index is found. */
-
-dict_index_t*
-ha_innobase::innobase_get_index(
-/*============================*/
-				/* out: NULL or index instance. */
-	uint	keynr)		/* in: use this index; MAX_KEY means always
-				clustered index, even if it was internally
-				generated by InnoDB */
-{
-	DBUG_ENTER("innobase_get_index");
-	ha_statistic_increment(&SSV::ha_read_key_count);
-
-	ut_ad(user_thd == ha_thd());
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	KEY*		mysql_key = NULL;
-	dict_index_t*	found;
-
-	const bool	lookup_by_name
-		= (keynr != MAX_KEY) && (table->s->keys > 0);
-
-	if (lookup_by_name) {
-		/* Find the index by the name MySQL knows it by */
-		mysql_key = &table->key_info[keynr];
-
-		found = dict_table_get_index_noninline(
-			prebuilt->table, mysql_key->name);
-	} else {
-		/* Use the first index of the InnoDB table */
-		found = dict_table_get_first_index_noninline(
-			prebuilt->table);
-	}
-
-	if (found == NULL) {
-		sql_print_error(
-			"Innodb could not find key n:o %u with name %s "
-			"from dict cache for table %s",
-			keynr,
-			mysql_key != NULL ? mysql_key->name : "NULL",
-			prebuilt->table->name);
-	}
-
-	DBUG_RETURN(found);
-}
-
-/************************************************************************
-Changes the active index of a handle: looks up the InnoDB index, sizes and
-types prebuilt->search_tuple for it, and rebuilds the row template.
-Note that active_index is assigned before the lookup, so it is changed
-also when 1 is returned. */
-
-int
-ha_innobase::change_active_index(
-/*=============================*/
-			/* out: 0, or 1 if the index was not found */
-	uint	keynr)	/* in: use this index; MAX_KEY means always clustered
-			index, even if it was internally generated by
-			InnoDB */
-{
-	DBUG_ENTER("change_active_index");
-
-	ut_ad(user_thd == ha_thd());
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	active_index = keynr;
-
-	dict_index_t*	new_index = innobase_get_index(keynr);
-
-	prebuilt->index = new_index;
-
-	if (new_index == NULL) {
-		DBUG_RETURN(1);
-	}
-
-	assert(prebuilt->search_tuple != 0);
-
-	/* Give the search tuple as many fields as the index has,
-	with matching types */
-
-	dtuple_set_n_fields(prebuilt->search_tuple,
-			    prebuilt->index->n_fields);
-
-	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
-			      prebuilt->index->n_fields);
-
-	/* MySQL changes the active index for a handle also during some
-	queries, for example SELECT MAX(a), SUM(a) first retrieves the
-	MAX() and then calculates the sum. Previously we played safe and
-	used the flag ROW_MYSQL_WHOLE_ROW below, but that caused
-	unnecessary copying. Starting from MySQL-4.1 we use a more
-	efficient flag here. */
-
-	build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
-
-	DBUG_RETURN(0);
-}
-
-/**************************************************************************
-Positions an index cursor to the index specified in keynr. Fetches the
-row if any. The active index is first changed to keynr; if that fails,
-1 is returned without reading anything. */
-/* ??? This is only used to read whole keys ??? */
-
-int
-ha_innobase::index_read_idx(
-/*========================*/
-					/* out: error number or 0 */
-	uchar*		buf,		/* in/out: buffer for the returned
-					row */
-	uint		keynr,		/* in: use this index */
-	const uchar*	key,		/* in: key value; if this is NULL
-					we position the cursor at the
-					start or end of index */
-	uint		key_len,	/* in: key value length */
-	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
-{
-	const int	switch_failed = change_active_index(keynr);
-
-	if (switch_failed != 0) {
-		return(1);
-	}
-
-	const int	rc = index_read(buf, key, key_len, find_flag);
-
-	return(rc);
-}
-
-/***************************************************************************
-Reads the next or previous row from a cursor, which must have previously been
-positioned using index_read. Sets table->status according to the outcome. */
-
-int
-ha_innobase::general_fetch(
-/*=======================*/
-				/* out: 0, HA_ERR_END_OF_FILE, or error
-				number */
-	uchar*	buf,		/* in/out: buffer for next row in MySQL
-				format */
-	uint	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
-	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
-				ROW_SEL_EXACT_PREFIX */
-{
-	DBUG_ENTER("general_fetch");
-
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	innodb_srv_conc_enter_innodb(prebuilt->trx);
-
-	const ulint	ret = row_search_for_mysql(
-		(byte*) buf, 0, prebuilt, match_mode, direction);
-
-	innodb_srv_conc_exit_innodb(prebuilt->trx);
-
-	int		result;
-
-	switch (ret) {
-	case DB_SUCCESS:
-		result = 0;
-		table->status = 0;
-		break;
-
-	case DB_RECORD_NOT_FOUND:
-	case DB_END_OF_INDEX:
-		/* Running out of rows is reported as end of file */
-		result = HA_ERR_END_OF_FILE;
-		table->status = STATUS_NOT_FOUND;
-		break;
-
-	default:
-		result = convert_error_code_to_mysql((int) ret, user_thd);
-		table->status = STATUS_NOT_FOUND;
-	}
-
-	DBUG_RETURN(result);
-}
-
-/***************************************************************************
-Reads the next row from a cursor, which must have previously been
-positioned using index_read. No match mode is applied. */
-
-int
-ha_innobase::index_next(
-/*====================*/
-			/* out: 0, HA_ERR_END_OF_FILE, or error
-			number */
-	uchar*	buf)	/* in/out: buffer for next row in MySQL
-			format */
-{
-	ha_statistic_increment(&SSV::ha_read_next_count);
-
-	const int	rc = general_fetch(buf, ROW_SEL_NEXT, 0);
-
-	return(rc);
-}
-
-/***********************************************************************
-Reads the next row matching to the key value given as the parameter.
-The key and keylen arguments are not used: the fetch is done with the
-match mode saved in last_match_mode. */
-
-int
-ha_innobase::index_next_same(
-/*=========================*/
-				/* out: 0, HA_ERR_END_OF_FILE, or error
-				number */
-	uchar*		buf,	/* in/out: buffer for the row */
-	const uchar*	key,	/* in: key value */
-	uint		keylen)	/* in: key value length */
-{
-	ha_statistic_increment(&SSV::ha_read_next_count);
-
-	const int	rc = general_fetch(buf, ROW_SEL_NEXT,
-					   last_match_mode);
-
-	return(rc);
-}
-
-/***************************************************************************
-Reads the previous row from a cursor, which must have previously been
-positioned using index_read. No match mode is applied. */
-
-int
-ha_innobase::index_prev(
-/*====================*/
-			/* out: 0, HA_ERR_END_OF_FILE, or error number */
-	uchar*	buf)	/* in/out: buffer for previous row in MySQL format */
-{
-	ha_statistic_increment(&SSV::ha_read_prev_count);
-
-	const int	rc = general_fetch(buf, ROW_SEL_PREV, 0);
-
-	return(rc);
-}
-
-/************************************************************************
-Positions a cursor on the first record in an index and reads the
-corresponding row to buf. */
-
-int
-ha_innobase::index_first(
-/*=====================*/
-			/* out: 0, HA_ERR_END_OF_FILE, or error code */
-	uchar*	buf)	/* in/out: buffer for the row */
-{
-	DBUG_ENTER("index_first");
-	ha_statistic_increment(&SSV::ha_read_first_count);
-
-	const int	rc = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
-
-	/* MySQL does not seem to allow this to return
-	HA_ERR_KEY_NOT_FOUND, so it is reported as end of file */
-
-	const int	result = (rc == HA_ERR_KEY_NOT_FOUND)
-		? HA_ERR_END_OF_FILE
-		: rc;
-
-	DBUG_RETURN(result);
-}
-
-/************************************************************************
-Positions a cursor on the last record in an index and reads the
-corresponding row to buf. */
-
-int
-ha_innobase::index_last(
-/*====================*/
-			/* out: 0, HA_ERR_END_OF_FILE, or error code */
-	uchar*	buf)	/* in/out: buffer for the row */
-{
-	DBUG_ENTER("index_last");
-	ha_statistic_increment(&SSV::ha_read_last_count);
-
-	const int	rc = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
-
-	/* MySQL does not seem to allow this to return
-	HA_ERR_KEY_NOT_FOUND, so it is reported as end of file */
-
-	const int	result = (rc == HA_ERR_KEY_NOT_FOUND)
-		? HA_ERR_END_OF_FILE
-		: rc;
-
-	DBUG_RETURN(result);
-}
-
-/********************************************************************
-Initializes a table scan: makes the clustered index the active index and
-flags the start of the scan. */
-
-int
-ha_innobase::rnd_init(
-/*==================*/
-			/* out: 0 or error number */
-	bool	scan)	/* in: when false, semi-consistent read is
-			disabled for this handle (see below) */
-{
-	/* The scan runs on the clustered index: the one InnoDB generated
-	internally if the table has no primary key (MAX_KEY), the primary
-	key otherwise. Note that the previously active index is not saved
-	here. */
-
-	const int	err = change_active_index(
-		prebuilt->clust_index_was_generated
-		? MAX_KEY
-		: primary_key);
-
-	/* Don't use semi-consistent read in random row reads (by
-	position). This means we must disable semi_consistent_read if
-	scan is false */
-
-	if (!scan) {
-		try_semi_consistent_read(0);
-	}
-
-	start_of_scan = 1;
-
-	return(err);
-}
-
-/*********************************************************************
-Ends a table scan by delegating to index_end(). */
-
-int
-ha_innobase::rnd_end(void)
-/*======================*/
-				/* out: 0 or error number */
-{
-	const int	rc = index_end();
-
-	return(rc);
-}
-
-/*********************************************************************
-Reads the next row in a table scan (also used to read the FIRST row
-in a table scan). The first call after rnd_init() positions the cursor
-with index_first(); later calls fetch the next row. */
-
-int
-ha_innobase::rnd_next(
-/*==================*/
-			/* out: 0, HA_ERR_END_OF_FILE, or error number */
-	uchar*	buf)	/* in/out: returns the row in this buffer,
-			in MySQL format */
-{
-	DBUG_ENTER("rnd_next");
-	ha_statistic_increment(&SSV::ha_read_rnd_next_count);
-
-	int	rc;
-
-	if (!start_of_scan) {
-		rc = general_fetch(buf, ROW_SEL_NEXT, 0);
-	} else {
-		rc = index_first(buf);
-
-		/* An empty table is reported as end of file */
-		if (rc == HA_ERR_KEY_NOT_FOUND) {
-			rc = HA_ERR_END_OF_FILE;
-		}
-
-		start_of_scan = 0;
-	}
-
-	DBUG_RETURN(rc);
-}
-
-/**************************************************************************
-Fetches a row from the table based on a row reference. The clustered index
-is made active for the lookup and the previously active index is switched
-back afterwards. */
-
-int
-ha_innobase::rnd_pos(
-/*=================*/
-			/* out: 0, HA_ERR_KEY_NOT_FOUND, or error code */
-	uchar*	buf,	/* in/out: buffer for the row */
-	uchar*	pos)	/* in: primary key value of the row in the
-			MySQL format, or the row id if the clustered
-			index was internally generated by InnoDB; the
-			length of data in pos has to be ref_length */
-{
-	const uint	saved_index = active_index;
-
-	DBUG_ENTER("rnd_pos");
-	DBUG_DUMP("key", pos, ref_length);
-
-	ha_statistic_increment(&SSV::ha_read_rnd_count);
-
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	/* If no primary key was defined for the table, InnoDB generated
-	the clustered index from the row id: the row reference is then
-	the row id, not any key value that MySQL knows of, and the index
-	is addressed with MAX_KEY */
-
-	const uint	ref_index = prebuilt->clust_index_was_generated
-		? MAX_KEY
-		: primary_key;
-
-	int		rc = change_active_index(ref_index);
-
-	if (rc != 0) {
-		DBUG_PRINT("error", ("Got error: %d", rc));
-		DBUG_RETURN(rc);
-	}
-
-	/* Note that we assume the length of the row reference is fixed
-	for the table, and it is == ref_length */
-
-	rc = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
-
-	if (rc != 0) {
-		DBUG_PRINT("error", ("Got error: %d", rc));
-	}
-
-	/* Switch back to the index that was active on entry; the
-	result of this call is not checked */
-	change_active_index(saved_index);
-
-	DBUG_RETURN(rc);
-}
-
-/*************************************************************************
-Stores a reference to the current row to 'ref' field of the handle. Note
-that in the case where we have generated the clustered index for the
-table, the function parameter is illogical: we MUST ASSUME that 'record'
-is the current 'position' of the handle, because if row ref is actually
-the row id internally generated in InnoDB, then 'record' does not contain
-it. We just guess that the row id must be for the record where the handle
-was positioned the last time. */
-
-void
-ha_innobase::position(
-/*==================*/
-	const uchar*	record)	/* in: row in MySQL format */
-{
-	uint	stored_len;
-
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	if (!prebuilt->clust_index_was_generated) {
-		/* The row reference is the primary key value */
-
-		stored_len = store_key_val_for_row(primary_key, (char*) ref,
-						   ref_length, record);
-	} else {
-		/* No primary key was defined for the table and we
-		generated the clustered index from row id: the
-		row reference will be the row id, not any key value
-		that MySQL knows of */
-
-		stored_len = DATA_ROW_ID_LEN;
-
-		memcpy(ref, prebuilt->row_id, stored_len);
-	}
-
-	/* We assume that the 'ref' value len is always fixed for the same
-	table; a mismatch is only logged. */
-
-	if (stored_len != ref_length) {
-		sql_print_error(
-			"Stored ref len is %lu, but table ref len is %lu",
-			(ulong) stored_len, (ulong) ref_length);
-	}
-}
-
-/*********************************************************************
-If it's a DB_TOO_BIG_RECORD error then set a suitable message to
-return to the client. The row size limit reported is half of the free
-space of an empty page in the given row format. Any other error code is
-ignored. */
-inline
-void
-innodb_check_for_record_too_big_error(
-/*==================================*/
-	ulint	comp,	/* in: ROW_FORMAT: nonzero=COMPACT, 0=REDUNDANT */
-	int	error)	/* in: error code to check */
-{
-	if (error != (int) DB_TOO_BIG_RECORD) {
-		return;
-	}
-
-	const ulint	row_size_limit
-		= page_get_free_space_of_empty_noninline(comp) / 2;
-
-	my_error(ER_TOO_BIG_ROWSIZE, MYF(0), row_size_limit);
-}
-
-/* Limit InnoDB monitor access to users with the PROCESS privilege.
-See http://bugs.mysql.com/32710 for an explanation of why we choose PROCESS.
-The macro is true when row_is_magic_monitor_table(table_name) is true and
-check_global_access(thd, PROCESS_ACL) returns nonzero; going by the macro
-name, a nonzero result means that access is denied
-(NOTE(review): confirm against the definition of check_global_access()). */
-#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \
- (row_is_magic_monitor_table(table_name) \
- && check_global_access(thd, PROCESS_ACL))
-
-/*********************************************************************
-Creates a table definition to an InnoDB database. An in-memory table
-object is created, one column is added to it for every field of the MySQL
-table, and the table is then created with row_create_table_for_mysql().
-Returns HA_ERR_GENERIC without creating anything if
-IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS() is true for the table name and the
-thread of the transaction; otherwise returns the result of the creation
-converted with convert_error_code_to_mysql(). */
-static
-int
-create_table_def(
-/*=============*/
- trx_t* trx, /* in: InnoDB transaction handle */
- TABLE* form, /* in: information on table
- columns and indexes */
- const char* table_name, /* in: table name */
- const char* path_of_temp_table,/* in: if this is a table explicitly
- created by the user with the
- TEMPORARY keyword, then this
- parameter is the dir path where the
- table should be placed if we create
- an .ibd file for it (no .ibd extension
- in the path, though); otherwise this
- is NULL */
- ulint flags) /* in: table flags */
-{
- Field* field;
- dict_table_t* table;
- ulint n_cols;
- int error;
- ulint col_type;
- ulint col_len;
- ulint nulls_allowed;
- ulint unsigned_type;
- ulint binary_type;
- ulint long_true_varchar;
- ulint charset_no;
- ulint i;
-
- DBUG_ENTER("create_table_def");
- DBUG_PRINT("enter", ("table_name: %s", table_name));
-
- ut_a(trx->mysql_thd != NULL);
- if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name,
- (THD*) trx->mysql_thd)) {
- DBUG_RETURN(HA_ERR_GENERIC);
- }
-
- n_cols = form->s->fields;
-
- /* We pass 0 as the space id, and determine at a lower level the space
- id where to store the table */
-
- table = dict_mem_table_create(table_name, 0, n_cols, flags);
-
- if (path_of_temp_table) {
- table->dir_path_of_temp_table =
- mem_heap_strdup(table->heap, path_of_temp_table);
- }
-
- for (i = 0; i < n_cols; i++) {
- field = form->field[i];
-
- col_type = get_innobase_type_from_mysql_type(&unsigned_type,
- field);
- if (field->null_ptr) {
- nulls_allowed = 0;
- } else {
- nulls_allowed = DATA_NOT_NULL;
- }
-
- if (field->binary()) {
- binary_type = DATA_BINARY_TYPE;
- } else {
- binary_type = 0;
- }
-
- charset_no = 0;
-
- if (dtype_is_string_type(col_type)) {
-
- charset_no = (ulint)field->charset()->number;
-
- ut_a(charset_no < 256); /* in data0type.h we assume
- that the number fits in one
- byte */
- }
-
- ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
- that this fits in one byte */
- col_len = field->pack_length();
-
- /* The MySQL pack length contains 1 or 2 bytes length field
- for a true VARCHAR. Let us subtract that, so that the InnoDB
- column length in the InnoDB data dictionary is the real
- maximum byte length of the actual data. A 2-byte length field
- is additionally recorded with the DATA_LONG_TRUE_VARCHAR flag
- in the precise type. */
-
- long_true_varchar = 0;
-
- if (field->type() == MYSQL_TYPE_VARCHAR) {
- col_len -= ((Field_varstring*)field)->length_bytes;
-
- if (((Field_varstring*)field)->length_bytes == 2) {
- long_true_varchar = DATA_LONG_TRUE_VARCHAR;
- }
- }
-
- dict_mem_table_add_col(table, table->heap,
- (char*) field->field_name,
- col_type,
- dtype_form_prtype(
- (ulint)field->type()
- | nulls_allowed | unsigned_type
- | binary_type | long_true_varchar,
- charset_no),
- col_len);
- }
-
- error = row_create_table_for_mysql(table, trx);
-
- innodb_check_for_record_too_big_error(flags & DICT_TF_COMPACT, error);
-
- error = convert_error_code_to_mysql(error, NULL);
-
- DBUG_RETURN(error);
-}
-
-/*********************************************************************
-Creates an index in an InnoDB database. Builds an in-memory index
-definition from the MySQL key description form->key_info[key_num],
-decides for every key part whether it is stored as a column prefix, and
-passes the definition to row_create_index_for_mysql(). Returns the error
-code after conversion by convert_error_code_to_mysql(). */
-static
-int
-create_index(
-/*=========*/
- trx_t* trx, /* in: InnoDB transaction handle */
- TABLE* form, /* in: information on table
- columns and indexes */
- const char* table_name, /* in: table name */
- uint key_num) /* in: index number */
-{
- Field* field;
- dict_index_t* index;
- int error;
- ulint n_fields;
- KEY* key;
- KEY_PART_INFO* key_part;
- ulint ind_type;
- ulint col_type;
- ulint prefix_len;
- ulint is_unsigned;
- ulint i;
- ulint j;
- ulint* field_lengths; /* key part lengths, one per index field */
-
- DBUG_ENTER("create_index");
-
- key = form->key_info + key_num;
-
- n_fields = key->key_parts;
-
- ind_type = 0;
-
- if (key_num == form->s->primary_key) { /* the primary key becomes the clustered index */
- ind_type = ind_type | DICT_CLUSTERED;
- }
-
- if (key->flags & HA_NOSAME ) {
- ind_type = ind_type | DICT_UNIQUE;
- }
-
- /* We pass 0 as the space id, and determine at a lower level the space
- id where to store the table */
-
- index = dict_mem_index_create((char*) table_name, key->name, 0,
- ind_type, n_fields);
-
- field_lengths = (ulint*) my_malloc(sizeof(ulint) * n_fields,
- MYF(MY_FAE)); /* released with my_free() before returning */
-
- for (i = 0; i < n_fields; i++) {
- key_part = key->key_part + i;
-
- /* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
- field in an index: we only store a specified number of first
- bytes of the column to the index field.) The flag does not
- seem to be properly set by MySQL. Let us fall back on testing
- the length of the key part versus the column. */
-
- field = NULL;
- for (j = 0; j < form->s->fields; j++) { /* look up the table column by case-insensitive name */
-
- field = form->field[j];
-
- if (0 == innobase_strcasecmp(
- field->field_name,
- key_part->field->field_name)) {
- /* Found the corresponding column */
-
- break;
- }
- }
-
- ut_a(j < form->s->fields); /* the key column must exist in the table */
-
- col_type = get_innobase_type_from_mysql_type(
- &is_unsigned, key_part->field);
-
- if (DATA_BLOB == col_type
- || (key_part->length < field->pack_length()
- && field->type() != MYSQL_TYPE_VARCHAR)
- || (field->type() == MYSQL_TYPE_VARCHAR
- && key_part->length < field->pack_length()
- - ((Field_varstring*)field)->length_bytes)) { /* for a VARCHAR the length bytes are not counted */
-
- prefix_len = key_part->length;
-
- if (col_type == DATA_INT
- || col_type == DATA_FLOAT
- || col_type == DATA_DOUBLE
- || col_type == DATA_DECIMAL) { /* numeric column: report the error and index the whole column */
- sql_print_error(
- "MySQL is trying to create a column "
- "prefix index field, on an "
- "inappropriate data type. Table "
- "name %s, column name %s.",
- table_name,
- key_part->field->field_name);
-
- prefix_len = 0;
- }
- } else {
- prefix_len = 0; /* 0 means that this field is not a column prefix */
- }
-
- field_lengths[i] = key_part->length;
-
- dict_mem_index_add_field(index,
- (char*) key_part->field->field_name, prefix_len);
- }
-
- /* Even though we've defined max_supported_key_part_length, we
- still do our own checking using field_lengths to be absolutely
- sure we don't create too long indexes. */
- error = row_create_index_for_mysql(index, trx, field_lengths);
-
- innodb_check_for_record_too_big_error(form->s->row_type
- != ROW_TYPE_REDUNDANT, error);
-
- error = convert_error_code_to_mysql(error, NULL);
-
- my_free(field_lengths, MYF(0));
-
- DBUG_RETURN(error);
-}
-
-/*********************************************************************
-Creates the clustered index "GEN_CLUST_INDEX" for an InnoDB table for
-which the user has defined no primary index. */
-static
-int
-create_clustered_index_when_no_primary(
-/*===================================*/
-					/* out: error code converted by
-					convert_error_code_to_mysql() */
-	trx_t*		trx,		/* in: InnoDB transaction handle */
-	ulint		comp,		/* in: ROW_FORMAT:
-					nonzero=COMPACT, 0=REDUNDANT */
-	const char*	table_name)	/* in: table name */
-{
-	/* The space id is passed as 0: the space where the table is
-	stored is determined at a lower level. The index is created with
-	zero fields. */
-
-	dict_index_t*	clust_index = dict_mem_index_create(
-		table_name, "GEN_CLUST_INDEX", 0, DICT_CLUSTERED, 0);
-
-	int		err = row_create_index_for_mysql(clust_index, trx,
-							 NULL);
-
-	innodb_check_for_record_too_big_error(comp, err);
-
-	err = convert_error_code_to_mysql(err, NULL);
-
-	return(err);
-}
-
-/*********************************************************************
-Fills in the auto-increment value of create_info from the table
-statistics, unless the caller has already marked the value as given
-(HA_CREATE_USED_AUTO). Used in SHOW CREATE TABLE et al. */
-
-void
-ha_innobase::update_create_info(
-/*============================*/
-	HA_CREATE_INFO*	create_info)	/* in/out: create info */
-{
-	if (create_info->used_fields & HA_CREATE_USED_AUTO) {
-		/* The auto-increment value is already set: keep it */
-
-		return;
-	}
-
-	/* Refresh stats.auto_increment_value and copy it over */
-
-	ha_innobase::info(HA_STATUS_AUTO);
-
-	create_info->auto_increment_value = stats.auto_increment_value;
-}
-
-/*********************************************************************
-Creates a new table to an InnoDB database. The table definition, the
-clustered index, the secondary indexes and the foreign key constraints
-are created in a separate InnoDB transaction while the data dictionary
-is latched exclusively. On any failure the transaction is committed, the
-dictionary latch is released and the converted error code is returned. */
-
-int
-ha_innobase::create(
-/*================*/
-					/* out: error number */
-	const char*	name,		/* in: table name */
-	TABLE*		form,		/* in: information on table
-					columns and indexes */
-	HA_CREATE_INFO*	create_info)	/* in: more information of the
-					created table, contains also the
-					create statement string */
-{
-	int		error;
-	dict_table_t*	innobase_table;
-	trx_t*		parent_trx;
-	trx_t*		trx;
-	int		primary_key_no;
-	uint		i;
-	char		name2[FN_REFLEN];
-	char		norm_name[FN_REFLEN];
-	THD*		thd = ha_thd();
-	ib_longlong	auto_inc_value;
-	ulint		flags;
-
-	DBUG_ENTER("ha_innobase::create");
-
-	DBUG_ASSERT(thd != NULL);
-	DBUG_ASSERT(create_info != NULL);
-
-#ifdef __WIN__
-	/* Names passed in from server are in two formats:
-	1. <database_name>/<table_name>: for normal table creation
-	2. full path: for temp table creation, or sym link
-
-	When srv_file_per_table is on, check for full path pattern, i.e.
-	X:\dir\...,		X is a driver letter, or
-	\\dir1\dir2\...,	UNC path
-	returns error if it is in full path format, but not creating a temp.
-	table. Currently InnoDB does not support symbolic link on Windows. */
-
-	/* The negation must apply to the result of the bit test: unary !
-	binds tighter than &, so (!options & FLAG) would test the flag
-	against the value 0 or 1 instead of against the options. */
-
-	if (srv_file_per_table
-	    && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
-
-		if ((name[1] == ':')
-		    || (name[0] == '\\' && name[1] == '\\')) {
-			sql_print_error("Cannot create table %s\n", name);
-			DBUG_RETURN(HA_ERR_GENERIC);
-		}
-	}
-#endif
-
-	if (form->s->fields > 1000) {
-		/* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
-		but we play safe here */
-
-		DBUG_RETURN(HA_ERR_TO_BIG_ROW);
-	}
-
-	/* Get the transaction associated with the current thd, or create one
-	if not yet created */
-
-	parent_trx = check_trx_exists(thd);
-
-	/* In case MySQL calls this in the middle of a SELECT query, release
-	possible adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(parent_trx);
-
-	/* The table is created in a transaction of its own, which
-	inherits the relevant session options from thd */
-
-	trx = trx_allocate_for_mysql();
-
-	trx->mysql_thd = thd;
-	trx->mysql_query_str = thd_query(thd);
-
-	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-		trx->check_foreigns = FALSE;
-	}
-
-	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
-		trx->check_unique_secondary = FALSE;
-	}
-
-	if (lower_case_table_names) {
-		srv_lower_case_table_names = TRUE;
-	} else {
-		srv_lower_case_table_names = FALSE;
-	}
-
-	strcpy(name2, name);
-
-	normalize_table_name(norm_name, name2);
-
-	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
-	or lock waits can happen in it during a table create operation.
-	Drop table etc. do this latching in row0mysql.c. */
-
-	row_mysql_lock_data_dictionary(trx);
-
-	/* Create the table definition in InnoDB */
-
-	flags = 0;
-
-	if (form->s->row_type != ROW_TYPE_REDUNDANT) {
-		flags |= DICT_TF_COMPACT;
-	}
-
-	/* For a temporary table the original name is passed on as the
-	path of the table; otherwise NULL is passed */
-
-	error = create_table_def(trx, form, norm_name,
-		create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
-		flags);
-
-	if (error) {
-		goto cleanup;
-	}
-
-	/* Look for a primary key */
-
-	primary_key_no= (form->s->primary_key != MAX_KEY ?
-			 (int) form->s->primary_key :
-			 -1);
-
-	/* Our function row_get_mysql_key_number_for_index assumes
-	the primary key is always number 0, if it exists */
-
-	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
-
-	/* Create the keys */
-
-	if (form->s->keys == 0 || primary_key_no == -1) {
-		/* Create an index which is used as the clustered index;
-		order the rows by their row id which is internally generated
-		by InnoDB */
-
-		error = create_clustered_index_when_no_primary(
-			trx, form->s->row_type != ROW_TYPE_REDUNDANT,
-			norm_name);
-		if (error) {
-			goto cleanup;
-		}
-	}
-
-	if (primary_key_no != -1) {
-		/* In InnoDB the clustered index must always be created
-		first */
-		if ((error = create_index(trx, form, norm_name,
-					  (uint) primary_key_no))) {
-			goto cleanup;
-		}
-	}
-
-	/* Create the remaining (secondary) indexes */
-
-	for (i = 0; i < form->s->keys; i++) {
-
-		if (i != (uint) primary_key_no) {
-
-			if ((error = create_index(trx, form, norm_name, i))) {
-				goto cleanup;
-			}
-		}
-	}
-
-	if (*trx->mysql_query_str) {
-		error = row_table_add_foreign_constraints(trx,
-			*trx->mysql_query_str, norm_name,
-			create_info->options & HA_LEX_CREATE_TMP_TABLE);
-
-		error = convert_error_code_to_mysql(error, NULL);
-
-		if (error) {
-			goto cleanup;
-		}
-	}
-
-	innobase_commit_low(trx);
-
-	row_mysql_unlock_data_dictionary(trx);
-
-	/* Flush the log to reduce probability that the .frm files and
-	the InnoDB data dictionary get out-of-sync if the user runs
-	with innodb_flush_log_at_trx_commit = 0 */
-
-	log_buffer_flush_to_disk();
-
-	innobase_table = dict_table_get(norm_name, FALSE);
-
-	DBUG_ASSERT(innobase_table != 0);
-
-	/* Note: We can't call update_thd() as prebuilt will not be
-	setup at this stage and so we use thd. */
-
-	/* We need to copy the AUTOINC value from the old table if
-	this is an ALTER TABLE. */
-
-	if (((create_info->used_fields & HA_CREATE_USED_AUTO)
-	    || thd_sql_command(thd) == SQLCOM_ALTER_TABLE)
-	    && create_info->auto_increment_value != 0) {
-
-		/* Query was ALTER TABLE...AUTO_INCREMENT = x; or
-		CREATE TABLE ...AUTO_INCREMENT = x; Find out a table
-		definition from the dictionary and get the current value
-		of the auto increment field. Set a new value to the
-		auto increment field if the value is greater than the
-		maximum value in the column. */
-
-		auto_inc_value = create_info->auto_increment_value;
-
-		dict_table_autoinc_lock(innobase_table);
-		dict_table_autoinc_initialize(innobase_table, auto_inc_value);
-		dict_table_autoinc_unlock(innobase_table);
-	}
-
-	/* Tell the InnoDB server that there might be work for
-	utility threads: */
-
-	srv_active_wake_master_thread();
-
-	trx_free_for_mysql(trx);
-
-	DBUG_RETURN(0);
-
-cleanup:
-	/* Error exit: commit the transaction, release the dictionary latch
-	and free the transaction object before reporting the error */
-
-	innobase_commit_low(trx);
-
-	row_mysql_unlock_data_dictionary(trx);
-
-	trx_free_for_mysql(trx);
-
-	DBUG_RETURN(error);
-}
-
-/*********************************************************************
-Discards or imports the tablespace of the table of this handle,
-depending on the value of the parameter. */
-
-int
-ha_innobase::discard_or_import_tablespace(
-/*======================================*/
-				/* out: 0 == success, -1 == error */
-	my_bool	discard)	/* in: TRUE if discard, else import */
-{
-	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");
-
-	/* The handle must carry a valid transaction that belongs to the
-	current thd */
-
-	ut_a(prebuilt->trx);
-	ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	dict_table_t*	ib_table = prebuilt->table;
-	trx_t*		ib_trx = prebuilt->trx;
-
-	int	ret = discard
-		? row_discard_tablespace_for_mysql(ib_table->name, ib_trx)
-		: row_import_tablespace_for_mysql(ib_table->name, ib_trx);
-
-	ret = convert_error_code_to_mysql(ret, NULL);
-
-	DBUG_RETURN(ret);
-}
-
-/*********************************************************************
-Deletes all rows of an InnoDB table. Only TRUNCATE TABLE is handled
-here; in every other case, and when InnoDB reports DB_ERROR for the
-truncate, HA_ERR_WRONG_COMMAND is returned and stored in my_errno. */
-
-int
-ha_innobase::delete_all_rows(void)
-/*==============================*/
-				/* out: error number */
-{
-	int	err;
-
-	DBUG_ENTER("ha_innobase::delete_all_rows");
-
-	/* Get the transaction associated with the current thd, or create one
-	if not yet created, and update prebuilt->trx */
-
-	update_thd(ha_thd());
-
-	if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) {
-
-		/* Truncate the table in InnoDB */
-
-		err = row_truncate_table_for_mysql(prebuilt->table,
-						   prebuilt->trx);
-
-		if (err != DB_ERROR) {
-			err = convert_error_code_to_mysql(err, NULL);
-
-			DBUG_RETURN(err);
-		}
-
-		/* Cannot truncate; resort to ha_innobase::delete_row() */
-	}
-
-	/* We only handle TRUNCATE TABLE t as a special case.
-	DELETE FROM t will have to use ha_innobase::delete_row(),
-	because DELETE is transactional while TRUNCATE is not. */
-
-	DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
-}
-
-/*********************************************************************
-Drops a table from an InnoDB database. Before calling this function,
-MySQL calls innobase_commit to commit the transaction of the current user.
-Then the current user cannot have locks set on the table. Drop table
-operation inside InnoDB will remove all locks any user has on the table
-inside InnoDB. The drop is run in a separate InnoDB transaction which is
-committed and freed before returning. */
-
-int
-ha_innobase::delete_table(
-/*======================*/
-				/* out: error number */
-	const char*	name)	/* in: table name */
-{
-	ulint	name_len;
-	int	error;
-	trx_t*	parent_trx;
-	trx_t*	trx;
-	THD	*thd = ha_thd();
-	char	norm_name[1000];
-
-	DBUG_ENTER("ha_innobase::delete_table");
-
-	/* norm_name is a 1000-byte buffer that normalize_table_name()
-	fills from name: check the length before the buffer is written,
-	not after. */
-
-	name_len = strlen(name);
-
-	assert(name_len < 1000);
-
-	/* Strangely, MySQL passes the table name without the '.frm'
-	extension, in contrast to ::create */
-	normalize_table_name(norm_name, name);
-
-	if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
-		DBUG_RETURN(HA_ERR_GENERIC);
-	}
-
-	/* Get the transaction associated with the current thd, or create one
-	if not yet created */
-
-	parent_trx = check_trx_exists(thd);
-
-	/* In case MySQL calls this in the middle of a SELECT query, release
-	possible adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(parent_trx);
-
-	if (lower_case_table_names) {
-		srv_lower_case_table_names = TRUE;
-	} else {
-		srv_lower_case_table_names = FALSE;
-	}
-
-	trx = trx_allocate_for_mysql();
-
-	trx->mysql_thd = thd;
-	trx->mysql_query_str = thd_query(thd);
-
-	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-		trx->check_foreigns = FALSE;
-	}
-
-	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
-		trx->check_unique_secondary = FALSE;
-	}
-
-	/* Drop the table in InnoDB; the last argument tells whether the
-	SQL command being executed is DROP DATABASE */
-
-	error = row_drop_table_for_mysql(norm_name, trx,
-					 thd_sql_command(thd)
-					 == SQLCOM_DROP_DB);
-
-	/* Flush the log to reduce probability that the .frm files and
-	the InnoDB data dictionary get out-of-sync if the user runs
-	with innodb_flush_log_at_trx_commit = 0 */
-
-	log_buffer_flush_to_disk();
-
-	/* Tell the InnoDB server that there might be work for
-	utility threads: */
-
-	srv_active_wake_master_thread();
-
-	innobase_commit_low(trx);
-
-	trx_free_for_mysql(trx);
-
-	error = convert_error_code_to_mysql(error, NULL);
-
-	DBUG_RETURN(error);
-}
-
-/*********************************************************************
-Removes all tables in the named database inside InnoDB. The function
-returns nothing: unless NO_LONGER_INTERESTED_IN_DROP_DB_ERROR is
-defined, the error code of the drop operation is not reported to the
-caller. */
-static
-void
-innobase_drop_database(
-/*===================*/
-	handlerton *hton, /* in: handlerton of Innodb */
-	char*	path)	/* in: database path; inside InnoDB the name
-			of the last directory in the path is used as
-			the database name: for example, in 'mysql/data/test'
-			the database name is 'test' */
-{
-	ulint	len		= 0;
-	trx_t*	parent_trx;
-	trx_t*	trx;
-	char*	ptr;
-	int	error;
-	char*	namebuf;
-	THD*	thd		= current_thd;
-
-	/* Get the transaction associated with the current thd, or create one
-	if not yet created */
-
-	parent_trx = check_trx_exists(thd);
-
-	/* In case MySQL calls this in the middle of a SELECT query, release
-	possible adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(parent_trx);
-
-	/* Scan backwards from the second to last character of the path
-	to the previous path separator: the characters passed over form
-	the database name, and len counts them */
-
-	ptr = strend(path) - 2;
-
-	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
-		ptr--;
-		len++;
-	}
-
-	ptr++;
-
-	/* The buffer holds the name, a trailing '/' and the terminating
-	null character. MY_FAE is used, as in the other allocations of
-	this file, because the result is written to without a NULL
-	check. */
-
-	namebuf = (char*) my_malloc((uint) len + 2, MYF(MY_FAE));
-
-	memcpy(namebuf, ptr, len);
-	namebuf[len] = '/';
-	namebuf[len + 1] = '\0';
-#ifdef	__WIN__
-	innobase_casedn_str(namebuf);
-#endif
-	trx = trx_allocate_for_mysql();
-	trx->mysql_thd = thd;
-	trx->mysql_query_str = thd_query(thd);
-
-	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-		trx->check_foreigns = FALSE;
-	}
-
-	error = row_drop_database_for_mysql(namebuf, trx);
-	my_free(namebuf, MYF(0));
-
-	/* Flush the log to reduce probability that the .frm files and
-	the InnoDB data dictionary get out-of-sync if the user runs
-	with innodb_flush_log_at_trx_commit = 0 */
-
-	log_buffer_flush_to_disk();
-
-	/* Tell the InnoDB server that there might be work for
-	utility threads: */
-
-	srv_active_wake_master_thread();
-
-	innobase_commit_low(trx);
-	trx_free_for_mysql(trx);
-#ifdef NO_LONGER_INTERESTED_IN_DROP_DB_ERROR
-	error = convert_error_code_to_mysql(error, NULL);
-
-	return(error);
-#else
-	return;
-#endif
-}
-
-/*************************************************************************
-Renames an InnoDB table. The rename is run in a separate InnoDB
-transaction which is committed and freed before returning. */
-
-int
-ha_innobase::rename_table(
-/*======================*/
-				/* out: 0 or error code */
-	const char*	from,	/* in: old name of the table */
-	const char*	to)	/* in: new name of the table */
-{
-	THD*	thd = ha_thd();
-	char	norm_from[1000];
-	char	norm_to[1000];
-	ulint	from_len;
-	ulint	to_len;
-	trx_t*	parent_trx;
-	trx_t*	trx;
-	int	err;
-
-	DBUG_ENTER("ha_innobase::rename_table");
-
-	/* Fetch, or create if it does not exist yet, the transaction
-	associated with the current thd */
-
-	parent_trx = check_trx_exists(thd);
-
-	/* MySQL may call this in the middle of a SELECT query: release a
-	possibly held adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(parent_trx);
-
-	srv_lower_case_table_names = lower_case_table_names ? TRUE : FALSE;
-
-	trx = trx_allocate_for_mysql();
-	trx->mysql_thd = thd;
-	trx->mysql_query_str = thd_query(thd);
-
-	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-		trx->check_foreigns = FALSE;
-	}
-
-	/* Both names are checked against the size of the name buffers
-	before they are normalized */
-
-	from_len = strlen(from);
-	to_len = strlen(to);
-
-	assert(from_len < 1000);
-	assert(to_len < 1000);
-
-	normalize_table_name(norm_from, from);
-	normalize_table_name(norm_to, to);
-
-	/* Rename the table in InnoDB */
-
-	err = row_rename_table_for_mysql(norm_from, norm_to, trx);
-
-	/* Flush the log to reduce probability that the .frm files and
-	the InnoDB data dictionary get out-of-sync if the user runs
-	with innodb_flush_log_at_trx_commit = 0 */
-
-	log_buffer_flush_to_disk();
-
-	/* Tell the InnoDB server that there might be work for
-	utility threads: */
-
-	srv_active_wake_master_thread();
-
-	innobase_commit_low(trx);
-	trx_free_for_mysql(trx);
-
-	err = convert_error_code_to_mysql(err, NULL);
-
-	DBUG_RETURN(err);
-}
-
-/*************************************************************************
-Estimates the number of index records in a range. Both range ends are
-converted to InnoDB search tuples and the estimate is asked from
-btr_estimate_n_rows_in_range(). An estimate of 0 is never returned. */
-
-ha_rows
-ha_innobase::records_in_range(
-/*==========================*/
-						/* out: estimated number of
-						rows */
-	uint			keynr,		/* in: index number */
-	key_range		*min_key,	/* in: start key value of the
-						range, may also be 0 */
-	key_range		*max_key)	/* in: range end key val, may
-						also be 0 */
-{
-	/* Scratch buffer for the converted end key; the start key uses
-	the buffer key_val_buff of the handle */
-	ulint		buff2_len	= table->s->reclength
-					+ table->s->max_key_length + 100;
-	uchar*		key_val_buff2	= (uchar*) my_malloc(
-						  table->s->reclength
-					+ table->s->max_key_length + 100,
-								MYF(MY_FAE));
-	KEY*		mysql_key;
-	dict_index_t*	ib_index;
-	dtuple_t*	start_tuple;
-	dtuple_t*	end_tuple;
-	ib_longlong	estimate;
-	ulint		start_mode;
-	ulint		end_mode;
-	void*		start_heap;
-	void*		end_heap;
-
-	DBUG_ENTER("records_in_range");
-
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	prebuilt->trx->op_info = (char*)"estimating records in index range";
-
-	/* MySQL may call this in the middle of a SELECT query: release a
-	possibly held adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-	active_index = keynr;
-
-	mysql_key = table->key_info + active_index;
-
-	ib_index = dict_table_get_index_noninline(prebuilt->table,
-						  mysql_key->name);
-
-	/* Build one search tuple for each end of the range */
-
-	start_tuple = dtuple_create_for_mysql(&start_heap,
-					      mysql_key->key_parts);
-	dict_index_copy_types(start_tuple, ib_index, mysql_key->key_parts);
-
-	end_tuple = dtuple_create_for_mysql(&end_heap, mysql_key->key_parts);
-	dict_index_copy_types(end_tuple, ib_index, mysql_key->key_parts);
-
-	/* A missing range end is converted as a null key of length 0 */
-
-	row_sel_convert_mysql_key_to_innobase(
-				start_tuple, (byte*) key_val_buff,
-				(ulint)upd_and_key_val_buff_len,
-				ib_index,
-				(byte*) (min_key ? min_key->key :
-					 (const uchar*) 0),
-				(ulint) (min_key ? min_key->length : 0),
-				prebuilt->trx);
-
-	row_sel_convert_mysql_key_to_innobase(
-				end_tuple, (byte*) key_val_buff2,
-				buff2_len, ib_index,
-				(byte*) (max_key ? max_key->key :
-					 (const uchar*) 0),
-				(ulint) (max_key ? max_key->length : 0),
-				prebuilt->trx);
-
-	start_mode = convert_search_mode_to_innobase(
-		min_key ? min_key->flag : HA_READ_KEY_EXACT);
-	end_mode = convert_search_mode_to_innobase(
-		max_key ? max_key->flag : HA_READ_KEY_EXACT);
-
-	if (start_mode == PAGE_CUR_UNSUPP || end_mode == PAGE_CUR_UNSUPP) {
-
-		/* A search mode that cannot be converted: no estimate */
-
-		estimate = HA_POS_ERROR;
-	} else {
-
-		estimate = btr_estimate_n_rows_in_range(ib_index,
-							start_tuple,
-							start_mode,
-							end_tuple,
-							end_mode);
-	}
-
-	dtuple_free_for_mysql(start_heap);
-	dtuple_free_for_mysql(end_heap);
-
-	my_free(key_val_buff2, MYF(0));
-
-	prebuilt->trx->op_info = (char*)"";
-
-	/* The MySQL optimizer seems to believe an estimate of 0 rows is
-	always accurate and may return the result 'Empty set' based on that.
-	The accuracy is not guaranteed, and even if it were, for a locking
-	read we should anyway perform the search to set the next-key lock.
-	Add 1 to the value to make sure MySQL does not make the assumption! */
-
-	if (estimate == 0) {
-		estimate = 1;
-	}
-
-	DBUG_RETURN((ha_rows) estimate);
-}
-
-/*************************************************************************
-Gives an UPPER BOUND to the number of rows in a table, calculated from
-the number of leaf pages of the first index of the table and the minimum
-record length of that index. This is used in filesort.cc. */
-
-ha_rows
-ha_innobase::estimate_rows_upper_bound(void)
-/*======================================*/
-			/* out: upper bound of rows */
-{
-	dict_index_t*	first_index;
-	ulonglong	data_bytes;
-	ulonglong	upper_bound;
-
-	DBUG_ENTER("estimate_rows_upper_bound");
-
-	/* We do not know if MySQL can call this function before calling
-	external_lock(). To be safe, update the thd of the current table
-	handle. */
-
-	update_thd(ha_thd());
-
-	prebuilt->trx->op_info = (char*)
-				 "calculating upper bound for table rows";
-
-	/* MySQL may call this in the middle of a SELECT query: release a
-	possibly held adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-	first_index = dict_table_get_first_index_noninline(prebuilt->table);
-
-	data_bytes = (ulonglong) first_index->stat_n_leaf_pages;
-	data_bytes = data_bytes * UNIV_PAGE_SIZE;
-
-	/* Calculate a minimum length for a clustered index record and from
-	that an upper bound for the number of rows. Since we only calculate
-	new statistics in row0mysql.c when a table has grown by a threshold
-	factor, we must add a safety factor 2 in front of the formula below. */
-
-	upper_bound = 2 * data_bytes
-		/ dict_index_calc_min_rec_len(first_index);
-
-	prebuilt->trx->op_info = (char*)"";
-
-	DBUG_RETURN((ha_rows) upper_bound);
-}
-
-/*************************************************************************
-How many seeks it will take to read through the table. This is to be
-comparable to the number returned by records_in_range so that we can
-decide if we should scan the table or use keys. The value returned is
-the stat_clustered_index_size of the table. */
-
-double
-ha_innobase::scan_time()
-/*====================*/
-			/* out: estimated time measured in disk seeks */
-{
-	/* Since MySQL seems to favor table scans too much over index
-	searches, we pretend that a sequential read takes the same time
-	as a random disk read, that is, we do not divide the following
-	by 10, which would be physically realistic. */
-
-	const double	n_seeks =
-		(double) (prebuilt->table->stat_clustered_index_size);
-
-	return(n_seeks);
-}
-
-/**********************************************************************
-Calculate the time it takes to read a set of ranges through an index
-This enables us to optimise reads for clustered indexes. For any index
-other than the primary key the calculation of the base class is used. */
-
-double
-ha_innobase::read_time(
-/*===================*/
-			/* out: estimated time measured in disk seeks */
-	uint	index,	/* in: key number */
-	uint	ranges,	/* in: how many ranges */
-	ha_rows rows)	/* in: estimated number of rows in the ranges */
-{
-	double	scan_cost;
-	ha_rows	max_rows;
-
-	if (index != table->s->primary_key) {
-		/* Not clustered */
-		return(handler::read_time(index, ranges, rows));
-	}
-
-	if (rows <= 2) {
-		/* Very few rows: the row count itself is the estimate */
-
-		return((double) rows);
-	}
-
-	/* Assume that the read time is proportional to the scan time for all
-	rows + at most one seek per range. */
-
-	scan_cost = scan_time();
-
-	max_rows = estimate_rows_upper_bound();
-
-	if (max_rows < rows) {
-		/* More rows requested than the upper bound of rows in the
-		table: the cost is that of a full scan */
-
-		return(scan_cost);
-	}
-
-	return(ranges + (double) rows / (double) max_rows * scan_cost);
-}
-
-/*************************************************************************
-Returns statistics information of the table to the MySQL interpreter,
-in various fields of the handle object. The bits of the flag parameter
-select what is updated:
-HA_STATUS_TIME:     optionally recalculates the table statistics and sets
-                    stats.create_time from the status of the .frm file;
-HA_STATUS_VARIABLE: row count, data and index lengths, free space and
-                    mean record length in stats;
-HA_STATUS_CONST:    rec_per_key of every key part in table->key_info;
-HA_STATUS_ERRKEY:   errkey, from the error info of the transaction;
-HA_STATUS_AUTO:     stats.auto_increment_value.
-The function always returns 0. */
-
-int
-ha_innobase::info(
-/*==============*/
- uint flag) /* in: what information MySQL requests */
-{
- dict_table_t* ib_table;
- dict_index_t* index;
- ha_rows rec_per_key;
- ib_longlong n_rows;
- ulong j;
- ulong i;
- char path[FN_REFLEN];
- os_file_stat_t stat_info;
-
- DBUG_ENTER("info");
-
- /* If we are forcing recovery at a high level, we will suppress
- statistics calculation on tables, because that may crash the
- server if an index is badly corrupted. */
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-
- /* We return success (0) instead of HA_ERR_CRASHED,
- because we want MySQL to process this query and not
- stop, like it would do if it received the error code
- HA_ERR_CRASHED. */
-
- DBUG_RETURN(0);
- }
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(ha_thd());
-
- /* Set the operation description of the transaction. Then, in case
- MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- prebuilt->trx->op_info = (char*)"returning various info to MySQL";
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- ib_table = prebuilt->table;
-
- if (flag & HA_STATUS_TIME) {
- if (innobase_stats_on_metadata) {
- /* In sql_show we call with this flag: update
- the statistics so that they are up-to-date */
-
- prebuilt->trx->op_info = "updating table statistics";
-
- dict_update_statistics(ib_table);
-
- prebuilt->trx->op_info = "returning various info to MySQL";
- }
-
- my_snprintf(path, sizeof(path), "%s/%s%s",
- mysql_data_home, ib_table->name, reg_ext); /* data home + table name + reg_ext */
-
- unpack_filename(path,path);
-
- /* Note that we do not know the access time of the table,
- nor the CHECK TABLE time, nor the UPDATE or INSERT time. */
-
- if (os_file_get_status(path,&stat_info)) {
- stats.create_time = stat_info.ctime;
- }
- }
-
- if (flag & HA_STATUS_VARIABLE) {
- n_rows = ib_table->stat_n_rows;
-
- /* Because we do not protect stat_n_rows by any mutex in a
- delete, it is theoretically possible that the value can be
- smaller than zero! TODO: fix this race.
-
- The MySQL optimizer seems to assume in a left join that n_rows
- is an accurate estimate if it is zero. Of course, it is not,
- since we do not have any locks on the rows yet at this phase.
- Since SHOW TABLE STATUS seems to call this function with the
- HA_STATUS_TIME flag set, while the left join optimizer does not
- set that flag, we add one to a zero value if the flag is not
- set. That way SHOW TABLE STATUS will show the best estimate,
- while the optimizer never sees the table empty. */
-
- if (n_rows < 0) {
- n_rows = 0;
- }
-
- if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
- n_rows++;
- }
-
- /* Fix bug#40386: Not flushing query cache after truncate.
- n_rows can not be 0 unless the table is empty, set to 1
- instead. The original problem of bug#29507 is actually
- fixed in the server code. */
- if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) {
-
- n_rows = 1;
-
- /* We need to reset the prebuilt value too, otherwise
- checks for values greater than the last value written
- to the table will fail and the autoinc counter will
- not be updated. This will force write_row() into
- attempting an update of the table's AUTOINC counter. */
-
- prebuilt->autoinc_last_value = 0;
- }
-
- stats.records = (ha_rows)n_rows;
- stats.deleted = 0;
- stats.data_file_length = ((ulonglong)
- ib_table->stat_clustered_index_size)
- * UNIV_PAGE_SIZE; /* the size statistic is multiplied by the page size */
- stats.index_file_length = ((ulonglong)
- ib_table->stat_sum_of_other_index_sizes)
- * UNIV_PAGE_SIZE;
-
- /* Since fsp_get_available_space_in_free_extents() is
- acquiring latches inside InnoDB, we do not call it if we
- are asked by MySQL to avoid locking. Another reason to
- avoid the call is that it uses quite a lot of CPU.
- See Bug#38185.
- We do not update delete_length if no locking is requested
- so the "old" value can remain. delete_length is initialized
- to 0 in the ha_statistics' constructor. */
- if (!(flag & HA_STATUS_NO_LOCK)) {
-
- /* lock the data dictionary to avoid races with
- ibd_file_missing and tablespace_discarded */
- row_mysql_lock_data_dictionary(prebuilt->trx);
-
- /* ib_table->space must be an existent tablespace */
- if (!ib_table->ibd_file_missing
- && !ib_table->tablespace_discarded) {
-
- stats.delete_length =
- fsp_get_available_space_in_free_extents(
- ib_table->space) * 1024;
- } else {
-
- THD* thd;
-
- thd = ha_thd();
-
- push_warning_printf(
- thd,
- MYSQL_ERROR::WARN_LEVEL_WARN,
- ER_CANT_GET_STAT,
- "InnoDB: Trying to get the free "
- "space for table %s but its "
- "tablespace has been discarded or "
- "the .ibd file is missing. Setting "
- "the free space to zero.",
- ib_table->name);
-
- stats.delete_length = 0;
- }
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- }
-
- stats.check_time = 0;
-
- if (stats.records == 0) { /* avoid a division by zero below */
- stats.mean_rec_length = 0;
- } else {
- stats.mean_rec_length = (ulong) (stats.data_file_length / stats.records);
- }
- }
-
- if (flag & HA_STATUS_CONST) {
- index = dict_table_get_first_index_noninline(ib_table);
-
- if (prebuilt->clust_index_was_generated) { /* skip the generated clustered index: the MySQL keys start from the next index */
- index = dict_table_get_next_index_noninline(index);
- }
-
- for (i = 0; i < table->s->keys; i++) { /* the MySQL keys and the InnoDB indexes are walked in step */
- if (index == NULL) {
- sql_print_error("Table %s contains fewer "
- "indexes inside InnoDB than "
- "are defined in the MySQL "
- ".frm file. Have you mixed up "
- ".frm files from different "
- "installations? See "
-"http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
-
- ib_table->name);
- break;
- }
-
- for (j = 0; j < table->key_info[i].key_parts; j++) {
-
- if (j + 1 > index->n_uniq) {
- sql_print_error(
-"Index %s of %s has %lu columns unique inside InnoDB, but MySQL is asking "
-"statistics for %lu columns. Have you mixed up .frm files from different "
-"installations? "
-"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
- index->name,
- ib_table->name,
- (unsigned long)
- index->n_uniq, j + 1);
- break;
- }
-
- if (index->stat_n_diff_key_vals[j + 1] == 0) { /* no statistic for this prefix: fall back to the row count */
-
- rec_per_key = stats.records;
- } else {
- rec_per_key = (ha_rows)(stats.records /
- index->stat_n_diff_key_vals[j + 1]);
- }
-
- /* Since MySQL seems to favor table scans
- too much over index searches, we pretend
- index selectivity is 2 times better than
- our estimate: */
-
- rec_per_key = rec_per_key / 2;
-
- if (rec_per_key == 0) {
- rec_per_key = 1;
- }
-
- table->key_info[i].rec_per_key[j]=
- rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
- (ulong) rec_per_key; /* clamp to the largest ulong value */
- }
-
- index = dict_table_get_next_index_noninline(index);
- }
- }
-
- if (flag & HA_STATUS_ERRKEY) {
- ut_a(prebuilt->trx);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
-
- errkey = (unsigned int) row_get_mysql_key_number_for_index(
- (dict_index_t*) trx_get_error_info(prebuilt->trx));
- }
-
- if (flag & HA_STATUS_AUTO && table->found_next_number_field) {
- stats.auto_increment_value = innobase_peek_autoinc();
- }
-
- prebuilt->trx->op_info = (char*)"";
-
- DBUG_RETURN(0);
-}
-
-/**************************************************************************
-Refreshes the statistics of the table by delegating to ::info() with the
-time, constant and variable status flags all set. The index cardinalities
-are estimates based on 8 random dives into each index tree; this does NOT
-calculate exact statistics on the table. */
-
-int
-ha_innobase::analyze(
-/*=================*/
-                /* out: returns always 0 (success) */
-    THD*          thd,        /* in: connection thread handle */
-    HA_CHECK_OPT* check_opt)  /* in: currently ignored */
-{
-    /* Ask ::info() for every kind of statistics it can refresh */
-    const uint all_stats = HA_STATUS_TIME
-        | HA_STATUS_CONST
-        | HA_STATUS_VARIABLE;
-
-    info(all_stats);
-
-    return(0);
-}
-
-/**************************************************************************
-Handler for OPTIMIZE TABLE. Always answers HA_ADMIN_TRY_ALTER; this is
-mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds the table
-in MySQL. */
-
-int
-ha_innobase::optimize(
-/*==================*/
-                /* out: always HA_ADMIN_TRY_ALTER */
-    THD*          thd,        /* in: connection thread handle */
-    HA_CHECK_OPT* check_opt)  /* in: currently ignored */
-{
-    const int admin_code = HA_ADMIN_TRY_ALTER;
-
-    return(admin_code);
-}
-
-/***********************************************************************
-Tries to check that an InnoDB table is not corrupted. If corruption is
-noticed, prints to stderr information about it. In case of corruption
-may also assert a failure and crash the server. */
-
-int
-ha_innobase::check(
-/*===============*/
-                /* out: HA_ADMIN_CORRUPT or
-                HA_ADMIN_OK */
-    THD*          thd,        /* in: user thread handle */
-    HA_CHECK_OPT* check_opt)  /* in: check options, currently
-                              ignored */
-{
-    DBUG_ASSERT(thd == ha_thd());
-    ut_a(prebuilt->trx);
-    ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
-    ut_a(prebuilt->trx == thd_to_trx(thd));
-
-    /* The index scans done while checking need a template; build a
-    dummy whole-row one if the handle does not have any yet */
-    if (!prebuilt->mysql_template) {
-        build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
-    }
-
-    const ulint check_result = row_check_table_for_mysql(prebuilt);
-
-    return(check_result == DB_SUCCESS ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
-}
-
-/*****************************************************************
-Adds information about free space in the InnoDB tablespace to a table comment
-which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
-foreign keys. The generated text is first written to srv_dict_tmpfile (under
-srv_dict_tmpfile_mutex) and then read back into a buffer obtained from
-my_malloc(). If the user comment is longer than 64000 - 3 bytes, or if the
-allocation fails, the original comment pointer is returned instead. */
-
-char*
-ha_innobase::update_table_comment(
-/*==============================*/
- /* out: table comment + InnoDB free space +
- info on foreign keys */
- const char* comment)/* in: table comment defined by user */
-{
- uint length = (uint) strlen(comment);
- char* str;
- long flen;
-
- /* The combined string is capped at 64000 bytes; 3 bytes are
- reserved for the "; " separator and the terminating NUL. */
-
- if (length > 64000 - 3) {
- return((char*)comment); /* string too long */
- }
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(ha_thd());
-
- prebuilt->trx->op_info = (char*)"returning table comment";
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
- str = NULL;
-
- /* output the data to a temporary file */
-
- mutex_enter_noninline(&srv_dict_tmpfile_mutex);
- rewind(srv_dict_tmpfile);
-
- fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
- fsp_get_available_space_in_free_extents(
- prebuilt->table->space));
-
- dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
- prebuilt->trx, prebuilt->table);
-
- /* The file position after writing is the length of the generated
- text; treat an ftell() failure as empty text and truncate the text
- so that comment + text + 3 never exceeds 64000 bytes */
- flen = ftell(srv_dict_tmpfile);
- if (flen < 0) {
- flen = 0;
- } else if (length + flen + 3 > 64000) {
- flen = 64000 - 3 - length;
- }
-
- /* allocate buffer for the full string, and
- read the contents of the temporary file */
-
- str = (char*) my_malloc(length + flen + 3, MYF(0));
-
- if (str) {
- char* pos = str + length;
- /* A non-empty user comment comes first, followed by "; " */
- if (length) {
- memcpy(str, comment, length);
- *pos++ = ';';
- *pos++ = ' ';
- }
- rewind(srv_dict_tmpfile);
- /* Terminate the string after the bytes that were actually read */
- flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile);
- pos[flen] = 0;
- }
-
- mutex_exit_noninline(&srv_dict_tmpfile_mutex);
-
- prebuilt->trx->op_info = (char*)"";
-
- /* Fall back to the caller's comment if the allocation failed */
- return(str ? str : (char*) comment);
-}
-
-/***********************************************************************
-Gets the foreign key create info for a table stored in InnoDB. The text is
-written to srv_dict_tmpfile (under srv_dict_tmpfile_mutex) and then read
-back into a buffer obtained from my_malloc(); at most 64000 - 1 bytes of
-text are returned. */
-
-char*
-ha_innobase::get_foreign_key_create_info(void)
-/*==========================================*/
- /* out, own: character string in the form which
- can be inserted to the CREATE TABLE statement,
- MUST be freed with ::free_foreign_key_create_info;
- NULL if the buffer could not be allocated */
-{
- char* str = 0;
- long flen;
-
- ut_a(prebuilt != NULL);
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(ha_thd());
-
- prebuilt->trx->op_info = (char*)"getting info on foreign keys";
-
- /* In case MySQL calls this in the middle of a SELECT query,
- release possible adaptive hash latch to avoid
- deadlocks of threads */
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- mutex_enter_noninline(&srv_dict_tmpfile_mutex);
- rewind(srv_dict_tmpfile);
-
- /* output the data to a temporary file */
- dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile,
- prebuilt->trx, prebuilt->table);
- prebuilt->trx->op_info = (char*)"";
-
- /* The file position after writing is the length of the text; treat
- an ftell() failure as empty text and cap the length so that the
- text plus the terminating NUL fits in 64000 bytes */
- flen = ftell(srv_dict_tmpfile);
- if (flen < 0) {
- flen = 0;
- } else if (flen > 64000 - 1) {
- flen = 64000 - 1;
- }
-
- /* allocate buffer for the string, and
- read the contents of the temporary file */
-
- str = (char*) my_malloc(flen + 1, MYF(0));
-
- if (str) {
- rewind(srv_dict_tmpfile);
- /* Terminate the string after the bytes that were actually read */
- flen = (uint) fread(str, 1, flen, srv_dict_tmpfile);
- str[flen] = 0;
- }
-
- mutex_exit_noninline(&srv_dict_tmpfile_mutex);
-
- return(str);
-}
-
-
-/***********************************************************************
-Builds a list describing the foreign key constraints in
-prebuilt->table->foreign_list. The list is walked while holding
-dict_sys->mutex; for every constraint a FOREIGN_KEY_INFO is filled in,
-duplicated with thd_memdup() and appended to f_key_list.
-	out: always 0
-	thd: in: user thread handle, passed to thd_make_lex_string() and
-	thd_memdup()
-	f_key_list: out: list to which the foreign key descriptions are
-	appended */
-int
-ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
-{
- dict_foreign_t* foreign;
-
- DBUG_ENTER("get_foreign_key_list");
- ut_a(prebuilt != NULL);
- update_thd(ha_thd());
- prebuilt->trx->op_info = (char*)"getting list of foreign keys";
- trx_search_latch_release_if_reserved(prebuilt->trx);
- mutex_enter_noninline(&(dict_sys->mutex));
- foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
-
- while (foreign != NULL) {
- uint i;
- FOREIGN_KEY_INFO f_key_info;
- LEX_STRING *name= 0;
- uint ulen;
- char uname[NAME_LEN+1]; /* Unencoded name */
- char db_name[NAME_LEN+1];
- const char *tmp_buff;
-
- /* The constraint name reported is the part of foreign->id that
- follows the first '/'.
- NOTE(review): this scan has no NUL or length check; it assumes
- the id always contains a '/' - confirm. */
- tmp_buff= foreign->id;
- i= 0;
- while (tmp_buff[i] != '/')
- i++;
- tmp_buff+= i + 1;
- f_key_info.forein_id = thd_make_lex_string(thd, 0,
- tmp_buff, (uint) strlen(tmp_buff), 1);
- tmp_buff= foreign->referenced_table_name;
-
- /* Database name */
- /* NOTE(review): the copy into db_name is not bounded; it assumes
- the part before '/' is at most NAME_LEN bytes - confirm. */
- i= 0;
- while (tmp_buff[i] != '/')
- {
- db_name[i]= tmp_buff[i];
- i++;
- }
- db_name[i]= 0;
- ulen= filename_to_tablename(db_name, uname, sizeof(uname));
- f_key_info.referenced_db = thd_make_lex_string(thd, 0,
- uname, ulen, 1);
-
- /* Table name */
- tmp_buff+= i + 1;
- ulen= filename_to_tablename(tmp_buff, uname, sizeof(uname));
- f_key_info.referenced_table = thd_make_lex_string(thd, 0,
- uname, ulen, 1);
-
- /* Collect the column name pairs; the loop body runs at least once
- and stops after foreign->n_fields pairs */
- for (i= 0;;) {
- tmp_buff= foreign->foreign_col_names[i];
- name = thd_make_lex_string(thd, name,
- tmp_buff, (uint) strlen(tmp_buff), 1);
- f_key_info.foreign_fields.push_back(name);
- tmp_buff= foreign->referenced_col_names[i];
- name = thd_make_lex_string(thd, name,
- tmp_buff, (uint) strlen(tmp_buff), 1);
- f_key_info.referenced_fields.push_back(name);
- if (++i >= foreign->n_fields)
- break;
- }
-
- /* ON DELETE action: the first matching type flag wins, RESTRICT
- is used when none of the flags is set; 'length' is the length of
- the chosen literal */
- ulong length;
- if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)
- {
- length=7;
- tmp_buff= "CASCADE";
- }
- else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
- {
- length=8;
- tmp_buff= "SET NULL";
- }
- else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION)
- {
- length=9;
- tmp_buff= "NO ACTION";
- }
- else
- {
- length=8;
- tmp_buff= "RESTRICT";
- }
- f_key_info.delete_method = thd_make_lex_string(
- thd, f_key_info.delete_method, tmp_buff, length, 1);
-
-
- /* ON UPDATE action, selected the same way as ON DELETE above */
- if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)
- {
- length=7;
- tmp_buff= "CASCADE";
- }
- else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)
- {
- length=8;
- tmp_buff= "SET NULL";
- }
- else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION)
- {
- length=9;
- tmp_buff= "NO ACTION";
- }
- else
- {
- length=8;
- tmp_buff= "RESTRICT";
- }
- f_key_info.update_method = thd_make_lex_string(
- thd, f_key_info.update_method, tmp_buff, length, 1);
- /* Name of the referenced index, or 0 if it is not known */
- if (foreign->referenced_index &&
- foreign->referenced_index->name)
- {
- f_key_info.referenced_key_name = thd_make_lex_string(
- thd, f_key_info.referenced_key_name,
- foreign->referenced_index->name,
- (uint) strlen(foreign->referenced_index->name), 1);
- }
- else
- f_key_info.referenced_key_name= 0;
-
- /* f_key_info is a local object; append a copy made with
- thd_memdup() to the list */
- FOREIGN_KEY_INFO *pf_key_info = (FOREIGN_KEY_INFO *)
- thd_memdup(thd, &f_key_info, sizeof(FOREIGN_KEY_INFO));
- f_key_list->push_back(pf_key_info);
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
- mutex_exit_noninline(&(dict_sys->mutex));
- prebuilt->trx->op_info = (char*)"";
-
- DBUG_RETURN(0);
-}
-
-/*********************************************************************
-Checks if ALTER TABLE may change the storage engine of the table.
-Changing storage engines is not allowed for tables for which there
-are foreign key constraints (parent or child tables). Both constraint
-lists of the table are inspected while the data dictionary is locked. */
-
-bool
-ha_innobase::can_switch_engines(void)
-/*=================================*/
-                /* out: true if neither the referenced list nor the
-                foreign list of the table has an element */
-{
-    DBUG_ENTER("ha_innobase::can_switch_engines");
-
-    ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-    prebuilt->trx->op_info =
-        "determining if there are foreign key constraints";
-    row_mysql_lock_data_dictionary(prebuilt->trx);
-
-    /* Switching is refused as soon as either list is non-empty */
-    const bool has_constraints =
-        UT_LIST_GET_FIRST(prebuilt->table->referenced_list)
-        || UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
-
-    row_mysql_unlock_data_dictionary(prebuilt->trx);
-    prebuilt->trx->op_info = "";
-
-    DBUG_RETURN(!has_constraints);
-}
-
-/***********************************************************************
-Checks if a table is referenced by a foreign key. The MySQL manual states that
-a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
-delete is then allowed internally to resolve a duplicate key conflict in
-REPLACE, not an update. */
-
-uint
-ha_innobase::referenced_by_foreign_key(void)
-/*========================================*/
-                /* out: 1 if referenced by a FOREIGN KEY,
-                0 otherwise */
-{
-    return(dict_table_referenced_by_foreign_key(prebuilt->table) ? 1 : 0);
-}
-
-/***********************************************************************
-Frees the foreign key create info for a table stored in InnoDB, if it is
-non-NULL. A NULL argument is ignored. */
-
-void
-ha_innobase::free_foreign_key_create_info(
-/*======================================*/
-    char*   str)    /* in, own: create info string to free */
-{
-    if (!str) {
-        /* Nothing was allocated, nothing to release */
-        return;
-    }
-
-    my_free(str, MYF(0));
-}
-
-/***********************************************************************
-Tells something additional to the handler about how to do things.
-Operations that are not listed in the switch below are silently ignored. */
-
-int
-ha_innobase::extra(
-/*===============*/
-                /* out: always 0 */
-    enum ha_extra_function operation)
-                /* in: HA_EXTRA_FLUSH or some other flag */
-{
-    /* Warning: since it is not sure that MySQL calls external_lock
-    before calling this function, the trx field in prebuilt can be
-    obsolete! */
-
-    switch (operation) {
-    case HA_EXTRA_FLUSH:
-        if (prebuilt->blob_heap) {
-            row_mysql_prebuilt_free_blob_heap(prebuilt);
-        }
-        break;
-
-    case HA_EXTRA_RESET_STATE:
-        reset_template(prebuilt);
-        break;
-
-    case HA_EXTRA_NO_KEYREAD:
-        prebuilt->read_just_key = 0;
-        break;
-
-    case HA_EXTRA_KEYREAD:
-        prebuilt->read_just_key = 1;
-        break;
-
-    case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
-        prebuilt->keep_other_fields_on_keyread = 1;
-        break;
-
-    /* IMPORTANT: prebuilt->trx can be obsolete in
-    this method, because it is not sure that MySQL
-    calls external_lock before this method with the
-    parameters below. We must not invoke update_thd()
-    either, because the calling threads may change.
-    CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR!
-    This is why the cases below look the transaction up
-    through the current thd instead of using prebuilt->trx. */
-    case HA_EXTRA_IGNORE_DUP_KEY: {
-        trx_t* thd_trx = thd_to_trx(ha_thd());
-
-        thd_trx->duplicates |= TRX_DUP_IGNORE;
-        break;
-    }
-
-    case HA_EXTRA_WRITE_CAN_REPLACE: {
-        trx_t* thd_trx = thd_to_trx(ha_thd());
-
-        thd_trx->duplicates |= TRX_DUP_REPLACE;
-        break;
-    }
-
-    case HA_EXTRA_WRITE_CANNOT_REPLACE: {
-        trx_t* thd_trx = thd_to_trx(ha_thd());
-
-        thd_trx->duplicates &= ~TRX_DUP_REPLACE;
-        break;
-    }
-
-    case HA_EXTRA_NO_IGNORE_DUP_KEY: {
-        trx_t* thd_trx = thd_to_trx(ha_thd());
-
-        thd_trx->duplicates &= ~(TRX_DUP_IGNORE | TRX_DUP_REPLACE);
-        break;
-    }
-
-    default:
-        /* Do nothing */
-        break;
-    }
-
-    return(0);
-}
-
-/**********************************************************************
-Reset state of file to after 'open'.
-This function is called after every statement for all tables used
-by that statement. Frees the BLOB heap if there is one, resets the row
-template and clears the statement level autoinc counter.
-Returns always 0. */
-int ha_innobase::reset()
-{
-    if (prebuilt->blob_heap != NULL) {
-        row_mysql_prebuilt_free_blob_heap(prebuilt);
-    }
-
-    reset_template(prebuilt);
-
-    /* TODO: This should really be reset in reset_template() but for now
-    it's safer to do it explicitly here. The value is a statement level
-    counter, so it must start from zero for the next statement. */
-    prebuilt->autoinc_last_value = 0;
-
-    return(0);
-}
-
-/**********************************************************************
-MySQL calls this function at the start of each SQL statement inside LOCK
-TABLES. Inside LOCK TABLES the ::external_lock method does not work to
-mark SQL statement borders. Note also a special case: if a temporary table
-is created inside LOCK TABLES, MySQL has not called external_lock() at all
-on that table.
-MySQL-5.0 also calls this before each statement in an execution of a stored
-procedure. To make the execution more deterministic for binlogging, MySQL-5.0
-locks all tables involved in a stored procedure with full explicit table
-locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
-procedure. */
-
-int
-ha_innobase::start_stmt(
-/*====================*/
- /* out: 0 or error code; the code below
- always returns 0 */
- THD* thd, /* in: handle to the user thread */
- thr_lock_type lock_type)
- /* in: lock type; TL_READ together with a
- plain SELECT selects a consistent read */
-{
- trx_t* trx;
-
- update_thd(thd);
-
- trx = prebuilt->trx;
-
- /* Here we release the search latch and the InnoDB thread FIFO ticket
- if they were reserved. They should have been released already at the
- end of the previous statement, but because inside LOCK TABLES the
- lock count method does not work to mark the end of a SELECT statement,
- that may not be the case. We MUST release the search latch before an
- INSERT, for example. */
-
- innobase_release_stat_resources(trx);
-
- /* Reset the AUTOINC statement level counter for multi-row INSERTs. */
- trx->n_autoinc_rows = 0;
-
- /* Mark the start of a new SQL statement on this handle */
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
- reset_template(prebuilt);
-
- if (!prebuilt->mysql_has_locked) {
- /* This handle is for a temporary table created inside
- this same LOCK TABLES; since MySQL does NOT call external_lock
- in this case, we must use x-row locks inside InnoDB to be
- prepared for an update of a row */
-
- prebuilt->select_lock_type = LOCK_X;
- } else {
- if (trx->isolation_level != TRX_ISO_SERIALIZABLE
- && thd_sql_command(thd) == SQLCOM_SELECT
- && lock_type == TL_READ) {
-
- /* For other than temporary tables, we obtain
- no lock for consistent read (plain SELECT). */
-
- prebuilt->select_lock_type = LOCK_NONE;
- } else {
- /* Not a consistent read: restore the
- select_lock_type value. The value of
- stored_select_lock_type was decided in:
- 1) ::store_lock(),
- 2) ::external_lock(),
- 3) ::init_table_handle_for_HANDLER(), and
- 4) ::transactional_table_lock(). */
-
- prebuilt->select_lock_type =
- prebuilt->stored_select_lock_type;
- }
- }
-
- /* Clear the detailed error message left by an earlier statement */
- trx->detailed_error[0] = '\0';
-
- /* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
-
- innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
- } else {
- innobase_register_stmt(ht, thd);
- }
-
- return(0);
-}
-
-/**********************************************************************
-Maps a MySQL trx isolation level code to the InnoDB isolation level code.
-An unknown MySQL code triggers the ut_a(0) assertion. */
-inline
-ulint
-innobase_map_isolation_level(
-/*=========================*/
-                    /* out: InnoDB isolation level */
-    enum_tx_isolation   iso)    /* in: MySQL isolation level code */
-{
-    switch (iso) {
-    case ISO_READ_UNCOMMITTED:
-        return(TRX_ISO_READ_UNCOMMITTED);
-    case ISO_READ_COMMITTED:
-        return(TRX_ISO_READ_COMMITTED);
-    case ISO_REPEATABLE_READ:
-        return(TRX_ISO_REPEATABLE_READ);
-    case ISO_SERIALIZABLE:
-        return(TRX_ISO_SERIALIZABLE);
-    default:
-        break;
-    }
-
-    /* None of the known isolation levels matched */
-    ut_a(0);
-
-    return(0);
-}
-
-/**********************************************************************
-As MySQL will execute an external lock for every new table it uses when it
-starts to process an SQL statement (an exception is when MySQL calls
-start_stmt for the handle) we can use this function to store the pointer to
-the THD in the handle. We will also use this function to communicate
-to InnoDB that a new SQL statement has started and that we must store a
-savepoint to our transaction handle, so that we are able to roll back
-the SQL statement in case of an error. */
-
-int
-ha_innobase::external_lock(
-/*=======================*/
- /* out: 0 on success;
- HA_ERR_LOGGING_IMPOSSIBLE if statement
- based binlogging is not possible at the
- current isolation level; or the MySQL
- error code of a failed table lock */
- THD* thd, /* in: handle to the user thread */
- int lock_type) /* in: lock type */
-{
- trx_t* trx;
-
- DBUG_ENTER("ha_innobase::external_lock");
- DBUG_PRINT("enter",("lock_type: %d", lock_type));
-
- update_thd(thd);
-
- /* Statement based binlogging does not work in isolation level
- READ UNCOMMITTED and READ COMMITTED since the necessary
- locks cannot be taken. In this case, we print an
- informative error message and return with an error. */
- if (lock_type == F_WRLCK)
- {
- /* NOTE(review): the isolation level is read through current_thd
- while the binlog format is read through the thd argument;
- presumably both refer to the same thread - confirm. */
- ulong const binlog_format= thd_binlog_format(thd);
- ulong const tx_isolation = thd_tx_isolation(current_thd);
- if (tx_isolation <= ISO_READ_COMMITTED &&
- binlog_format == BINLOG_FORMAT_STMT)
- {
- char buf[256];
- my_snprintf(buf, sizeof(buf),
- "Transaction level '%s' in"
- " InnoDB is not safe for binlog mode '%s'",
- tx_isolation_names[tx_isolation],
- binlog_format_names[binlog_format]);
- my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), buf);
- DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
- }
- }
-
-
- trx = prebuilt->trx;
-
- /* Mark the start of a new SQL statement on this handle */
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
-
- reset_template(prebuilt);
-
- if (lock_type == F_WRLCK) {
-
- /* If this is a SELECT, then it is in UPDATE TABLE ...
- or SELECT ... FOR UPDATE */
- prebuilt->select_lock_type = LOCK_X;
- prebuilt->stored_select_lock_type = LOCK_X;
- }
-
- if (lock_type != F_UNLCK) {
- /* MySQL is setting a new table lock */
-
- trx->detailed_error[0] = '\0';
-
- /* Set the MySQL flag to mark that there is an active
- transaction */
- if (trx->active_trans == 0) {
-
- innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
- } else if (trx->n_mysql_tables_in_use == 0) {
- /* First table of a new statement in a transaction that
- is already active: register only the statement */
- innobase_register_stmt(ht, thd);
- }
-
- if (trx->isolation_level == TRX_ISO_SERIALIZABLE
- && prebuilt->select_lock_type == LOCK_NONE
- && thd_test_options(thd,
- OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- /* To get serializable execution, we let InnoDB
- conceptually add 'LOCK IN SHARE MODE' to all SELECTs
- which otherwise would have been consistent reads. An
- exception is consistent reads in the AUTOCOMMIT=1 mode:
- we know that they are read-only transactions, and they
- can be serialized also if performed as consistent
- reads. */
-
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- }
-
- /* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
- TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
- an InnoDB table lock if it is released immediately at the end
- of LOCK TABLES, and InnoDB's table locks in that case cause
- VERY easily deadlocks.
-
- We do not set InnoDB table locks if user has not explicitly
- requested a table lock. Note that thd_in_lock_tables(thd)
- can hold in some cases, e.g., at the start of a stored
- procedure call (SQLCOM_CALL). */
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
-
- if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
- && THDVAR(thd, table_locks)
- && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
- && thd_in_lock_tables(thd)) {
-
- ulint error = row_lock_table_for_mysql(
- prebuilt, NULL, 0);
-
- if (error != DB_SUCCESS) {
- /* The lock counters below are not updated
- on this error path */
- error = convert_error_code_to_mysql(
- (int) error, thd);
- DBUG_RETURN((int) error);
- }
- }
-
- trx->mysql_n_tables_locked++;
- }
-
- trx->n_mysql_tables_in_use++;
- prebuilt->mysql_has_locked = TRUE;
-
- DBUG_RETURN(0);
- }
-
- /* MySQL is releasing a table lock */
-
- trx->n_mysql_tables_in_use--;
- prebuilt->mysql_has_locked = FALSE;
-
- /* Release a possible FIFO ticket and search latch. Since we
- may reserve the kernel mutex, we have to release the search
- system latch first to obey the latching order. */
-
- innobase_release_stat_resources(trx);
-
- /* If the MySQL lock count drops to zero we know that the current SQL
- statement has ended */
-
- if (trx->n_mysql_tables_in_use == 0) {
-
- trx->mysql_n_tables_locked = 0;
- prebuilt->used_in_HANDLER = FALSE;
-
- if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
- /* Neither OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN is set:
- commit the transaction at the end of the statement */
- if (trx->active_trans != 0) {
- innobase_commit(ht, thd, TRUE);
- }
- } else {
- if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && trx->global_read_view) {
-
- /* At low transaction isolation levels we let
- each consistent read set its own snapshot */
-
- read_view_close_for_mysql(trx);
- }
- }
- }
-
- DBUG_RETURN(0);
-}
-
-/**********************************************************************
-With this function MySQL requests a transactional lock on a table when
-the user has issued the query LOCK TABLES..WHERE ENGINE = InnoDB. */
-
-int
-ha_innobase::transactional_table_lock(
-/*==================================*/
- /* out: 0 on success; HA_ERR_CRASHED if the
- .ibd file is missing or lock_type is not
- F_WRLCK or F_RDLCK; or the MySQL error code
- of a failed table lock */
- THD* thd, /* in: handle to the user thread */
- int lock_type) /* in: lock type */
-{
- trx_t* trx;
-
- DBUG_ENTER("ha_innobase::transactional_table_lock");
- DBUG_PRINT("enter",("lock_type: %d", lock_type));
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(thd);
-
- /* NOTE(review): the message below has no "\n" between
- "the MySQL datadir?" and "InnoDB: See", so the two parts are
- printed on the same line - confirm whether that is intended. */
- if (prebuilt->table->ibd_file_missing && !thd_tablespace_op(thd)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir?"
- "InnoDB: See"
- " http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
- DBUG_RETURN(HA_ERR_CRASHED);
- }
-
- trx = prebuilt->trx;
-
- /* Mark the start of a new SQL statement on this handle */
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
-
- reset_template(prebuilt);
-
- /* A write lock maps to LOCK_X and a read lock to LOCK_S; any other
- lock type is reported as an error */
- if (lock_type == F_WRLCK) {
- prebuilt->select_lock_type = LOCK_X;
- prebuilt->stored_select_lock_type = LOCK_X;
- } else if (lock_type == F_RDLCK) {
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB error:\n"
-"MySQL is trying to set transactional table lock with corrupted lock type\n"
-"to table %s, lock type %d does not exist.\n",
- prebuilt->table->name, lock_type);
- DBUG_RETURN(HA_ERR_CRASHED);
- }
-
- /* MySQL is setting a new transactional table lock */
-
- /* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
-
- innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
- }
-
- /* The InnoDB table lock is taken only if the table_locks session
- variable is set and the thread is inside LOCK TABLES */
- if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
- ulint error = DB_SUCCESS;
-
- error = row_lock_table_for_mysql(prebuilt, NULL, 0);
-
- if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql((int) error, thd);
- DBUG_RETURN((int) error);
- }
-
- if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- /* Store the current undo_no of the transaction
- so that we know where to roll back if we have
- to roll back the next SQL statement */
-
- trx_mark_sql_stat_end(trx);
- }
- }
-
- DBUG_RETURN(0);
-}
-
-/****************************************************************************
-Here we export InnoDB status variables to MySQL. Nothing is exported unless
-innodb_inited is set. */
-static
-int
-innodb_export_status()
-/*==================*/
-                /* out: always 0 */
-{
-    if (!innodb_inited) {
-        /* InnoDB is not initialized: there is nothing to export */
-        return 0;
-    }
-
-    srv_export_innodb_status();
-
-    return 0;
-}
-
-/****************************************************************************
-Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
-Monitor to the client. The monitor output is written to srv_monitor_file
-(under srv_monitor_file_mutex), read back into a heap buffer and truncated
-so that less than MAX_STATUS_SIZE bytes are passed on to stat_print. */
-static
-bool
-innodb_show_status(
-/*===============*/
-                /* out: FALSE on success; TRUE if the buffer
-                could not be allocated or if stat_print
-                reported a failure */
-    handlerton* hton,   /* in: the innodb handlerton */
-    THD*    thd,    /* in: the MySQL query thread of the caller */
-    stat_print_fn *stat_print)
-                /* in: function that is called with the
-                collected status text */
-{
-    trx_t*          trx;
-    static const char   truncated_msg[] = "... truncated...\n";
-    const long      MAX_STATUS_SIZE = 64000;
-    ulint           trx_list_start = ULINT_UNDEFINED;
-    ulint           trx_list_end = ULINT_UNDEFINED;
-
-    DBUG_ENTER("innodb_show_status");
-
-    trx = check_trx_exists(thd);
-
-    innobase_release_stat_resources(trx);
-
-    /* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
-    bytes of text. */
-
-    long    flen, usable_len;
-    char*   str;
-
-    mutex_enter_noninline(&srv_monitor_file_mutex);
-    rewind(srv_monitor_file);
-    srv_printf_innodb_monitor(srv_monitor_file);
-    flen = ftell(srv_monitor_file);
-    os_file_set_eof(srv_monitor_file);
-
-    /* Treat an ftell() failure as empty output */
-    if (flen < 0) {
-        flen = 0;
-    }
-
-    if (flen > MAX_STATUS_SIZE) {
-        usable_len = MAX_STATUS_SIZE;
-    } else {
-        usable_len = flen;
-    }
-
-    /* allocate buffer for the string, and
-    read the contents of the temporary file */
-
-    if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) {
-        mutex_exit_noninline(&srv_monitor_file_mutex);
-        DBUG_RETURN(TRUE);
-    }
-
-    rewind(srv_monitor_file);
-    if (flen < MAX_STATUS_SIZE) {
-        /* Display the entire output. */
-        flen = (long) fread(str, 1, flen, srv_monitor_file);
-    } else if (trx_list_end < (ulint) flen
-        && trx_list_start < trx_list_end
-        && trx_list_start + (flen - trx_list_end)
-        < MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
-        /* Omit the beginning of the list of active transactions.
-        NOTE(review): trx_list_start and trx_list_end are never
-        assigned after their initialization to ULINT_UNDEFINED in
-        this function, so this branch looks unreachable as written;
-        confirm whether srv_printf_innodb_monitor() was meant to
-        fill them in. */
-        long    len = (long) fread(str, 1, trx_list_start, srv_monitor_file);
-        memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
-        len += sizeof truncated_msg - 1;
-        usable_len = (MAX_STATUS_SIZE - 1) - len;
-        fseek(srv_monitor_file, flen - usable_len, SEEK_SET);
-        len += (long) fread(str + len, 1, usable_len, srv_monitor_file);
-        flen = len;
-    } else {
-        /* Omit the end of the output. */
-        flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
-    }
-
-    mutex_exit_noninline(&srv_monitor_file_mutex);
-
-    bool    result = FALSE;
-
-    if (stat_print(thd, innobase_hton_name, (uint) strlen(innobase_hton_name),
-            STRING_WITH_LEN(""), str, flen)) {
-        result = TRUE;
-    }
-    my_free(str, MYF(0));
-
-    /* Propagate a stat_print failure to the caller instead of always
-    reporting success; innodb_mutex_show_status() does the same */
-    DBUG_RETURN(result);
-}
-
-/****************************************************************************
-Implements the SHOW MUTEX STATUS command. Walks mutex_list and then
-rw_lock_list, each under its own list mutex, and passes one line per
-reported object to stat_print. */
-static
-bool
-innodb_mutex_show_status(
-/*=====================*/
- /* out: FALSE on success, 1 if stat_print
- reported a failure */
- handlerton* hton, /* in: the innodb handlerton */
- THD* thd, /* in: the MySQL query thread of the
- caller */
- stat_print_fn* stat_print)
- /* in: function that is called once
- for every status line */
-{
- char buf1[IO_SIZE], buf2[IO_SIZE];
- mutex_t* mutex;
- rw_lock_t* lock;
-#ifdef UNIV_DEBUG
- /* Totals accumulated over the mutexes whose mutex_type is 1 */
- ulint rw_lock_count= 0;
- ulint rw_lock_count_spin_loop= 0;
- ulint rw_lock_count_spin_rounds= 0;
- ulint rw_lock_count_os_wait= 0;
- ulint rw_lock_count_os_yield= 0;
- ulonglong rw_lock_wait_time= 0;
-#endif /* UNIV_DEBUG */
- uint hton_name_len= (uint) strlen(innobase_hton_name), buf1len, buf2len;
- DBUG_ENTER("innodb_mutex_show_status");
-
- mutex_enter_noninline(&mutex_list_mutex);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
-
- while (mutex != NULL) {
-#ifdef UNIV_DEBUG
- /* Debug build: print detailed counters for every used mutex whose
- mutex_type is not 1; the others are only summed up and reported
- as one "rw_lock_mutexes" line at the end.
- NOTE(review): presumably mutex_type == 1 marks a mutex that
- belongs to an rw-lock - confirm. */
- if (mutex->mutex_type != 1) {
- if (mutex->count_using > 0) {
- buf1len= my_snprintf(buf1, sizeof(buf1),
- "%s:%s",
- mutex->cmutex_name, mutex->cfile_name);
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "count=%lu, spin_waits=%lu,"
- " spin_rounds=%lu, "
- "os_waits=%lu, os_yields=%lu,"
- " os_wait_times=%lu",
- mutex->count_using,
- mutex->count_spin_loop,
- mutex->count_spin_rounds,
- mutex->count_os_wait,
- mutex->count_os_yield,
- (ulong) (mutex->lspent_time/1000));
-
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- /* Release the list mutex before the
- early return */
- mutex_exit_noninline(
- &mutex_list_mutex);
- DBUG_RETURN(1);
- }
- }
- }
- else {
- rw_lock_count += mutex->count_using;
- rw_lock_count_spin_loop += mutex->count_spin_loop;
- rw_lock_count_spin_rounds += mutex->count_spin_rounds;
- rw_lock_count_os_wait += mutex->count_os_wait;
- rw_lock_count_os_yield += mutex->count_os_yield;
- rw_lock_wait_time += mutex->lspent_time;
- }
-#else /* UNIV_DEBUG */
- /* Non-debug build: print file:line and the OS wait count of
- every mutex in the list */
- buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu",
- mutex->cfile_name, (ulong) mutex->cline);
- buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu",
- mutex->count_os_wait);
-
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit_noninline(&mutex_list_mutex);
- DBUG_RETURN(1);
- }
-#endif /* UNIV_DEBUG */
-
- mutex = UT_LIST_GET_NEXT(list, mutex);
- }
-
- mutex_exit_noninline(&mutex_list_mutex);
-
- mutex_enter_noninline(&rw_lock_list_mutex);
-
- lock = UT_LIST_GET_FIRST(rw_lock_list);
-
- /* Print only the rw-locks that have a non-zero OS wait count */
- while (lock != NULL)
- {
- if (lock->count_os_wait)
- {
- buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu",
- lock->cfile_name, (ulong) lock->cline);
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "os_waits=%lu", lock->count_os_wait);
-
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit_noninline(&rw_lock_list_mutex);
- DBUG_RETURN(1);
- }
- }
- lock = UT_LIST_GET_NEXT(list, lock);
- }
-
- mutex_exit_noninline(&rw_lock_list_mutex);
-
-#ifdef UNIV_DEBUG
- /* One summary line with the totals collected in the first loop */
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "count=%lu, spin_waits=%lu, spin_rounds=%lu, "
- "os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
- rw_lock_count, rw_lock_count_spin_loop,
- rw_lock_count_spin_rounds,
- rw_lock_count_os_wait, rw_lock_count_os_yield,
- (ulong) (rw_lock_wait_time/1000));
-
- if (stat_print(thd, innobase_hton_name, hton_name_len,
- STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) {
- DBUG_RETURN(1);
- }
-#endif /* UNIV_DEBUG */
-
- DBUG_RETURN(FALSE);
-}
-
-/****************************************************************************
-Dispatches a status request of MySQL to the matching InnoDB status function:
-HA_ENGINE_STATUS to innodb_show_status() and HA_ENGINE_MUTEX to
-innodb_mutex_show_status(). Any other status type is ignored.
-	out: the return value of the function that was called, or FALSE
-	for a status type that is not handled */
-static
-bool innobase_show_status(handlerton *hton, THD* thd,
-    stat_print_fn* stat_print,
-    enum ha_stat_type stat_type)
-{
-    switch (stat_type) {
-    case HA_ENGINE_STATUS:
-        return innodb_show_status(hton, thd, stat_print);
-    case HA_ENGINE_MUTEX:
-        return innodb_mutex_show_status(hton, thd, stat_print);
-    default:
-        return FALSE;
-    }
-}
-
-
-
-/****************************************************************************
- Handling the shared INNOBASE_SHARE structure that is needed to provide table
- locking.
-****************************************************************************/
-
-/* Returns the key of a share: a pointer to its table name. The length of
-the name is stored in *length; the third argument is not used. */
-static uchar* innobase_get_key(INNOBASE_SHARE* share, size_t *length,
-    my_bool not_used __attribute__((unused)))
-{
-    uchar* key = (uchar*) share->table_name;
-
-    *length = share->table_name_length;
-
-    return key;
-}
-
-/* Looks up the share of table_name in innobase_open_tables while holding
-innobase_share_mutex, creating and inserting a new share if none exists
-yet, and increments its use count. Returns the share, or 0 if a newly
-created share could not be inserted into the hash table. */
-static INNOBASE_SHARE* get_share(const char* table_name)
-{
-    INNOBASE_SHARE* entry;
-
-    pthread_mutex_lock(&innobase_share_mutex);
-
-    const uint name_len = (uint) strlen(table_name);
-
-    entry = (INNOBASE_SHARE*) hash_search(&innobase_open_tables,
-        (uchar*) table_name,
-        name_len);
-
-    if (!entry) {
-        /* The share and the copy of the table name live in one
-        allocation: the name is stored right behind the struct */
-        entry = (INNOBASE_SHARE *) my_malloc(
-            sizeof(*entry) + name_len + 1,
-            MYF(MY_FAE | MY_ZEROFILL));
-
-        entry->table_name_length = name_len;
-        entry->table_name = (char*) (entry + 1);
-        strmov(entry->table_name, table_name);
-
-        if (my_hash_insert(&innobase_open_tables, (uchar*) entry)) {
-            pthread_mutex_unlock(&innobase_share_mutex);
-            my_free(entry, 0);
-
-            return 0;
-        }
-
-        thr_lock_init(&entry->lock);
-        pthread_mutex_init(&entry->mutex, MY_MUTEX_INIT_FAST);
-    }
-
-    entry->use_count++;
-    pthread_mutex_unlock(&innobase_share_mutex);
-
-    return entry;
-}
-
-/* Drops one reference to an INNOBASE_SHARE while holding
-innobase_share_mutex. When the use count reaches zero the share is removed
-from innobase_open_tables, its lock and mutex are destroyed and its memory
-is freed. */
-static void free_share(INNOBASE_SHARE* share)
-{
-	pthread_mutex_lock(&innobase_share_mutex);
-
-	--share->use_count;
-
-	if (share->use_count == 0) {
-		hash_delete(&innobase_open_tables, (uchar*) share);
-		thr_lock_delete(&share->lock);
-		pthread_mutex_destroy(&share->mutex);
-		my_free(share, MYF(0));
-	}
-
-	pthread_mutex_unlock(&innobase_share_mutex);
-}
-
-/*********************************************************************
-Converts a MySQL table lock stored in the 'lock' field of the handle to
-a proper type before storing pointer to the lock into an array of pointers.
-MySQL also calls this if it wants to reset some table locks to a not-locked
-state during the processing of an SQL query. An example is that during a
-SELECT the read lock is released early on the 'const' tables where we only
-fetch one row. MySQL does not call this when it releases all locks at the
-end of an SQL statement.
-
-Outline of the body:
-1) if lock_type is not TL_IGNORE and the transaction has no MySQL tables in
-use, trx->isolation_level is set from the session, and at READ COMMITTED or
-lower an existing global read view is closed;
-2) unless the statement is DROP TABLE or lock_type is TL_IGNORE,
-prebuilt->select_lock_type and prebuilt->stored_select_lock_type are set to
-LOCK_S or LOCK_NONE;
-3) if lock_type is not TL_IGNORE and lock.type is currently TL_UNLOCK,
-lock_type may be converted under the conditions commented below and is then
-stored in lock.type;
-4) in every case &lock is appended to the 'to' array and the advanced
-pointer is returned. */
-
-THR_LOCK_DATA**
-ha_innobase::store_lock(
-/*====================*/
- /* out: pointer to the next
- element in the 'to' array */
- THD* thd, /* in: user thread handle */
- THR_LOCK_DATA** to, /* in: pointer to an array
- of pointers to lock structs;
- pointer to the 'lock' field
- of current handle is stored
- next to this array */
- enum thr_lock_type lock_type) /* in: lock type to store in
- 'lock'; this may also be
- TL_IGNORE */
-{
- trx_t* trx;
-
- /* Note that trx in this function is NOT necessarily prebuilt->trx
- because we call update_thd() later, in ::external_lock()! Failure to
- understand this caused a serious memory corruption bug in 5.1.11. */
-
- trx = check_trx_exists(thd);
-
- /* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
- Be careful to ignore TL_IGNORE if we are going to do something with
- only 'real' locks! */
-
- /* If no MySQL table is in use, we need to set the isolation level
- of the transaction. */
-
- if (lock_type != TL_IGNORE
- && trx->n_mysql_tables_in_use == 0) {
- trx->isolation_level = innobase_map_isolation_level(
- (enum_tx_isolation) thd_tx_isolation(thd));
-
- if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && trx->global_read_view) {
-
- /* At low transaction isolation levels we let
- each consistent read set its own snapshot */
-
- read_view_close_for_mysql(trx);
- }
- }
-
- DBUG_ASSERT(thd == current_thd);
- const bool in_lock_tables = thd_in_lock_tables(thd);
- const uint sql_command = thd_sql_command(thd);
-
- if (sql_command == SQLCOM_DROP_TABLE) {
-
- /* MySQL calls this function in DROP TABLE though this table
- handle may belong to another thd that is running a query. Let
- us in that case skip any changes to the prebuilt struct. */
-
- } else if ((lock_type == TL_READ && in_lock_tables)
- || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
- || lock_type == TL_READ_WITH_SHARED_LOCKS
- || lock_type == TL_READ_NO_INSERT
- || (lock_type != TL_IGNORE
- && sql_command != SQLCOM_SELECT)) {
-
- /* The OR cases above are in this order:
- 1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
- are processing a stored procedure or function, or
- 2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
- 3) this is a SELECT ... IN SHARE MODE, or
- 4) we are doing a complex SQL statement like
- INSERT INTO ... SELECT ... and the logical logging (MySQL
- binlog) requires the use of a locking read, or
- MySQL is doing LOCK TABLES ... READ.
- 5) we let InnoDB do locking reads for all SQL statements that
- are not simple SELECTs; note that select_lock_type in this
- case may get strengthened in ::external_lock() to LOCK_X.
- Note that we MUST use a locking read in all data modifying
- SQL statements, because otherwise the execution would not be
- serializable, and also the results from the update could be
- unexpected if an obsolete consistent read view would be
- used. */
-
- ulint isolation_level;
-
- isolation_level = trx->isolation_level;
-
- if ((srv_locks_unsafe_for_binlog
- || isolation_level == TRX_ISO_READ_COMMITTED)
- && isolation_level != TRX_ISO_SERIALIZABLE
- && (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
- && (sql_command == SQLCOM_INSERT_SELECT
- || sql_command == SQLCOM_UPDATE
- || sql_command == SQLCOM_CREATE_TABLE)) {
-
- /* If we either have innobase_locks_unsafe_for_binlog
- option set or this session is using READ COMMITTED
- isolation level and isolation level of the transaction
- is not set to serializable and MySQL is doing
- INSERT INTO...SELECT or UPDATE ... = (SELECT ...) or
- CREATE ... SELECT... without FOR UPDATE or
- IN SHARE MODE in select, then we use consistent
- read for select. */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
- } else if (sql_command == SQLCOM_CHECKSUM) {
- /* Use consistent read for checksum table */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
- } else {
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- }
-
- } else if (lock_type != TL_IGNORE) {
-
- /* We set possible LOCK_X value in external_lock, not yet
- here even if this would be SELECT ... FOR UPDATE */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
- }
-
- if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {
-
- /* Starting from 5.0.7, we weaken also the table locks
- set at the start of a MySQL stored procedure call, just like
- we weaken the locks set at the start of an SQL statement.
- MySQL does set in_lock_tables TRUE there, but in reality
- we do not need table locks to make the execution of a
- single transaction stored procedure call deterministic
- (if it does not use a consistent read). */
-
- if (lock_type == TL_READ
- && sql_command == SQLCOM_LOCK_TABLES) {
- /* We come here if MySQL is processing LOCK TABLES
- ... READ LOCAL. MyISAM under that table lock type
- reads the table as it was at the time the lock was
- granted (new inserts are allowed, but not seen by the
- reader). To get a similar effect on an InnoDB table,
- we must use LOCK TABLES ... READ. We convert the lock
- type here, so that for InnoDB, READ LOCAL is
- equivalent to READ. This will change the InnoDB
- behavior in mysqldump, so that dumps of InnoDB tables
- are consistent with dumps of MyISAM tables. */
-
- lock_type = TL_READ_NO_INSERT;
- }
-
- /* If we are not doing a LOCK TABLE, DISCARD/IMPORT
- TABLESPACE or TRUNCATE TABLE then allow multiple
- writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ
- < TL_WRITE_CONCURRENT_INSERT.
-
- We especially allow multiple writers if MySQL is at the
- start of a stored procedure call (SQLCOM_CALL) or a
- stored function call (MySQL does have in_lock_tables
- TRUE there). */
-
- if ((lock_type >= TL_WRITE_CONCURRENT_INSERT
- && lock_type <= TL_WRITE)
- && !(in_lock_tables
- && sql_command == SQLCOM_LOCK_TABLES)
- && !thd_tablespace_op(thd)
- && sql_command != SQLCOM_TRUNCATE
- && sql_command != SQLCOM_OPTIMIZE
- && sql_command != SQLCOM_CREATE_TABLE) {
-
- lock_type = TL_WRITE_ALLOW_WRITE;
- }
-
- /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
- MySQL would use the lock TL_READ_NO_INSERT on t2, and that
- would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
- to t2. Convert the lock to a normal read lock to allow
- concurrent inserts to t2.
-
- We especially allow concurrent inserts if MySQL is at the
- start of a stored procedure call (SQLCOM_CALL)
- (MySQL does have thd_in_lock_tables() TRUE there). */
-
- if (lock_type == TL_READ_NO_INSERT
- && sql_command != SQLCOM_LOCK_TABLES) {
-
- lock_type = TL_READ;
- }
-
- lock.type = lock_type;
- }
-
- *to++= &lock;
-
- return(to);
-}
-
-/*******************************************************************************
-Fetches the next autoinc value of the table. The locks needed for the read
-are taken first through innobase_lock_autoinc(), whose result is kept in
-prebuilt->autoinc_error. On DB_SUCCESS the table AUTOINC mutex is still
-locked on return and all relevant locks have been acquired; on any other
-result *value stays 0. */
-
-ulong
-ha_innobase::innobase_get_autoinc(
-/*==============================*/
-					/* out: DB_SUCCESS or error code */
-	ulonglong*	value)		/* out: autoinc value */
-{
-	*value = 0;
-
-	prebuilt->autoinc_error = innobase_lock_autoinc();
-
-	if (prebuilt->autoinc_error != DB_SUCCESS) {
-
-		return(ulong(prebuilt->autoinc_error));
-	}
-
-	/* This is the first value of the interval. The counter should
-	have been initialized during open, so it can never be 0. */
-	*value = dict_table_autoinc_read(prebuilt->table);
-
-	ut_a(*value != 0);
-
-	return(ulong(prebuilt->autoinc_error));
-}
-
-/***********************************************************************
-Reads the global auto-inc counter of the table without using the AUTOINC
-lock, even if the lock mode is set to TRADITIONAL. The read itself is
-bracketed by dict_table_autoinc_lock() and dict_table_autoinc_unlock(). */
-
-ulonglong
-ha_innobase::innobase_peek_autoinc()
-/*================================*/
-					/* out: the autoinc value */
-{
-	ut_a(prebuilt != NULL);
-	ut_a(prebuilt->table != NULL);
-
-	dict_table_t*	tbl = prebuilt->table;
-
-	dict_table_autoinc_lock(tbl);
-
-	ulonglong	current = dict_table_autoinc_read(tbl);
-
-	/* The counter is expected to be non-zero at this point. */
-	ut_a(current > 0);
-
-	dict_table_autoinc_unlock(tbl);
-
-	return(current);
-}
-
-/*******************************************************************************
-Reserves an interval of auto-increment values for the handler. The current
-table counter is read with innobase_get_autoinc(); if that fails,
-*first_value is set to ~0 (all bits set) and the function returns at once.
-Otherwise *first_value is raised to the counter value when this is the first
-call for the statement (trx->n_autoinc_rows == 0) or when
-prebuilt->autoinc_last_value is 0, and *nb_reserved_values is set to
-trx->n_autoinc_rows. Unless old style AUTOINC locking is in use, the end of
-the interval is computed from offset, increment and the column maximum, and
-the table counter is updated; if the computed end is smaller than
-*first_value, *first_value is set to ~0 instead. offset and increment are
-saved in prebuilt, and dict_table_autoinc_unlock() is called before
-returning on the success path. */
-
-void
-ha_innobase::get_auto_increment(
-/*============================*/
- ulonglong offset, /* in: table autoinc offset */
- ulonglong increment, /* in: table autoinc increment */
- ulonglong nb_desired_values, /* in: number of values reqd */
- ulonglong *first_value, /* out: the autoinc value */
- ulonglong *nb_reserved_values) /* out: count of reserved values */
-{
- trx_t* trx;
- ulint error;
- ulonglong autoinc = 0;
-
- /* Prepare prebuilt->trx in the table handle */
- update_thd(ha_thd());
-
- error = innobase_get_autoinc(&autoinc);
-
- if (error != DB_SUCCESS) {
- *first_value = (~(ulonglong) 0);
- return;
- }
-
- /* This is a hack, since nb_desired_values seems to be accurate only
- for the first call to get_auto_increment() for multi-row INSERT and
- meaningless for other statements e.g, LOAD etc. Subsequent calls to
- this method for the same statement results in different values which
- don't make sense. Therefore we store the value the first time we are
- called and count down from that as rows are written (see write_row()).
- */
-
- trx = prebuilt->trx;
-
- /* Note: We can't rely on *first_value since some MySQL engines,
- in particular the partition engine, don't initialize it to 0 when
- invoking this method. So we are not sure if it's guaranteed to
- be 0 or not. */
-
- /* Called for the first time ? */
- if (trx->n_autoinc_rows == 0) {
-
- trx->n_autoinc_rows = (ulint) nb_desired_values;
-
- /* It's possible for nb_desired_values to be 0:
- e.g., INSERT INTO T1(C) SELECT C FROM T2; */
- if (nb_desired_values == 0) {
-
- trx->n_autoinc_rows = 1;
- }
-
- set_if_bigger(*first_value, autoinc);
- /* Not in the middle of a multi-row INSERT. */
- } else if (prebuilt->autoinc_last_value == 0) {
- set_if_bigger(*first_value, autoinc);
- }
-
- *nb_reserved_values = trx->n_autoinc_rows;
-
- /* With old style AUTOINC locking we only update the table's
- AUTOINC counter after attempting to insert the row. */
- if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) {
- ulonglong need;
- ulonglong next_value;
- ulonglong col_max_value;
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
-
- need = *nb_reserved_values * increment;
-
- /* Compute the last value in the interval */
- next_value = innobase_next_autoinc(
- *first_value, need, offset, col_max_value);
-
- prebuilt->autoinc_last_value = next_value;
-
- if (prebuilt->autoinc_last_value < *first_value) {
- *first_value = (~(ulonglong) 0);
- } else {
- /* Update the table autoinc variable */
- dict_table_autoinc_update_if_greater(
- prebuilt->table, prebuilt->autoinc_last_value);
- }
- } else {
- /* This will force write_row() into attempting an update
- of the table's AUTOINC counter. */
- prebuilt->autoinc_last_value = 0;
- }
-
- /* The increment to be used to increase the AUTOINC value, we use
- this in write_row() and update_row() to increase the autoinc counter
- for columns that are filled by the user. We need the offset and
- the increment. */
- prebuilt->autoinc_offset = offset;
- prebuilt->autoinc_increment = increment;
-
- dict_table_autoinc_unlock(prebuilt->table);
-}
-
-/* See comment in handler.h. Takes the table autoinc lock through
-row_lock_table_autoinc_for_mysql() and then resets the table counter to
-'value'; a requested value of 0 is replaced by 1. Returns 0 on success or
-the lock error converted to a MySQL error code. */
-int
-ha_innobase::reset_auto_increment(
-/*==============================*/
-	ulonglong	value)		/* in: new value for table autoinc */
-{
-	DBUG_ENTER("ha_innobase::reset_auto_increment");
-
-	update_thd(ha_thd());
-
-	int	lock_err = row_lock_table_autoinc_for_mysql(prebuilt);
-
-	if (lock_err != DB_SUCCESS) {
-		int	mysql_err = convert_error_code_to_mysql(
-			lock_err, user_thd);
-
-		DBUG_RETURN(mysql_err);
-	}
-
-	/* The next value can never be 0. */
-	innobase_reset_autoinc(value == 0 ? 1 : value);
-
-	DBUG_RETURN(0);
-}
-
-/* See comment in handler.cc. Copies trx->detailed_error of the current
-thread's transaction into buf using system_charset_info. The 'error'
-argument is not consulted. Always returns FALSE. */
-bool
-ha_innobase::get_error_message(int error, String *buf)
-{
-	trx_t*		trx = check_trx_exists(ha_thd());
-	const char*	msg = trx->detailed_error;
-	uint		msg_len = (uint) strlen(msg);
-
-	buf->copy(msg, msg_len, system_charset_info);
-
-	return FALSE;
-}
-
-/***********************************************************************
-Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
-If there is no explicitly declared non-null unique key or a primary key, then
-InnoDB internally uses the row id as the primary key.
-
-For a generated clustered index the two row ids are compared bytewise.
-Otherwise the primary key parts are compared one by one in key order and
-the first non-zero result is returned. */
-
-int
-ha_innobase::cmp_ref(
-/*=================*/
-				/* out: < 0 if ref1 < ref2, 0 if equal, else
-				> 0 */
-	const uchar*	ref1,	/* in: an (internal) primary key value in the
-				MySQL key value format */
-	const uchar*	ref2)	/* in: an (internal) primary key value in the
-				MySQL key value format */
-{
-	enum_field_types mysql_type;
-	Field*		field;
-	KEY_PART_INFO*	key_part;
-	KEY_PART_INFO*	key_part_end;
-	uint		len1;
-	uint		len2;
-	int		result;
-
-	if (prebuilt->clust_index_was_generated) {
-		/* The 'ref' is an InnoDB row id */
-
-		return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
-	}
-
-	/* Do a type-aware comparison of primary key fields. PK fields
-	are always NOT NULL, so no checks for NULL are performed. */
-
-	key_part = table->key_info[table->s->primary_key].key_part;
-
-	key_part_end = key_part
-		+ table->key_info[table->s->primary_key].key_parts;
-
-	for (; key_part != key_part_end; ++key_part) {
-		field = key_part->field;
-		mysql_type = field->type();
-
-		if (mysql_type == MYSQL_TYPE_TINY_BLOB
-		    || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
-		    || mysql_type == MYSQL_TYPE_BLOB
-		    || mysql_type == MYSQL_TYPE_LONG_BLOB) {
-
-			/* In the MySQL key value format, a column prefix of
-			a BLOB is preceded by a 2-byte length field */
-
-			len1 = innobase_read_from_2_little_endian(ref1);
-			len2 = innobase_read_from_2_little_endian(ref2);
-
-			/* Skip the length field only for this comparison.
-			ref1 and ref2 themselves must stay at the start of
-			the key part, because key_part->store_length, added
-			below, already covers the 2-byte length field.
-			Advancing the pointers here as well would shift
-			every following key part by two bytes. */
-
-			result = ((Field_blob*)field)->cmp(ref1 + 2, len1,
-							   ref2 + 2, len2);
-		} else {
-			result = field->key_cmp(ref1, ref2);
-		}
-
-		if (result) {
-
-			return(result);
-		}
-
-		ref1 += key_part->store_length;
-		ref2 += key_part->store_length;
-	}
-
-	return(0);
-}
-
-/***********************************************************************
-Asks InnoDB whether a query on a table may be cached. The callback
-innobase_query_caching_of_table_permitted is handed back through *call_back,
-*engine_data is reset to 0, and the answer of that same callback for the
-given table is returned. */
-
-my_bool
-ha_innobase::register_query_cache_table(
-/*====================================*/
-					/* out: TRUE if query caching
-					of the table is permitted */
-	THD*		thd,		/* in: user thread handle */
-	char*		table_key,	/* in: concatenation of database name,
-					the null character '\0',
-					and the table name */
-	uint		key_length,	/* in: length of the full name, i.e.
-					len(dbname) + len(tablename) + 1 */
-	qc_engine_callback*
-			call_back,	/* out: pointer to function for
-					checking if query caching
-					is permitted */
-	ulonglong	*engine_data)	/* in/out: data to call_back */
-{
-	*engine_data = 0;
-	*call_back = innobase_query_caching_of_table_permitted;
-
-	return(innobase_query_caching_of_table_permitted(
-			thd, table_key, key_length, engine_data));
-}
-
-/* Returns trx_sys_mysql_bin_log_name as it is, without copying it. */
-char*
-ha_innobase::get_mysql_bin_log_name()
-{
-	char*	log_name = trx_sys_mysql_bin_log_name;
-
-	return(log_name);
-}
-
-/* Returns trx_sys_mysql_bin_log_pos. That variable is an ib_longlong, a
-typedef for a 64-bit integer (__int64 or longlong), so converting it to
-ulonglong is ok. */
-ulonglong
-ha_innobase::get_mysql_bin_log_pos()
-{
-	ulonglong	log_pos = trx_sys_mysql_bin_log_pos;
-
-	return(log_pos);
-}
-
-/**********************************************************************
-Finds the storage length in bytes of the first n characters of a string, for
-prefix indexes on columns that use a multibyte character set. The charset is
-looked up from charset_id, and n is prefix_len divided by the charset's
-mbmaxlen.
-
-NOTE: the prototype of this function is copied to data0type.c! If you change
-this function, you MUST change also data0type.c! */
-extern "C"
-ulint
-innobase_get_at_most_n_mbchars(
-/*===========================*/
-				/* out: number of bytes occupied by the first
-				n characters */
-	ulint charset_id,	/* in: character set id */
-	ulint prefix_len,	/* in: prefix length in bytes of the index
-				(this has to be divided by mbmaxlen to get the
-				number of CHARACTERS n in the prefix) */
-	ulint data_len,		/* in: length of the string in bytes */
-	const char* str)	/* in: character string */
-{
-	CHARSET_INFO*	cs;		/* charset used in the field */
-	ulint		max_chars;	/* number of characters in prefix */
-	ulint		byte_len;	/* character length in bytes */
-
-	cs = get_charset((uint) charset_id, MYF(MY_WME));
-
-	ut_ad(cs);
-	ut_ad(cs->mbmaxlen);
-
-	/* The prefix index holds at most this many characters. */
-
-	max_chars = prefix_len / cs->mbmaxlen;
-
-	if (cs->mbmaxlen <= 1) {
-		/* Single-byte charset: bytes and characters coincide, so
-		the result is the smaller of the two byte lengths. */
-
-		return(data_len < prefix_len ? data_len : prefix_len);
-	}
-
-	/* Multi-byte charset: find the length of the first at most
-	max_chars characters. my_charpos() returns the byte length of the
-	first max_chars characters, or a value bigger than the length of
-	str if str holds fewer full characters than that.
-
-	Suppose that we are looking for n UTF-8 characters.
-
-	1) If the string is long enough, the prefix contains at least n
-	complete UTF-8 characters, maybe some extra characters and an
-	incomplete UTF-8 character. The end of the nth character is
-	returned and nothing needs fixing.
-
-	2) If the string is not long enough, it holds the complete value of
-	a column, that is, only complete UTF-8 characters, and the whole
-	string can be stored in the column prefix index. */
-
-	byte_len = my_charpos(cs, str, str + data_len, (int) max_chars);
-
-	return(byte_len > data_len ? data_len : byte_len);
-}
-
-/***********************************************************************
-This function is used to prepare X/Open XA distributed transaction.
-
-When the command is not XA PREPARE and either 'all' is set or neither
-OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN is set, prepare_commit_mutex is
-acquired here and trx->active_trans is set to 2; according to the comment in
-the body the mutex is released on commit. If the support_xa variable is off,
-the function returns 0 right after that step without preparing anything.
-Otherwise the transaction is either prepared with trx_prepare_for_mysql()
-or, at the end of a statement inside a larger transaction, only the
-statement end is marked. */
-static
-int
-innobase_xa_prepare(
-/*================*/
- /* out: 0 or error number */
- handlerton *hton, /* in: handlerton; not referenced in the body */
- THD* thd, /* in: handle to the MySQL thread of the user
- whose XA transaction should be prepared */
- bool all) /* in: TRUE - prepare the whole transaction
- FALSE - the current SQL statement ended */
-{
- int error = 0;
- trx_t* trx = check_trx_exists(thd);
-
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time.
- */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
-
- if (!THDVAR(thd, support_xa)) {
-
- return(0);
- }
-
- thd_get_xid(thd, (MYSQL_XID*) &trx->xid);
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the kernel mutex, we have to release the search system latch
- first to obey the latching order. */
-
- innobase_release_stat_resources(trx);
-
- if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {
-
- sql_print_error("trx->active_trans == 0, but trx->conc_state != "
- "TRX_NOT_STARTED");
- }
-
- if (all
- || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- /* We were instructed to prepare the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- ut_ad(trx->active_trans);
-
- error = (int) trx_prepare_for_mysql(trx);
- } else {
- /* We just mark the SQL statement ended and do not do a
- transaction prepare */
-
- /* If we had reserved the auto-inc lock for some
- table in this SQL statement we release it now */
-
- row_unlock_table_autoinc_for_mysql(trx);
-
- /* Store the current undo_no of the transaction so that we
- know where to roll back if we have to roll back the next
- SQL statement */
-
- trx_mark_sql_stat_end(trx);
- }
-
- /* Tell the InnoDB server that there might be work for utility
- threads: */
-
- srv_active_wake_master_thread();
-
- return error;
-}
-
-/***********************************************************************
-Recovers X/Open XA distributed transactions. The work is delegated to
-trx_recover_for_mysql() whenever the caller supplied a non-empty list;
-with no list or no slots the result is 0. */
-static
-int
-innobase_xa_recover(
-/*================*/
-				/* out: number of prepared transactions
-				stored in xid_list */
-	handlerton *hton,
-	XID*	xid_list,	/* in/out: prepared transactions */
-	uint	len)		/* in: number of slots in xid_list */
-{
-	if (len != 0 && xid_list != NULL) {
-
-		return(trx_recover_for_mysql(xid_list, len));
-	}
-
-	/* Nothing to fill in. */
-	return(0);
-}
-
-/***********************************************************************
-Commits one X/Open XA distributed transaction which is in the prepared
-state. The transaction is looked up by its xid; XAER_NOTA is returned when
-no transaction matches. */
-static
-int
-innobase_commit_by_xid(
-/*===================*/
-			/* out: 0 or error number */
-	handlerton *hton,
-	XID*	xid)	/* in: X/Open XA transaction identification */
-{
-	trx_t*	found = trx_get_trx_by_xid(xid);
-
-	if (!found) {
-
-		return(XAER_NOTA);
-	}
-
-	innobase_commit_low(found);
-
-	return(XA_OK);
-}
-
-/***********************************************************************
-Rolls back one X/Open XA distributed transaction which is in the prepared
-state. The transaction is looked up by its xid; XAER_NOTA is returned when
-no transaction matches, otherwise the result of innobase_rollback_trx(). */
-static
-int
-innobase_rollback_by_xid(
-/*=====================*/
-			/* out: 0 or error number */
-	handlerton *hton,
-	XID	*xid)	/* in: X/Open XA transaction identification */
-{
-	trx_t*	found = trx_get_trx_by_xid(xid);
-
-	if (!found) {
-
-		return(XAER_NOTA);
-	}
-
-	return(innobase_rollback_trx(found));
-}
-
-/***********************************************************************
-Creates a consistent view for a cursor, based on the current transaction.
-The transaction is created if the corresponding MySQL thread still lacks
-one. The view is then used inside of MySQL when accessing records through
-a cursor. */
-static
-void*
-innobase_create_cursor_view(
-/*========================*/
-				/* out: pointer to cursor view or NULL */
-	handlerton *hton,	/* in: innobase hton */
-	THD* thd)		/* in: user thread handle */
-{
-	trx_t*	trx = check_trx_exists(thd);
-
-	return(read_cursor_view_create_for_mysql(trx));
-}
-
-/***********************************************************************
-Closes the given consistent cursor view of a transaction and restores the
-global read view to a transaction read view. The transaction is created if
-the corresponding MySQL thread still lacks one. */
-static
-void
-innobase_close_cursor_view(
-/*=======================*/
-	handlerton *hton,
-	THD*	thd,	/* in: user thread handle */
-	void*	curview)/* in: Consistent read view to be closed */
-{
-	trx_t*		trx = check_trx_exists(thd);
-	cursor_view_t*	view = (cursor_view_t*) curview;
-
-	read_cursor_view_close_for_mysql(trx, view);
-}
-
-/***********************************************************************
-Sets the given consistent cursor view on a transaction, which is created if
-the corresponding MySQL thread still lacks one. If the given view is NULL,
-the global read view of the transaction is restored to a transaction read
-view. */
-static
-void
-innobase_set_cursor_view(
-/*=====================*/
-	handlerton *hton,
-	THD*	thd,	/* in: user thread handle */
-	void*	curview)/* in: Consistent cursor view to be set */
-{
-	trx_t*		trx = check_trx_exists(thd);
-	cursor_view_t*	view = (cursor_view_t*) curview;
-
-	read_cursor_set_for_mysql(trx, view);
-}
-
-
-/* Reports whether the requested table definition is compatible with the
-existing data. COMPATIBLE_DATA_NO is returned when table_changes is not
-IS_EQUAL_YES, when a non-zero auto_increment value was specified, or when a
-specified row format differs from get_row_type(). Otherwise the result is
-COMPATIBLE_DATA_YES. */
-bool ha_innobase::check_if_incompatible_data(
-	HA_CREATE_INFO*	info,
-	uint		table_changes)
-{
-	if (table_changes != IS_EQUAL_YES
-	    /* auto_increment value was changed */
-	    || ((info->used_fields & HA_CREATE_USED_AUTO)
-		&& info->auto_increment_value != 0)
-	    /* row format was changed */
-	    || ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
-		&& get_row_type() != info->row_type)) {
-
-		return COMPATIBLE_DATA_NO;
-	}
-
-	return COMPATIBLE_DATA_YES;
-}
-
-/* Calls innodb_export_status() and then points 'var' at the
-innodb_status_variables array, marking it as SHOW_ARRAY. 'thd' and 'buff'
-are not used. Always returns 0. */
-static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff)
-{
-	innodb_export_status();
-
-	var->value = (char *) &innodb_status_variables;
-	var->type = SHOW_ARRAY;
-
-	return 0;
-}
-
-static SHOW_VAR innodb_status_variables_export[]= { /* one SHOW_FUNC entry plus a terminator */
- {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, /* value is the address of show_innodb_vars */
- {NullS, NullS, SHOW_LONG} /* terminating entry with NullS name and value */
-};
-
-static struct st_mysql_storage_engine innobase_storage_engine=
-{ MYSQL_HANDLERTON_INTERFACE_VERSION }; /* initialized with the handlerton interface version only */
-
-/* plugin options
-
-Each MYSQL_SYSVAR_* invocation below declares one InnoDB plugin system
-variable. As used here the arguments are: the option name, the C variable
-that backs it, PLUGIN_VAR_* flags, the help text, two callback slots (NULL
-in every declaration in this section) and the default value. The numeric
-variants (LONG, ULONG, LONGLONG) take three further values, which per the
-MySQL plugin API (mysql/plugin.h) are the minimum, the maximum and the block
-size; confirm against plugin.h before changing them. */
-static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable InnoDB checksums validation (enabled by default). "
- "Disable with --skip-innodb-checksums.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
- PLUGIN_VAR_READONLY,
- "The common part for InnoDB table spaces.",
- NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable InnoDB doublewrite buffer (enabled by default). "
- "Disable with --skip-innodb-doublewrite.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_BOOL(extra_dirty_writes, innobase_extra_dirty_writes,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Flush dirty buffer pages when dirty max pct is not exceeded",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_LONG(io_capacity, innobase_io_capacity,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of IOPs the server can do. Tunes the background IO rate",
- NULL, NULL, (long)200, (long)100, LONG_MAX, (long)0);
-
-static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
- PLUGIN_VAR_OPCMDARG,
- "Speeds up the shutdown process of the InnoDB storage engine. Possible "
- "values are 0, 1 (faster)"
- /*
- NetWare can't close unclosed files, can't automatically kill remaining
- threads, etc, so on this OS we disable the crash-like InnoDB shutdown.
- */
- IF_NETWARE("", " or 2 (fastest - crash-like)")
- ".",
- NULL, NULL, (unsigned long)1, (unsigned long)0,
- (unsigned long)IF_NETWARE(1,2), (unsigned long)0);
-
-static MYSQL_SYSVAR_BOOL(file_per_table, innobase_file_per_table,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Stores each InnoDB table to an .ibd file in the database dir.",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
- PLUGIN_VAR_OPCMDARG,
- "Set to 0 (write and flush once per second),"
- " 1 (write and flush at each commit)"
- " or 2 (write at commit, flush once per second).",
- NULL, NULL, (unsigned long)1, (unsigned long)0, (unsigned long)2,
- (unsigned long)0);
-
-static MYSQL_SYSVAR_STR(flush_method, innobase_unix_file_flush_method,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "With which method to flush data.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Force InnoDB to not use next-key locking, to use only row-level locking.",
- NULL, NULL, FALSE);
-
-#ifdef UNIV_LOG_ARCHIVE
-static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Where full logs should be archived.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Set to 1 if you want to have logs archived.", NULL, NULL, FALSE);
-#endif /* UNIV_LOG_ARCHIVE */
-
-static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Path to InnoDB log files.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
- PLUGIN_VAR_RQCMDARG,
- "Percentage of dirty pages allowed in bufferpool.",
- NULL, NULL, (unsigned long)75, (unsigned long)0, (unsigned long)99,
- (unsigned long)0);
-
-static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
- PLUGIN_VAR_RQCMDARG,
- "Desired maximum length of the purge queue (0 = no limit)",
- NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)~0L,
- (unsigned long)0);
-
-static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
- "Enable SHOW INNODB STATUS output in the innodb_status.<pid> file",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
- PLUGIN_VAR_OPCMDARG,
- "Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_BOOL(use_legacy_cardinality_algorithm,
- srv_use_legacy_cardinality_algorithm,
- PLUGIN_VAR_OPCMDARG,
- "Use legacy algorithm for picking random pages during index cardinality "
- "estimation. Disable this to use a better algorithm, but note that your "
- "query plans may change (enabled by default).",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_BOOL(adaptive_hash_index, innobase_adaptive_hash_index,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Enable InnoDB adaptive hash index (enabled by default). "
- "Disable with --skip-innodb-adaptive-hash-index.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
- NULL, NULL, (long)8*1024*1024L, (long)2*1024*1024L, LONG_MAX, (long)1024);
-
-static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
- PLUGIN_VAR_RQCMDARG,
- "Data file autoextend increment in megabytes",
- NULL, NULL, (unsigned long)64L, (unsigned long)1L, (unsigned long)1000L,
- (unsigned long)0);
-
-static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
- NULL, NULL, (long long)1024*1024*1024L, (long long)64*1024*1024L,
- LONGLONG_MAX, (long long)1024*1024L);
-
-static MYSQL_SYSVAR_ULONG(commit_concurrency, srv_commit_concurrency,
- PLUGIN_VAR_RQCMDARG,
- "Helps in performance tuning in heavily concurrent environments.",
- NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)1000,
- (unsigned long)0);
-
-static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
- PLUGIN_VAR_RQCMDARG,
- "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
- NULL, NULL, (unsigned long)500L, (unsigned long)1L, (unsigned long)~0L,
- (unsigned long)0);
-
-static MYSQL_SYSVAR_LONG(write_io_threads, innobase_write_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of write I/O threads in InnoDB.",
- NULL, NULL, (long)8, (long)1, (long)64, (long)0);
-
-static MYSQL_SYSVAR_LONG(read_io_threads, innobase_read_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of read I/O threads in InnoDB.",
- NULL, NULL, (long)8, (long)1, (long)64, (long)0);
-
-static MYSQL_SYSVAR_LONG(max_merged_io, innobase_max_merged_io,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Max number of adjacent IO requests to merge in InnoDB.",
- NULL, NULL, (long)64, (long)1, (long)64, (long)0);
-
-static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Helps to save your data in case the disk image of the database becomes corrupt.",
- NULL, NULL, (long)0, (long)0, (long)6, (long)0);
-
-static MYSQL_SYSVAR_LONG(lock_wait_timeout, innobase_lock_wait_timeout,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back.",
- NULL, NULL, (long)50, (long)1, (long)(1024*1024*1024), (long)0);
-
-static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The size of the buffer which InnoDB uses to write log to the log files on disk.",
- NULL, NULL, (long)16*1024*1024L, (long)2*1024*1024L, LONG_MAX, (long)1024);
-
-static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Size of each log file in a log group.",
- NULL, NULL, (long long)128*1024*1024L, (long long)32*1024*1024L,
- LONGLONG_MAX, (long long)1024*1024L);
-
-static MYSQL_SYSVAR_LONG(log_files_in_group, innobase_log_files_in_group,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.",
- NULL, NULL, (long)3, (long)2, (long)100, (long)0);
-
-static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
- NULL, NULL, (long)1, (long)1, (long)10, (long)0);
-
-static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "How many files at the maximum InnoDB keeps open at the same time.",
- NULL, NULL, (long)300L, (long)10L, LONG_MAX, (long)0L);
-
-static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
- PLUGIN_VAR_RQCMDARG,
- "Count of spin-loop rounds in InnoDB mutexes",
- NULL, NULL, (unsigned long)20L, (unsigned long)0L, (unsigned long)~0L,
- (unsigned long)0L);
-
-static MYSQL_SYSVAR_BOOL(thread_concurrency_timer_based,
- innobase_thread_concurrency_timer_based,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Use InnoDB timer based concurrency throttling. ",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
- PLUGIN_VAR_RQCMDARG,
- "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
- NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)1000,
- (unsigned long)0);
-
-static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
- PLUGIN_VAR_RQCMDARG,
- "Time of innodb thread sleeping before joining InnoDB queue (usec). Value 0 disable a sleep",
- NULL, NULL, (unsigned long)10000L, (unsigned long)0L, (unsigned long)~0L,
- (unsigned long)0);
-
-static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Path to individual files and their sizes.",
- NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The AUTOINC lock modes supported by InnoDB: "
- "0 => Old style AUTOINC locking (for backward"
- " compatibility) "
- "1 => New style AUTOINC locking "
- "2 => No AUTOINC locking (unsafe for SBR)",
- NULL, NULL,
- AUTOINC_NEW_STYLE_LOCKING, /* Default setting */
- AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
- AUTOINC_NO_LOCKING, (long)0); /* Maximum value */
-
-static struct st_mysql_sys_var* innobase_system_variables[]= {
- MYSQL_SYSVAR(additional_mem_pool_size),
- MYSQL_SYSVAR(autoextend_increment),
- MYSQL_SYSVAR(buffer_pool_size),
- MYSQL_SYSVAR(checksums),
- MYSQL_SYSVAR(commit_concurrency),
- MYSQL_SYSVAR(concurrency_tickets),
- MYSQL_SYSVAR(data_file_path),
- MYSQL_SYSVAR(data_home_dir),
- MYSQL_SYSVAR(doublewrite),
- MYSQL_SYSVAR(fast_shutdown),
- MYSQL_SYSVAR(read_io_threads),
- MYSQL_SYSVAR(write_io_threads),
- MYSQL_SYSVAR(max_merged_io),
- MYSQL_SYSVAR(thread_concurrency_timer_based),
- MYSQL_SYSVAR(file_per_table),
- MYSQL_SYSVAR(flush_log_at_trx_commit),
- MYSQL_SYSVAR(flush_method),
- MYSQL_SYSVAR(force_recovery),
- MYSQL_SYSVAR(locks_unsafe_for_binlog),
- MYSQL_SYSVAR(lock_wait_timeout),
-#ifdef UNIV_LOG_ARCHIVE
- MYSQL_SYSVAR(log_arch_dir),
- MYSQL_SYSVAR(log_archive),
-#endif /* UNIV_LOG_ARCHIVE */
- MYSQL_SYSVAR(log_buffer_size),
- MYSQL_SYSVAR(log_file_size),
- MYSQL_SYSVAR(log_files_in_group),
- MYSQL_SYSVAR(log_group_home_dir),
- MYSQL_SYSVAR(max_dirty_pages_pct),
- MYSQL_SYSVAR(max_purge_lag),
- MYSQL_SYSVAR(mirrored_log_groups),
- MYSQL_SYSVAR(open_files),
- MYSQL_SYSVAR(rollback_on_timeout),
- MYSQL_SYSVAR(stats_on_metadata),
- MYSQL_SYSVAR(use_legacy_cardinality_algorithm),
- MYSQL_SYSVAR(adaptive_hash_index),
- MYSQL_SYSVAR(status_file),
- MYSQL_SYSVAR(support_xa),
- MYSQL_SYSVAR(sync_spin_loops),
- MYSQL_SYSVAR(table_locks),
- MYSQL_SYSVAR(thread_concurrency),
- MYSQL_SYSVAR(thread_sleep_delay),
- MYSQL_SYSVAR(autoinc_lock_mode),
- MYSQL_SYSVAR(extra_dirty_writes),
- MYSQL_SYSVAR(io_capacity),
- NULL
-};
-
-mysql_declare_plugin(innobase)
-{
- MYSQL_STORAGE_ENGINE_PLUGIN,
- &innobase_storage_engine,
- innobase_hton_name,
- "Innobase OY",
- "Supports transactions, row-level locking, and foreign keys",
- PLUGIN_LICENSE_GPL,
- innobase_init, /* Plugin Init */
- NULL, /* Plugin Deinit */
- 0x0100 /* 1.0 */,
- innodb_status_variables_export,/* status variables */
- innobase_system_variables, /* system variables */
- NULL /* reserved */
-}
-mysql_declare_plugin_end;
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
deleted file mode 100644
index 8ca72ee1a60..00000000000
--- a/storage/innobase/handler/ha_innodb.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/* Copyright (C) 2000-2005 MySQL AB && Innobase Oy
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
-/*
- This file is based on ha_berkeley.h of MySQL distribution
-
- This file defines the Innodb handler: the interface between MySQL and
- Innodb
-*/
-
-#ifdef USE_PRAGMA_INTERFACE
-#pragma interface /* gcc class implementation */
-#endif
-
-typedef struct st_innobase_share {
- THR_LOCK lock;
- pthread_mutex_t mutex;
- char *table_name;
- uint table_name_length,use_count;
-} INNOBASE_SHARE;
-
-
-struct dict_index_struct;
-struct row_prebuilt_struct;
-
-typedef struct dict_index_struct dict_index_t;
-typedef struct row_prebuilt_struct row_prebuilt_t;
-
-/* The class defining a handle to an Innodb table */
-class ha_innobase: public handler
-{
- row_prebuilt_t* prebuilt; /* prebuilt struct in InnoDB, used
- to save CPU time with prebuilt data
- structures*/
- THD* user_thd; /* the thread handle of the user
- currently using the handle; this is
- set in external_lock function */
- THR_LOCK_DATA lock;
- INNOBASE_SHARE *share;
-
- uchar* upd_buff; /* buffer used in updates */
- uchar* key_val_buff; /* buffer used in converting
- search key values from MySQL format
- to Innodb format */
- ulong upd_and_key_val_buff_len;
- /* the length of each of the previous
- two buffers */
- Table_flags int_table_flags;
- uint primary_key;
- ulong start_of_scan; /* this is set to 1 when we are
- starting a table scan but have not
- yet fetched any row, else 0 */
- uint last_match_mode;/* match mode of the latest search:
- ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
- or undefined */
- uint num_write_row; /* number of write_row() calls */
-
- uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
- const uchar* record);
- int update_thd(THD* thd);
- int change_active_index(uint keynr);
- int general_fetch(uchar* buf, uint direction, uint match_mode);
- ulong innobase_lock_autoinc();
- ulonglong innobase_peek_autoinc();
- ulong innobase_set_max_autoinc(ulonglong auto_inc);
- ulong innobase_reset_autoinc(ulonglong auto_inc);
- ulong innobase_get_autoinc(ulonglong* value);
- ulong innobase_update_autoinc(ulonglong auto_inc);
- ulong innobase_initialize_autoinc();
- dict_index_t* innobase_get_index(uint keynr);
- ulonglong innobase_get_int_col_max_value(const Field* field);
-
- /* Init values for the class: */
- public:
- ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
- ~ha_innobase() {}
- /*
- Get the row type from the storage engine. If this method returns
- ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
- */
- enum row_type get_row_type() const;
-
- const char* table_type() const { return("InnoDB");}
- const char *index_type(uint key_number) { return "BTREE"; }
- const char** bas_ext() const;
- Table_flags table_flags() const;
- ulong index_flags(uint idx, uint part, bool all_parts) const
- {
- return (HA_READ_NEXT |
- HA_READ_PREV |
- HA_READ_ORDER |
- HA_READ_RANGE |
- HA_KEYREAD_ONLY);
- }
- uint max_supported_keys() const { return MAX_KEY; }
- /* An InnoDB page must store >= 2 keys;
- a secondary key record must also contain the
- primary key value:
- max key length is therefore set to slightly
- less than 1 / 4 of page size which is 16 kB;
- but currently MySQL does not work with keys
- whose size is > MAX_KEY_LENGTH */
- uint max_supported_key_length() const { return 3500; }
- uint max_supported_key_part_length() const;
- const key_map *keys_to_use_for_scanning() { return &key_map_full; }
-
- int open(const char *name, int mode, uint test_if_locked);
- int close(void);
- double scan_time();
- double read_time(uint index, uint ranges, ha_rows rows);
-
- int write_row(uchar * buf);
- int update_row(const uchar * old_data, uchar * new_data);
- int delete_row(const uchar * buf);
- bool was_semi_consistent_read();
- void try_semi_consistent_read(bool yes);
- void unlock_row();
-
- int index_init(uint index, bool sorted);
- int index_end();
- int index_read(uchar * buf, const uchar * key,
- uint key_len, enum ha_rkey_function find_flag);
- int index_read_idx(uchar * buf, uint index, const uchar * key,
- uint key_len, enum ha_rkey_function find_flag);
- int index_read_last(uchar * buf, const uchar * key, uint key_len);
- int index_next(uchar * buf);
- int index_next_same(uchar * buf, const uchar *key, uint keylen);
- int index_prev(uchar * buf);
- int index_first(uchar * buf);
- int index_last(uchar * buf);
-
- int rnd_init(bool scan);
- int rnd_end();
- int rnd_next(uchar *buf);
- int rnd_pos(uchar * buf, uchar *pos);
-
- void position(const uchar *record);
- int info(uint);
- int analyze(THD* thd,HA_CHECK_OPT* check_opt);
- int optimize(THD* thd,HA_CHECK_OPT* check_opt);
- int discard_or_import_tablespace(my_bool discard);
- int extra(enum ha_extra_function operation);
- int reset();
- int external_lock(THD *thd, int lock_type);
- int transactional_table_lock(THD *thd, int lock_type);
- int start_stmt(THD *thd, thr_lock_type lock_type);
- void position(uchar *record);
- ha_rows records_in_range(uint inx, key_range *min_key, key_range
- *max_key);
- ha_rows estimate_rows_upper_bound();
-
- void update_create_info(HA_CREATE_INFO* create_info);
- int create(const char *name, register TABLE *form,
- HA_CREATE_INFO *create_info);
- int delete_all_rows();
- int delete_table(const char *name);
- int rename_table(const char* from, const char* to);
- int check(THD* thd, HA_CHECK_OPT* check_opt);
- char* update_table_comment(const char* comment);
- char* get_foreign_key_create_info();
- int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
- bool can_switch_engines();
- uint referenced_by_foreign_key();
- void free_foreign_key_create_info(char* str);
- THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
- enum thr_lock_type lock_type);
- void init_table_handle_for_HANDLER();
- virtual void get_auto_increment(ulonglong offset, ulonglong increment,
- ulonglong nb_desired_values,
- ulonglong *first_value,
- ulonglong *nb_reserved_values);
- int reset_auto_increment(ulonglong value);
-
- virtual bool get_error_message(int error, String *buf);
-
- uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
- /*
- ask handler about permission to cache table during query registration
- */
- my_bool register_query_cache_table(THD *thd, char *table_key,
- uint key_length,
- qc_engine_callback *call_back,
- ulonglong *engine_data);
- static char *get_mysql_bin_log_name();
- static ulonglong get_mysql_bin_log_pos();
- bool primary_key_is_clustered() { return true; }
- int cmp_ref(const uchar *ref1, const uchar *ref2);
- bool check_if_incompatible_data(HA_CREATE_INFO *info,
- uint table_changes);
-};
-
-/* Some accessor functions which the InnoDB plugin needs, but which
-can not be added to mysql/plugin.h as part of the public interface;
-the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */
-
-#ifndef INNODB_COMPATIBILITY_HOOKS
-#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
-#endif
-
-extern "C" {
-struct charset_info_st *thd_charset(MYSQL_THD thd);
-char **thd_query(MYSQL_THD thd);
-
-/** Get the file name of the MySQL binlog.
- * @return the name of the binlog file
- */
-const char* mysql_bin_log_file_name(void);
-
-/** Get the current position of the MySQL binlog.
- * @return byte offset from the beginning of the binlog
- */
-ulonglong mysql_bin_log_file_pos(void);
-
-/**
- Check if a user thread is a replication slave thread
- @param thd user thread
- @retval 0 the user thread is not a replication slave thread
- @retval 1 the user thread is a replication slave thread
-*/
-int thd_slave_thread(const MYSQL_THD thd);
-
-/**
- Check if a user thread is running a non-transactional update
- @param thd user thread
- @retval 0 the user thread is not running a non-transactional update
- @retval 1 the user thread is running a non-transactional update
-*/
-int thd_non_transactional_update(const MYSQL_THD thd);
-
-/**
- Get the user thread's binary logging format
- @param thd user thread
- @return Value to be used as index into the binlog_format_names array
-*/
-int thd_binlog_format(const MYSQL_THD thd);
-
-/**
- Mark transaction to rollback and mark error as fatal to a sub-statement.
- @param thd Thread handle
- @param all TRUE <=> rollback main transaction.
-*/
-void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
-}
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
deleted file mode 100644
index d54a3378993..00000000000
--- a/storage/innobase/ibuf/ibuf0ibuf.c
+++ /dev/null
@@ -1,3580 +0,0 @@
-/******************************************************
-Insert buffer
-
-(c) 1997 Innobase Oy
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "ibuf0ibuf.h"
-
-#ifdef UNIV_NONINL
-#include "ibuf0ibuf.ic"
-#endif
-
-#include "buf0buf.h"
-#include "buf0rea.h"
-#include "fsp0fsp.h"
-#include "trx0sys.h"
-#include "fil0fil.h"
-#include "thr0loc.h"
-#include "rem0rec.h"
-#include "btr0cur.h"
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "sync0sync.h"
-#include "dict0boot.h"
-#include "fut0lst.h"
-#include "lock0lock.h"
-#include "log0recv.h"
-#include "que0que.h"
-
-/* STRUCTURE OF AN INSERT BUFFER RECORD
-
-In versions < 4.1.x:
-
-1. The first field is the page number.
-2. The second field is an array which stores type info for each subsequent
- field. We store the information which affects the ordering of records, and
- also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
- is 10 bytes.
-3. Next we have the fields of the actual index record.
-
-In versions >= 4.1.x:
-
-Note that contrary to what we planned in the 1990's, there will only be one
-insert buffer tree, and that is in the system tablespace of InnoDB.
-
-1. The first field is the space id.
-2. The second field is a one-byte marker (0) which differentiates records from
- the < 4.1.x storage format.
-3. The third field is the page number.
-4. The fourth field contains the type info, where we have also added 2 bytes to
- store the charset. In the compressed table format of 5.0.x we must add more
- information here so that we can build a dummy 'index' struct which 5.0.x
- can use in the binary search on the index page in the ibuf merge phase.
-5. The rest of the fields contain the fields of the actual index record.
-
-In versions >= 5.0.3:
-
-The first byte of the fourth field is an additional marker (0) if the record
-is in the compact format. The presence of this marker can be detected by
-looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
-
-The high-order bit of the character set field in the type info is the
-"nullable" flag for the field. */
-
-
-/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
-
-If an OS thread performs any operation that brings in disk pages from
-non-system tablespaces into the buffer pool, or creates such a page there,
-then the operation may have as a side effect an insert buffer index tree
-compression. Thus, the tree latch of the insert buffer tree may be acquired
-in the x-mode, and also the file space latch of the system tablespace may
-be acquired in the x-mode.
-
-Also, an insert to an index in a non-system tablespace can have the same
-effect. How do we know this cannot lead to a deadlock of OS threads? There
-is a problem with the i/o-handler threads: they break the latching order
-because they own x-latches to pages which are on a lower level than the
-insert buffer tree latch, its page latches, and the tablespace latch an
-insert buffer operation can reserve.
-
-The solution is the following: Let all the tree and page latches connected
-with the insert buffer be later in the latching order than the fsp latch and
-fsp page latches.
-
-Insert buffer pages must be such that the insert buffer is never invoked
-when these pages are accessed as this would result in a recursion violating
-the latching order. We let a special i/o-handler thread take care of i/o to
-the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
-pages and the first inode page, which contains the inode of the ibuf tree: let
-us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
-access both non-ibuf and ibuf pages.
-
-Then an i/o-handler for the insert buffer never needs to access recursively the
-insert buffer tree and thus obeys the latching order. On the other hand, other
-i/o-handlers for other tablespaces may require access to the insert buffer,
-but because all kinds of latches they need to access there are later in the
-latching order, no violation of the latching order occurs in this case,
-either.
-
-A problem is how to grow and contract an insert buffer tree. As it is later
-in the latching order than the fsp management, we have to reserve the fsp
-latch first, before adding or removing pages from the insert buffer tree.
-We let the insert buffer tree have its own file space management: a free
-list of pages linked to the tree root. To prevent recursive use of the
-insert buffer when adding pages to the tree, we must first load these pages
-to memory, obtaining a latch on them, and only after that add them to the
-free list of the insert buffer tree. More difficult is removing of pages
-from the free list. If there is an excess of pages in the free list of the
-ibuf tree, they might be needed if some thread reserves the fsp latch,
-intending to allocate more file space. So we do the following: if a thread
-reserves the fsp latch, we check the writer count field of the latch. If
-this field has value 1, it means that the thread did not own the latch
-before entering the fsp system, and the mtr of the thread contains no
-modifications to the fsp pages. Now we are free to reserve the ibuf latch,
-and check if there is an excess of pages in the free list. We can then, in a
-separate mini-transaction, take them out of the free list and free them to
-the fsp system.
-
-To avoid deadlocks in the ibuf system, we divide file pages into three levels:
-
-(1) non-ibuf pages,
-(2) ibuf tree pages and the pages in the ibuf tree free list, and
-(3) ibuf bitmap pages.
-
-No OS thread is allowed to access higher level pages if it has latches to
-lower level pages; even if the thread owns a B-tree latch it must not access
-the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
-is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
-exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
-level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
-it uses synchronous aio, it can access any pages, as long as it obeys the
-access order rules. */
-
-/* Buffer pool size per the maximum insert buffer size */
-#define IBUF_POOL_SIZE_PER_MAX_SIZE 2
-
-/* The insert buffer control structure */
-ibuf_t* ibuf = NULL;
-
-static ulint ibuf_rnd = 986058871;
-
-ulint ibuf_flush_count = 0;
-
-#ifdef UNIV_IBUF_DEBUG
-/* Dimensions for the ibuf_count array */
-#define IBUF_COUNT_N_SPACES 500
-#define IBUF_COUNT_N_PAGES 2000
-
-/* Buffered entry counts for file pages, used in debugging */
-static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
-
-/**********************************************************************
-Checks that the indexes to ibuf_counts[][] are within limits. */
-UNIV_INLINE
-void
-ibuf_count_check(
-/*=============*/
- ulint space_id, /* in: space identifier */
- ulint page_no) /* in: page number */
-{
- if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
- return;
- }
-
- fprintf(stderr,
- "InnoDB: UNIV_IBUF_DEBUG limits space_id and page_no\n"
- "InnoDB: and breaks crash recovery.\n"
- "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
- "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
- (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
- (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
- ut_error;
-}
-#endif
-
-/* The start address for an insert buffer bitmap page bitmap */
-#define IBUF_BITMAP PAGE_DATA
-
-/* Offsets in bits for the bits describing a single page in the bitmap */
-#define IBUF_BITMAP_FREE 0
-#define IBUF_BITMAP_BUFFERED 2
-#define IBUF_BITMAP_IBUF 3 /* TRUE if page is a part of the ibuf
- tree, excluding the root page, or is
- in the free list of the ibuf */
-
-/* Number of bits describing a single page */
-#define IBUF_BITS_PER_PAGE 4
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE must be an even number!"
-#endif
-
-/* The mutex used to block pessimistic inserts to ibuf trees */
-static mutex_t ibuf_pessimistic_insert_mutex;
-
-/* The mutex protecting the insert buffer structs */
-static mutex_t ibuf_mutex;
-
-/* The mutex protecting the insert buffer bitmaps */
-static mutex_t ibuf_bitmap_mutex;
-
-/* The area in pages from which contract looks for page numbers for merge */
-#define IBUF_MERGE_AREA 8
-
-/* Inside the merge area, pages which have at most 1 per this number less
-buffered entries compared to maximum volume that can be buffered for a single
-page are merged along with the page whose buffer became full */
-#define IBUF_MERGE_THRESHOLD 4
-
-/* In ibuf_contract at most this number of pages is read to memory in one
-batch, in order to merge the entries for them in the insert buffer */
-#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
-
-/* If the combined size of the ibuf trees exceeds ibuf->max_size by this
-many pages, we start to contract it in connection to inserts there, using
-non-synchronous contract */
-#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
-
-/* Same as above, but use synchronous contract */
-#define IBUF_CONTRACT_ON_INSERT_SYNC 5
-
-/* Same as above, but no insert is done, only contract is called */
-#define IBUF_CONTRACT_DO_NOT_INSERT 10
-
-/* TODO: how to cope with drop table if there are records in the insert
-buffer for the indexes of the table? Is there actually any problem,
-because ibuf merge is done to a page when it is read in, and it is
-still physically like the index page even if the index would have been
-dropped! So, there seems to be no problem. */
-
-/**********************************************************************
-Validates the ibuf data structures when the caller owns ibuf_mutex. */
-
-ibool
-ibuf_validate_low(void);
-/*===================*/
- /* out: TRUE if ok */
-
-/**********************************************************************
-Sets the flag in the current OS thread local storage denoting that it is
-inside an insert buffer routine. */
-UNIV_INLINE
-void
-ibuf_enter(void)
-/*============*/
-{
- ibool* ptr;
-
- ptr = thr_local_get_in_ibuf_field();
-
- ut_ad(*ptr == FALSE);
-
- *ptr = TRUE;
-}
-
-/**********************************************************************
-Sets the flag in the current OS thread local storage denoting that it is
-exiting an insert buffer routine. */
-UNIV_INLINE
-void
-ibuf_exit(void)
-/*===========*/
-{
- ibool* ptr;
-
- ptr = thr_local_get_in_ibuf_field();
-
- ut_ad(*ptr == TRUE);
-
- *ptr = FALSE;
-}
-
-/**********************************************************************
-Returns TRUE if the current OS thread is performing an insert buffer
-routine. */
-
-ibool
-ibuf_inside(void)
-/*=============*/
- /* out: TRUE if inside an insert buffer routine: for instance,
- a read-ahead of non-ibuf pages is then forbidden */
-{
- return(*thr_local_get_in_ibuf_field());
-}
-
-/**********************************************************************
-Gets the ibuf header page and x-latches it. */
-static
-page_t*
-ibuf_header_page_get(
-/*=================*/
- /* out: insert buffer header page */
- ulint space, /* in: space id */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page;
-
- ut_a(space == 0);
-
- ut_ad(!ibuf_inside());
-
- page = buf_page_get(space, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_IBUF_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(page);
-}
-
-/**********************************************************************
-Gets the root page and x-latches it. */
-static
-page_t*
-ibuf_tree_root_get(
-/*===============*/
- /* out: insert buffer tree root page */
- ibuf_data_t* data, /* in: ibuf data */
- ulint space, /* in: space id */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page;
-
- ut_a(space == 0);
- ut_ad(ibuf_inside());
-
- mtr_x_lock(dict_index_get_lock(data->index), mtr);
-
- page = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
- mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(page);
-}
-
-#ifdef UNIV_IBUF_DEBUG
-/**********************************************************************
-Gets the ibuf count for a given page. */
-
-ulint
-ibuf_count_get(
-/*===========*/
- /* out: number of entries in the insert buffer
- currently buffered for this page */
- ulint space, /* in: space id */
- ulint page_no)/* in: page number */
-{
- ibuf_count_check(space, page_no);
-
- return(ibuf_counts[space][page_no]);
-}
-
-/**********************************************************************
-Sets the ibuf count for a given page. */
-static
-void
-ibuf_count_set(
-/*===========*/
- ulint space, /* in: space id */
- ulint page_no,/* in: page number */
- ulint val) /* in: value to set */
-{
- ibuf_count_check(space, page_no);
- ut_a(val < UNIV_PAGE_SIZE);
-
- ibuf_counts[space][page_no] = val;
-}
-#endif
-
-/**********************************************************************
-Creates the insert buffer data structure at a database startup and initializes
-the data structures for the insert buffer. */
-
-void
-ibuf_init_at_db_start(void)
-/*=======================*/
-{
- ibuf = mem_alloc(sizeof(ibuf_t));
-
- /* Note that also a pessimistic delete can sometimes make a B-tree
- grow in size, as the references on the upper levels of the tree can
- change */
-
- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
- / IBUF_POOL_SIZE_PER_MAX_SIZE;
-
- UT_LIST_INIT(ibuf->data_list);
-
- ibuf->size = 0;
-
- mutex_create(&ibuf_pessimistic_insert_mutex,
- SYNC_IBUF_PESS_INSERT_MUTEX);
-
- mutex_create(&ibuf_mutex, SYNC_IBUF_MUTEX);
-
- mutex_create(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
-
- fil_ibuf_init_at_db_start();
-}
-
-/**********************************************************************
-Updates the size information in an ibuf data, assuming the segment size has
-not changed. */
-static
-void
-ibuf_data_sizes_update(
-/*===================*/
- ibuf_data_t* data, /* in: ibuf data struct */
- page_t* root, /* in: ibuf tree root */
- mtr_t* mtr) /* in: mtr */
-{
- ulint old_size;
-
- ut_ad(mutex_own(&ibuf_mutex));
-
- old_size = data->size;
-
- data->free_list_len = flst_get_len(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, mtr);
-
- data->height = 1 + btr_page_get_level(root, mtr);
-
- data->size = data->seg_size - (1 + data->free_list_len);
- /* the '1 +' is the ibuf header page */
- ut_ad(data->size < data->seg_size);
-
- if (page_get_n_recs(root) == 0) {
-
- data->empty = TRUE;
- } else {
- data->empty = FALSE;
- }
-
- ut_ad(ibuf->size + data->size >= old_size);
-
- ibuf->size = ibuf->size + data->size - old_size;
-
-#if 0
- fprintf(stderr, "ibuf size %lu, space ibuf size %lu\n",
- ibuf->size, data->size);
-#endif
-}
-
-/**********************************************************************
-Creates the insert buffer data struct for a single tablespace. Reads the
-root page of the insert buffer tree in the tablespace. This function can
-be called only after the dictionary system has been initialized, as this
-creates also the insert buffer table and index into this tablespace.
-Only space 0 is accepted (asserted below). */
-
-ibuf_data_t*
-ibuf_data_init_for_space(
-/*=====================*/
-			/* out, own: ibuf data struct, linked to the list
-			in ibuf control structure */
-	ulint	space)	/* in: space id */
-{
-	ibuf_data_t*	data;
-	page_t*		root;
-	page_t*		header_page;
-	mtr_t		mtr;
-	char*		buf;
-	mem_heap_t*	heap;
-	dict_table_t*	table;
-	dict_index_t*	index;
-	ulint		n_used;
-
-	ut_a(space == 0);
-
-	data = mem_alloc(sizeof(ibuf_data_t));
-
-	data->space = space;
-
-	mtr_start(&mtr);
-
-	mutex_enter(&ibuf_mutex);
-
-	mtr_x_lock(fil_space_get_latch(space), &mtr);
-
-	header_page = ibuf_header_page_get(space, &mtr);
-
-	fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
-			      &n_used, &mtr);
-	ibuf_enter();
-
-	ut_ad(n_used >= 2);
-
-	data->seg_size = n_used;
-
-	root = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH,
-			    &mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(root, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* Start the counters from zero; ibuf_data_sizes_update() reads
-	data->size as the old size, so it must be set before the call. */
-	data->size = 0;
-	data->n_inserts = 0;
-	data->n_merges = 0;
-	data->n_merged_recs = 0;
-
-	ibuf_data_sizes_update(data, root, &mtr);
-
-	mutex_exit(&ibuf_mutex);
-
-	mtr_commit(&mtr);
-
-	ibuf_exit();
-
-	heap = mem_heap_create(450);
-	buf = mem_heap_alloc(heap, 50);
-
-	sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space);
-	/* use old-style record format for the insert buffer */
-	table = dict_mem_table_create(buf, space, 2, 0);
-
-	dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_BINARY, 0, 0);
-	dict_mem_table_add_col(table, heap, "TYPES", DATA_BINARY, 0, 0);
-
-	table->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
-
-	dict_table_add_to_cache(table, heap);
-
-	/* buf lives in heap, so heap must not be freed before the table
-	name in buf has been handed to the index creation below. Freeing
-	it earlier made dict_mem_index_create() read released memory. */
-	index = dict_mem_index_create(
-		buf, "CLUST_IND", space,
-		DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 2);
-
-	dict_mem_index_add_field(index, "PAGE_NO", 0);
-	dict_mem_index_add_field(index, "TYPES", 0);
-
-	index->id = ut_dulint_add(DICT_IBUF_ID_MIN, space);
-
-	dict_index_add_to_cache(table, index, FSP_IBUF_TREE_ROOT_PAGE_NO);
-
-	/* NOTE(review): this assumes nothing keeps a pointer to buf past
-	dict_index_add_to_cache() - confirm against dict0mem.c/dict0dict.c. */
-	mem_heap_free(heap);
-
-	data->index = dict_table_get_first_index(table);
-
-	mutex_enter(&ibuf_mutex);
-
-	UT_LIST_ADD_LAST(data_list, ibuf->data_list, data);
-
-	mutex_exit(&ibuf_mutex);
-
-	return(data);
-}
-
-/*************************************************************************
-Initializes an ibuf bitmap page: sets the page type to
-FIL_PAGE_IBUF_BITMAP, zero-fills the bitmap area and writes an
-MLOG_IBUF_BITMAP_INIT initial log record. */
-
-void
-ibuf_bitmap_page_init(
-/*==================*/
-	page_t*	page,	/* in: bitmap page */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	/* Total number of bitmap bits kept on one bitmap page */
-	ulint	n_bits = XDES_DESCRIBED_PER_PAGE * IBUF_BITS_PER_PAGE;
-
-	/* Number of bytes to clear;
-	better: n_bytes = UT_BITS_IN_BYTES(n_bits); */
-	ulint	n_bytes = n_bits / 8 + 1;
-
-	fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
-
-	/* Write all zeros to the bitmap. The remaining area (up to the
-	page trailer) is left uninitialized. */
-	memset(page + IBUF_BITMAP, 0, n_bytes);
-
-	mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
-}
-
-/*************************************************************************
-Parses a redo log record of an ibuf bitmap page init. The record carries
-no payload, so the buffer pointer is returned unchanged; if a page is
-given, the page is (re)initialized as a bitmap page. */
-
-byte*
-ibuf_parse_bitmap_init(
-/*===================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr __attribute__((unused)), /* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr)	/* in: mtr or NULL */
-{
-	ut_ad(ptr && end_ptr);
-
-	if (page != NULL) {
-		ibuf_bitmap_page_init(page, mtr);
-	}
-
-	return(ptr);
-}
-
-/************************************************************************
-Reads the requested bitmap bits of a given page from a bitmap page. For
-IBUF_BITMAP_FREE the value is two bits wide; for any other bit a single
-bit is returned. */
-UNIV_INLINE
-ulint
-ibuf_bitmap_page_get_bits(
-/*======================*/
-			/* out: value of bits */
-	page_t*	page,	/* in: bitmap page */
-	ulint	page_no,/* in: page whose bits to get */
-	ulint	bit,	/* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
-	mtr_t*	mtr __attribute__((unused)))	/* in: mtr containing an
-						x-latch to the bitmap
-						page */
-{
-	ulint	pos;		/* bit position counted from IBUF_BITMAP */
-	ulint	byte_no;	/* byte that holds the bit */
-	ulint	bit_in_byte;	/* position of the bit inside that byte */
-	ulint	map_byte;
-	ulint	result;
-
-	ut_ad(bit < IBUF_BITS_PER_PAGE);
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE % 2 != 0"
-#endif
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-
-	pos = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
-		+ bit;
-	byte_no = pos / 8;
-	bit_in_byte = pos % 8;
-
-	ut_ad(byte_no + IBUF_BITMAP < UNIV_PAGE_SIZE);
-
-	map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_no);
-
-	result = ut_bit_get_nth(map_byte, bit_in_byte);
-
-	if (bit != IBUF_BITMAP_FREE) {
-
-		return(result);
-	}
-
-	/* The free space value takes two bits: the one just read is the
-	more significant, the following one the less significant. */
-	ut_ad(bit_in_byte + 1 < 8);
-
-	result = result * 2 + ut_bit_get_nth(map_byte, bit_in_byte + 1);
-
-	return(result);
-}
-
-/************************************************************************
-Writes the requested bitmap bits of a given page into a bitmap page and
-logs the modified byte in the mtr. IBUF_BITMAP_FREE takes a two-bit value
-(0..3); every other bit takes 0 or 1. */
-static
-void
-ibuf_bitmap_page_set_bits(
-/*======================*/
-	page_t*	page,	/* in: bitmap page */
-	ulint	page_no,/* in: page whose bits to set */
-	ulint	bit,	/* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
-	ulint	val,	/* in: value to set */
-	mtr_t*	mtr)	/* in: mtr containing an x-latch to the bitmap page */
-{
-	ulint	pos;		/* bit position counted from IBUF_BITMAP */
-	ulint	byte_no;	/* byte that holds the bit */
-	ulint	bit_in_byte;	/* position of the bit inside that byte */
-	ulint	map_byte;
-
-	ut_ad(bit < IBUF_BITS_PER_PAGE);
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE % 2 != 0"
-#endif
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_IBUF_DEBUG
-	ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
-	     || (0 == ibuf_count_get(buf_frame_get_space_id(page),
-				     page_no)));
-#endif
-	pos = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE
-		+ bit;
-	byte_no = pos / 8;
-	bit_in_byte = pos % 8;
-
-	ut_ad(byte_no + IBUF_BITMAP < UNIV_PAGE_SIZE);
-
-	map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_no);
-
-	if (bit != IBUF_BITMAP_FREE) {
-		ut_ad(val <= 1);
-		map_byte = ut_bit_set_nth(map_byte, bit_in_byte, val);
-	} else {
-		/* Two-bit value: high bit first, low bit in the next
-		position of the same byte. */
-		ut_ad(bit_in_byte + 1 < 8);
-		ut_ad(val <= 3);
-
-		map_byte = ut_bit_set_nth(map_byte, bit_in_byte, val / 2);
-		map_byte = ut_bit_set_nth(map_byte, bit_in_byte + 1, val % 2);
-	}
-
-	mlog_write_ulint(page + IBUF_BITMAP + byte_no, map_byte,
-			 MLOG_1BYTE, mtr);
-}
-
-/************************************************************************
-Computes the number of the bitmap page that describes a given page: the
-page at offset FSP_IBUF_BITMAP_OFFSET within the group of
-XDES_DESCRIBED_PER_PAGE pages that contains page_no. */
-UNIV_INLINE
-ulint
-ibuf_bitmap_page_no_calc(
-/*=====================*/
-				/* out: the bitmap page number where
-				the file page is mapped */
-	ulint	page_no)	/* in: tablespace page number */
-{
-	/* Index of the group of XDES_DESCRIBED_PER_PAGE pages */
-	ulint	group_no = page_no / XDES_DESCRIBED_PER_PAGE;
-
-	return(FSP_IBUF_BITMAP_OFFSET
-	       + XDES_DESCRIBED_PER_PAGE * group_no);
-}
-
-/************************************************************************
-Fetches and x-latches the ibuf bitmap page that holds the descriptor bits
-of a given file page. */
-static
-page_t*
-ibuf_bitmap_get_map_page(
-/*=====================*/
-			/* out: bitmap page where the file page is mapped,
-			that is, the bitmap page containing the descriptor
-			bits for the file page; the bitmap page is
-			x-latched */
-	ulint	space,	/* in: space id of the file page */
-	ulint	page_no,/* in: page number of the file page */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	page_t*	bitmap_page;
-
-	bitmap_page = buf_page_get(space, ibuf_bitmap_page_no_calc(page_no),
-				   RW_X_LATCH, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(bitmap_page, SYNC_IBUF_BITMAP);
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(bitmap_page);
-}
-
-/****************************************************************************
-Sets the free bits of the page in the ibuf bitmap, using the mtr given by
-the caller: the bitmap page is x-latched in that mtr. Nothing is done if
-the index is clustered or if the page level is nonzero. */
-UNIV_INLINE
-void
-ibuf_set_free_bits_low(
-/*===================*/
-	ulint	type,	/* in: index type */
-	page_t*	page,	/* in: index page; free bit is set if the index is
-			non-clustered and page level is 0 */
-	ulint	val,	/* in: value to set: < 4 */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	page_t*	bitmap_page;
-
-	if ((type & DICT_CLUSTERED)
-	    || btr_page_get_level_low(page) != 0) {
-
-		return;
-	}
-
-	bitmap_page = ibuf_bitmap_get_map_page(
-		buf_frame_get_space_id(page),
-		buf_frame_get_page_no(page), mtr);
-#ifdef UNIV_IBUF_DEBUG
-# if 0
-	fprintf(stderr,
-		"Setting page no %lu free bits to %lu should be %lu\n",
-		buf_frame_get_page_no(page), val,
-		ibuf_index_page_calc_free(page));
-# endif
-
-	ut_a(val <= ibuf_index_page_calc_free(page));
-#endif /* UNIV_IBUF_DEBUG */
-	ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
-				  IBUF_BITMAP_FREE, val, mtr);
-}
-
-/****************************************************************************
-Sets the free bit of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. Nothing is done if the index is clustered or if the page level is
-nonzero. The check of the old bits against max_val and the check of val
-against ibuf_index_page_calc_free() are compiled in only when
-UNIV_IBUF_DEBUG is defined. */
-
-void
-ibuf_set_free_bits(
-/*===============*/
- ulint type, /* in: index type */
- page_t* page, /* in: index page; free bit is set if the index is
- non-clustered and page level is 0 */
- ulint val, /* in: value to set: < 4 */
- ulint max_val)/* in: ULINT_UNDEFINED or a maximum value which
- the bits must have before setting; this is for
- debugging */
-{
- mtr_t mtr;
- page_t* bitmap_page;
-
- if (type & DICT_CLUSTERED) {
-
- return;
- }
-
- if (btr_page_get_level_low(page) != 0) {
-
- return;
- }
-
- mtr_start(&mtr); /* private mtr: committed before returning */
-
- bitmap_page = ibuf_bitmap_get_map_page(
- buf_frame_get_space_id(page), buf_frame_get_page_no(page),
- &mtr);
-
- if (max_val != ULINT_UNDEFINED) { /* body is empty unless UNIV_IBUF_DEBUG */
-#ifdef UNIV_IBUF_DEBUG
- ulint old_val;
-
- old_val = ibuf_bitmap_page_get_bits(
- bitmap_page, buf_frame_get_page_no(page),
- IBUF_BITMAP_FREE, &mtr);
-# if 0
- if (old_val != max_val) {
- fprintf(stderr,
- "Ibuf: page %lu old val %lu max val %lu\n",
- buf_frame_get_page_no(page),
- old_val, max_val);
- }
-# endif
-
- ut_a(old_val <= max_val);
-#endif
- }
-#ifdef UNIV_IBUF_DEBUG
-# if 0
- fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
- buf_frame_get_page_no(page), val,
- ibuf_index_page_calc_free(page));
-# endif
-
- ut_a(val <= ibuf_index_page_calc_free(page));
-#endif
- ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page),
- IBUF_BITMAP_FREE, val, &mtr);
- mtr_commit(&mtr);
-}
-
-/****************************************************************************
-Resets the free bits of the page in the ibuf bitmap to 0 by calling
-ibuf_set_free_bits() with no maximum value check. That function does the
-work in a mini-transaction of its own, so no latch to the bitmap page is
-kept after the call. */
-
-void
-ibuf_reset_free_bits_with_type(
-/*===========================*/
-	ulint	type,	/* in: index type */
-	page_t*	page)	/* in: index page; the free bits are reset as
-			described for ibuf_set_free_bits() */
-{
-	const ulint	no_free_space = 0;
-
-	ibuf_set_free_bits(type, page, no_free_space, ULINT_UNDEFINED);
-}
-
-/****************************************************************************
-Resets the free bits of the page in the ibuf bitmap to 0, taking the index
-type from the index object. The work is delegated to ibuf_set_free_bits(),
-which uses a mini-transaction of its own, so no latch to the bitmap page
-is kept after the call. */
-
-void
-ibuf_reset_free_bits(
-/*=================*/
-	dict_index_t*	index,	/* in: index */
-	page_t*		page)	/* in: index page; the free bits are reset
-				as described for ibuf_set_free_bits() */
-{
-	const ulint	no_free_space = 0;
-
-	ibuf_set_free_bits(index->type, page, no_free_space,
-			   ULINT_UNDEFINED);
-}
-
-/**************************************************************************
-Brings the free bits of a page up to date with its present state. The
-bitmap is written only if the bits computed from max_ins_size differ from
-the bits computed from the page now. The work is done in the mtr given,
-which means that the latching order rules virtually prevent any further
-operations for this OS thread until mtr is committed. */
-
-void
-ibuf_update_free_bits_low(
-/*======================*/
-	dict_index_t*	index,		/* in: index */
-	page_t*		page,		/* in: index page */
-	ulint		max_ins_size,	/* in: value of maximum insert size
-					with reorganize before the latest
-					operation performed to the page */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	ulint	old_bits = ibuf_index_page_calc_free_bits(max_ins_size);
-	ulint	new_bits = ibuf_index_page_calc_free(page);
-
-	if (old_bits == new_bits) {
-		/* Bitmap already agrees with the page */
-
-		return;
-	}
-
-	ibuf_set_free_bits_low(index->type, page, new_bits, mtr);
-}
-
-/**************************************************************************
-Brings the free bits of two pages up to date with their present state.
-The work is done in the mtr given, which means that the latching order
-rules virtually prevent any further operations until mtr is committed. */
-
-void
-ibuf_update_free_bits_for_two_pages_low(
-/*====================================*/
-	dict_index_t*	index,	/* in: index */
-	page_t*		page1,	/* in: index page */
-	page_t*		page2,	/* in: index page */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	/* Two arbitrary bitmap pages may get x-latched here; holding the
-	bitmap mutex prevents a deadlock with another OS thread doing a
-	similar operation. */
-
-	mutex_enter(&ibuf_bitmap_mutex);
-
-	ibuf_set_free_bits_low(index->type, page1,
-			       ibuf_index_page_calc_free(page1), mtr);
-
-	ibuf_set_free_bits_low(index->type, page2,
-			       ibuf_index_page_calc_free(page2), mtr);
-
-	mutex_exit(&ibuf_bitmap_mutex);
-}
-
-/**************************************************************************
-Tells whether a page is one of the fixed address ibuf pages: the ibuf tree
-root page of space 0, or an ibuf bitmap page. */
-UNIV_INLINE
-ibool
-ibuf_fixed_addr_page(
-/*=================*/
-			/* out: TRUE if a fixed address ibuf i/o page */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
-{
-	ibool	is_tree_root;
-
-	is_tree_root = (space == 0 && page_no == IBUF_TREE_ROOT_PAGE_NO);
-
-	return(is_tree_root || ibuf_bitmap_page(page_no));
-}
-
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Always returns FALSE while recv_no_ibuf_operations is set. The bitmap page
-is read in a mini-transaction of its own. */
-
-ibool
-ibuf_page(
-/*======*/
-			/* out: TRUE if level 2 or level 3 page */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
-{
-	page_t*	bitmap_page;
-	mtr_t	local_mtr;
-	ibool	is_ibuf_page;
-
-	if (recv_no_ibuf_operations) {
-		/* Recovery is running: no ibuf operations should be
-		performed */
-
-		return(FALSE);
-	}
-
-	if (ibuf_fixed_addr_page(space, page_no)) {
-
-		return(TRUE);
-	}
-
-	if (space != 0) {
-		/* Currently we only have an ibuf tree in space 0 */
-
-		return(FALSE);
-	}
-
-	ut_ad(fil_space_get_type(space) == FIL_TABLESPACE);
-
-	/* Look the answer up from the IBUF_BITMAP_IBUF bit of the page */
-
-	mtr_start(&local_mtr);
-
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &local_mtr);
-
-	is_ibuf_page = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
-						 IBUF_BITMAP_IBUF,
-						 &local_mtr);
-	mtr_commit(&local_mtr);
-
-	return(is_ibuf_page);
-}
-
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages,
-reading the bitmap page within the mtr given by the caller. */
-
-ibool
-ibuf_page_low(
-/*==========*/
-			/* out: TRUE if level 2 or level 3 page */
-	ulint	space,	/* in: space id */
-	ulint	page_no,/* in: page number */
-	mtr_t*	mtr)	/* in: mtr which will contain an x-latch to the
-			bitmap page if the page is not one of the fixed
-			address ibuf pages */
-{
-	page_t*	bitmap_page;
-
-	if (ibuf_fixed_addr_page(space, page_no)) {
-
-		return(TRUE);
-	}
-
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr);
-
-	return(ibuf_bitmap_page_get_bits(bitmap_page, page_no,
-					 IBUF_BITMAP_IBUF, mtr));
-}
-
-/************************************************************************
-Reads the page number field of an ibuf record. The length of field 1
-tells the record formats apart: 1 byte means the >= 4.1.x format, where
-the page number is in field 2; otherwise it is in field 0. */
-static
-ulint
-ibuf_rec_get_page_no(
-/*=================*/
-			/* out: page number */
-	rec_t*	rec)	/* in: ibuf record */
-{
-	byte*	field;
-	ulint	len;
-	ulint	page_no_field;	/* number of the field to read */
-
-	ut_ad(ibuf_inside());
-	ut_ad(rec_get_n_fields_old(rec) > 2);
-
-	field = rec_get_nth_field_old(rec, 1, &len);
-
-	if (len != 1) {
-		/* < 4.1.x record format */
-		ut_a(trx_doublewrite_must_reset_space_ids);
-		ut_a(!trx_sys_multiple_tablespace_format);
-
-		page_no_field = 0;
-	} else {
-		/* >= 4.1.x record format */
-		ut_a(trx_sys_multiple_tablespace_format);
-
-		page_no_field = 2;
-	}
-
-	field = rec_get_nth_field_old(rec, page_no_field, &len);
-
-	ut_a(len == 4);
-
-	return(mach_read_from_4(field));
-}
-
-/************************************************************************
-Reads the space id field of an ibuf record. Records of the < 4.1.x format
-carry no space id; 0 is returned for them. */
-static
-ulint
-ibuf_rec_get_space(
-/*===============*/
-			/* out: space id */
-	rec_t*	rec)	/* in: ibuf record */
-{
-	byte*	field;
-	ulint	len;
-
-	ut_ad(ibuf_inside());
-	ut_ad(rec_get_n_fields_old(rec) > 2);
-
-	field = rec_get_nth_field_old(rec, 1, &len);
-
-	if (len != 1) {
-		/* < 4.1.x record format */
-
-		ut_a(trx_doublewrite_must_reset_space_ids);
-		ut_a(!trx_sys_multiple_tablespace_format);
-
-		return(0);
-	}
-
-	/* >= 4.1.x record format: the space id is in field 0 */
-
-	ut_a(trx_sys_multiple_tablespace_format);
-	field = rec_get_nth_field_old(rec, 0, &len);
-	ut_a(len == 4);
-
-	return(mach_read_from_4(field));
-}
-
-/************************************************************************
-Creates a dummy table and a dummy index on it, both named "IBUF_DUMMY",
-for inserting a record to a non-clustered index. The index is marked as
-cached although it is not added to the dictionary cache here. */
-static
-dict_index_t*
-ibuf_dummy_index_create(
-/*====================*/
-			/* out: dummy index */
-	ulint	n,	/* in: number of fields */
-	ibool	comp)	/* in: TRUE=use compact record format */
-{
-	dict_index_t*	dummy_index;
-	dict_table_t*	dummy_table;
-
-	dummy_table = dict_mem_table_create("IBUF_DUMMY",
-					    DICT_HDR_SPACE, n,
-					    comp ? DICT_TF_COMPACT : 0);
-
-	dummy_index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
-					    DICT_HDR_SPACE, 0, n);
-
-	dummy_index->table = dummy_table;
-
-	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
-	dummy_index->cached = TRUE;
-
-	return(dummy_index);
-}
-/************************************************************************
-Appends a column of the given type to the table of a dummy index and adds
-that same column to the index. */
-static
-void
-ibuf_dummy_index_add_col(
-/*=====================*/
-	dict_index_t*	index,	/* in: dummy index */
-	dtype_t*	type,	/* in: the data type of the column */
-	ulint		len)	/* in: length of the column */
-{
-	/* Position the new column will get in the table */
-	ulint		col_no = index->table->n_def;
-
-	dict_mem_table_add_col(index->table, NULL, NULL,
-			       dtype_get_mtype(type),
-			       dtype_get_prtype(type),
-			       dtype_get_len(type));
-
-	dict_index_add_col(index, index->table,
-			   (dict_col_t*)
-			   dict_table_get_nth_col(index->table, col_no),
-			   len);
-}
-/************************************************************************
-Frees a dummy index and the dummy table it points to. */
-static
-void
-ibuf_dummy_index_free(
-/*==================*/
-	dict_index_t*	index)	/* in: dummy index */
-{
-	/* Pick up the table pointer before the index object, which holds
-	it, is freed. */
-	dict_table_t*	dummy_table = index->table;
-
-	dict_mem_index_free(index);
-	dict_mem_table_free(dummy_table);
-}
-
-/*************************************************************************
-Builds the entry to insert into a non-clustered index when we have the
-corresponding record in an ibuf index. Both ibuf record formats are
-handled: if field 1 is longer than one byte the record is of the < 4.1.x
-format (type info in field 1, data from field 2 on); otherwise it is of
-the >= 4.1.x format (type info in field 3, data from field 4 on). A dummy
-index is created for the entry in both cases; columns are added to the
-dummy index only in the >= 4.1.x case. */
-static
-dtuple_t*
-ibuf_build_entry_from_ibuf_rec(
-/*===========================*/
- /* out, own: entry to insert to
- a non-clustered index; NOTE that
- as we copy pointers to fields in
- ibuf_rec, the caller must hold a
- latch to the ibuf_rec page as long
- as the entry is used! */
- rec_t* ibuf_rec, /* in: record in an insert buffer */
- mem_heap_t* heap, /* in: heap where built */
- dict_index_t** pindex) /* out, own: dummy index that
- describes the entry */
-{
- dtuple_t* tuple;
- dfield_t* field;
- ulint n_fields;
- byte* types;
- const byte* data;
- ulint len;
- ulint i;
- dict_index_t* index;
-
- data = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
- if (len > 1) {
- /* This is a < 4.1.x format record */
-
- ut_a(trx_doublewrite_must_reset_space_ids);
- ut_a(!trx_sys_multiple_tablespace_format);
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
- tuple = dtuple_create(heap, n_fields);
- types = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
- ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(tuple, i);
-
- data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
-
- dfield_set_data(field, data, len);
-
- dtype_read_for_order_and_null_size(
- dfield_get_type(field),
- types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
- }
-
- *pindex = ibuf_dummy_index_create(n_fields, FALSE);
- return(tuple);
- }
-
- /* This is a >= 4.1.x format record */
-
- ut_a(trx_sys_multiple_tablespace_format);
- ut_a(*data == 0);
- ut_a(rec_get_n_fields_old(ibuf_rec) > 4);
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
-
- tuple = dtuple_create(heap, n_fields);
-
- types = rec_get_nth_field_old(ibuf_rec, 3, &len);
-
- ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
- index = ibuf_dummy_index_create(
- n_fields, len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
- /* compact record format: the type info is preceded by one
- marker byte, which must be 0 and is skipped here */
- len--;
- ut_a(*types == 0);
- types++;
- }
-
- ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(tuple, i);
-
- data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
-
- dfield_set_data(field, data, len);
-
- dtype_new_read_for_order_and_null_size(
- dfield_get_type(field),
- types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
- }
-
- *pindex = index;
- return(tuple);
-}
-
-/************************************************************************
-Returns the space taken by a stored non-clustered index entry if converted to
-an index record. For a >= 4.1.x format record in the compact format the
-entry is built with ibuf_build_entry_from_ibuf_rec() and measured with
-rec_get_converted_size(); in the other cases the field lengths are summed
-up here, counting dtype_get_sql_null_size() for an SQL NULL field. */
-static
-ulint
-ibuf_rec_get_volume(
-/*================*/
- /* out: size of index record in bytes + an upper
- limit of the space taken in the page directory */
- rec_t* ibuf_rec)/* in: ibuf record */
-{
- dtype_t dtype;
- ibool new_format = FALSE;
- ulint data_size = 0;
- ulint n_fields;
- byte* types;
- byte* data;
- ulint len;
- ulint i;
-
- ut_ad(ibuf_inside());
- ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
-
- data = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
- if (len > 1) {
- /* < 4.1.x format record */
-
- ut_a(trx_doublewrite_must_reset_space_ids);
- ut_a(!trx_sys_multiple_tablespace_format);
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
-
- types = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
- ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
- } else {
- /* >= 4.1.x format record */
-
- ut_a(trx_sys_multiple_tablespace_format);
- ut_a(*data == 0);
-
- types = rec_get_nth_field_old(ibuf_rec, 3, &len);
-
- ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
- if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
- /* compact record format: build the entry in a
- temporary heap and a dummy index, measure it, and
- free both before returning */
- ulint volume;
- dict_index_t* dummy_index;
- mem_heap_t* heap = mem_heap_create(500);
- dtuple_t* entry = ibuf_build_entry_from_ibuf_rec(
- ibuf_rec, heap, &dummy_index);
- volume = rec_get_converted_size(dummy_index, entry);
- ibuf_dummy_index_free(dummy_index);
- mem_heap_free(heap);
- return(volume + page_dir_calc_reserved_space(1));
- }
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
-
- new_format = TRUE;
- }
-
- for (i = 0; i < n_fields; i++) {
- if (new_format) {
- data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
-
- dtype_new_read_for_order_and_null_size(
- &dtype, types + i
- * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
- } else {
- data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
-
- dtype_read_for_order_and_null_size(
- &dtype, types + i
- * DATA_ORDER_NULL_TYPE_BUF_SIZE);
- }
-
- if (len == UNIV_SQL_NULL) {
- data_size += dtype_get_sql_null_size(&dtype);
- } else {
- data_size += len;
- }
- }
-
- return(data_size + rec_get_converted_extra_size(data_size, n_fields)
- + page_dir_calc_reserved_space(1));
-}
-
-/*************************************************************************
-Builds the tuple to insert to an ibuf tree when we have an entry for a
-non-clustered index. The tuple has four header fields (space id, marker
-byte, page number, type info) followed by the fields of the entry; all
-buffers needed for the header fields are allocated from heap. */
-static
-dtuple_t*
-ibuf_entry_build(
-/*=============*/
- /* out, own: entry to insert into an ibuf
- index tree; NOTE that the original entry
- must be kept because we copy pointers to its
- fields */
- dict_index_t* index, /* in: non-clustered index */
- dtuple_t* entry, /* in: entry for a non-clustered index */
- ulint space, /* in: space id */
- ulint page_no,/* in: index page number where entry should
- be inserted */
- mem_heap_t* heap) /* in: heap into which to build */
-{
- dtuple_t* tuple;
- dfield_t* field;
- dfield_t* entry_field;
- ulint n_fields;
- byte* buf;
- byte* buf2;
- ulint i;
-
- /* Starting from 4.1.x, we have to build a tuple whose
- (1) first field is the space id,
- (2) the second field a single marker byte (0) to tell that this
- is a new format record,
- (3) the third contains the page number, and
- (4) the fourth contains the relevant type information of each data
- field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is
- (a) 0 for b-trees in the old format, and
- (b) 1 for b-trees in the compact format, the first byte of the field
- being the marker (0);
- (5) and the rest of the fields are copied from entry. All fields
- in the tuple are ordered like the type binary in our insert buffer
- tree. */
-
- n_fields = dtuple_get_n_fields(entry);
-
- tuple = dtuple_create(heap, n_fields + 4);
-
- /* Store the space id in tuple */
-
- field = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 4);
-
- mach_write_to_4(buf, space);
-
- dfield_set_data(field, buf, 4);
-
- /* Store the marker byte field in tuple */
-
- field = dtuple_get_nth_field(tuple, 1);
-
- buf = mem_heap_alloc(heap, 1);
-
- /* We set the marker byte zero */
-
- mach_write_to_1(buf, 0);
-
- dfield_set_data(field, buf, 1);
-
- /* Store the page number in tuple */
-
- field = dtuple_get_nth_field(tuple, 2);
-
- buf = mem_heap_alloc(heap, 4);
-
- mach_write_to_4(buf, page_no);
-
- dfield_set_data(field, buf, 4);
-
- /* Store the type info in buf2, and add the fields from entry to
- tuple. The value of dict_table_is_comp() is added to the allocation
- size to make room for the compact format marker byte. */
- buf2 = mem_heap_alloc(heap, n_fields
- * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- + dict_table_is_comp(index->table));
- if (dict_table_is_comp(index->table)) {
- *buf2++ = 0; /* write the compact format indicator */
- }
- for (i = 0; i < n_fields; i++) {
- ulint fixed_len;
- const dict_field_t* ifield;
-
- /* We add 4 below because we have the 4 extra fields at the
- start of an ibuf record */
-
- field = dtuple_get_nth_field(tuple, i + 4);
- entry_field = dtuple_get_nth_field(entry, i);
- dfield_copy(field, entry_field);
-
- ifield = dict_index_get_nth_field(index, i);
- /* Prefix index columns of fixed-length columns are of
- fixed length. However, in the function call below,
- dfield_get_type(entry_field) contains the fixed length
- of the column in the clustered index. Replace it with
- the fixed length of the secondary index column. */
- fixed_len = ifield->fixed_len;
-
-#ifdef UNIV_DEBUG
- if (fixed_len) {
- /* dict_index_add_col() should guarantee these */
- ut_ad(fixed_len <= (ulint) entry_field->type.len);
- if (ifield->prefix_len) {
- ut_ad(ifield->prefix_len == fixed_len);
- } else {
- ut_ad(fixed_len
- == (ulint) entry_field->type.len);
- }
- }
-#endif /* UNIV_DEBUG */
-
- dtype_new_store_for_order_and_null_size(
- buf2 + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
- dfield_get_type(entry_field), fixed_len);
- }
-
- /* Store the type info in buf2 to field 3 of tuple */
-
- field = dtuple_get_nth_field(tuple, 3);
-
- if (dict_table_is_comp(index->table)) {
- buf2--; /* step back onto the compact format marker byte */
- }
-
- dfield_set_data(field, buf2, n_fields
- * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- + dict_table_is_comp(index->table));
- /* Set all the types in the new tuple binary */
-
- dtuple_set_types_binary(tuple, n_fields + 4);
-
- return(tuple);
-}
-
-/*************************************************************************
-Builds a one-field search tuple, holding the page number, that is used to
-search buffered inserts for an index page. This is for < 4.1.x format
-records; only space 0 is accepted. */
-static
-dtuple_t*
-ibuf_search_tuple_build(
-/*====================*/
-				/* out, own: search tuple */
-	ulint		space,	/* in: space id */
-	ulint		page_no,/* in: index page number */
-	mem_heap_t*	heap)	/* in: heap into which to build */
-{
-	dtuple_t*	search_tuple;
-	dfield_t*	page_no_field;
-	byte*		page_no_buf;
-
-	ut_a(space == 0);
-	ut_a(trx_doublewrite_must_reset_space_ids);
-	ut_a(!trx_sys_multiple_tablespace_format);
-
-	search_tuple = dtuple_create(heap, 1);
-
-	/* Field 0: the page number as 4 bytes */
-
-	page_no_field = dtuple_get_nth_field(search_tuple, 0);
-	page_no_buf = mem_heap_alloc(heap, 4);
-	mach_write_to_4(page_no_buf, page_no);
-	dfield_set_data(page_no_field, page_no_buf, 4);
-
-	dtuple_set_types_binary(search_tuple, 1);
-
-	return(search_tuple);
-}
-
-/*************************************************************************
-Builds a three-field search tuple (space id, marker byte, page number)
-that is used to search buffered inserts for an index page. This is for
->= 4.1.x format records. */
-static
-dtuple_t*
-ibuf_new_search_tuple_build(
-/*========================*/
-				/* out, own: search tuple */
-	ulint		space,	/* in: space id */
-	ulint		page_no,/* in: index page number */
-	mem_heap_t*	heap)	/* in: heap into which to build */
-{
-	dtuple_t*	search_tuple;
-	dfield_t*	dfield;
-	byte*		value;
-
-	ut_a(trx_sys_multiple_tablespace_format);
-
-	search_tuple = dtuple_create(heap, 3);
-
-	/* Field 0: the space id as 4 bytes */
-
-	dfield = dtuple_get_nth_field(search_tuple, 0);
-	value = mem_heap_alloc(heap, 4);
-	mach_write_to_4(value, space);
-	dfield_set_data(dfield, value, 4);
-
-	/* Field 1: the new format record marker byte (0) */
-
-	dfield = dtuple_get_nth_field(search_tuple, 1);
-	value = mem_heap_alloc(heap, 1);
-	mach_write_to_1(value, 0);
-	dfield_set_data(dfield, value, 1);
-
-	/* Field 2: the page number as 4 bytes */
-
-	dfield = dtuple_get_nth_field(search_tuple, 2);
-	value = mem_heap_alloc(heap, 4);
-	mach_write_to_4(value, page_no);
-	dfield_set_data(dfield, value, 4);
-
-	dtuple_set_types_binary(search_tuple, 3);
-
-	return(search_tuple);
-}
-
-/*************************************************************************
-Tells whether the free list of the ibuf tree is long enough that we dare
-to start a pessimistic insert to the insert buffer. The caller must own
-ibuf_mutex. */
-UNIV_INLINE
-ibool
-ibuf_data_enough_free_for_insert(
-/*=============================*/
-				/* out: TRUE if enough free pages in list */
-	ibuf_data_t*	data)	/* in: ibuf data for the space */
-{
-	ulint	needed;
-
-	ut_ad(mutex_own(&ibuf_mutex));
-
-	/* A big margin of free pages is wanted: a B-tree can sometimes grow
-	also when records are deleted from it, as the node pointers can
-	change, and deleting the inserts buffered for pages that are read
-	to the buffer pool must never run out of free space in the insert
-	buffer. */
-
-	needed = data->size / 2 + 3 * data->height;
-
-	return((data->free_list_len >= needed) ? TRUE : FALSE);
-}
-
-/*************************************************************************
-Tells whether the free list of the ibuf tree is so long that pages should
-be removed from it and freed to the file space management. The caller
-must own ibuf_mutex. */
-UNIV_INLINE
-ibool
-ibuf_data_too_much_free(
-/*====================*/
-				/* out: TRUE if enough free pages in list */
-	ibuf_data_t*	data)	/* in: ibuf data for the space */
-{
-	ulint	limit;
-
-	ut_ad(mutex_own(&ibuf_mutex));
-
-	limit = 3 + data->size / 2 + 3 * data->height;
-
-	return(data->free_list_len >= limit);
-}
-
-/*************************************************************************
-Allocates a new page from the ibuf file segment and adds it to the free
-list. On success the page type is set to FIL_PAGE_IBUF_FREE_LIST, the
-seg_size and free_list_len counters of ibuf_data are incremented and the
-IBUF_BITMAP_IBUF bit of the page is set. Everything is done in one
-mini-transaction local to this function. Only space 0 is accepted. */
-static
-ulint
-ibuf_add_free_page(
-/*===============*/
- /* out: DB_SUCCESS, or DB_STRONG_FAIL
- if no space left */
- ulint space, /* in: space id */
- ibuf_data_t* ibuf_data) /* in: ibuf data for the space */
-{
- mtr_t mtr;
- page_t* header_page;
- ulint page_no;
- page_t* page;
- page_t* root;
- page_t* bitmap_page;
-
- ut_a(space == 0);
-
- mtr_start(&mtr);
-
- /* Acquire the fsp latch before the ibuf header, obeying the latching
- order */
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header_page = ibuf_header_page_get(space, &mtr);
-
- /* Allocate a new page: NOTE that if the page has been a part of a
- non-clustered index which has subsequently been dropped, then the
- page may have buffered inserts in the insert buffer, and these
- should be deleted from there. These get deleted when the page
- allocation creates the page in buffer. Thus the call below may end
- up calling the insert buffer routines and, as we yet have no latches
- to insert buffer tree pages, these routines can run without a risk
- of a deadlock. This is the reason why we created a special ibuf
- header page apart from the ibuf tree. */
-
- page_no = fseg_alloc_free_page(header_page + IBUF_HEADER
- + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
- &mtr);
- if (page_no == FIL_NULL) {
- mtr_commit(&mtr); /* nothing was allocated; give up */
-
- return(DB_STRONG_FAIL);
- }
-
- page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- root = ibuf_tree_root_get(ibuf_data, space, &mtr);
-
- /* Add the page to the free list and update the ibuf size data */
-
- flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
- MLOG_2BYTES, &mtr);
-
- ibuf_data->seg_size++;
- ibuf_data->free_list_len++;
-
- /* Set the bit indicating that this page is now an ibuf tree page
- (level 2 page) */
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
-
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
- TRUE, &mtr);
- mtr_commit(&mtr); /* committed while ibuf_mutex is still held */
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************************
-Removes a page from the free list and frees it to the fsp system. The page
-removed is the last node of the free list. If ibuf_data_too_much_free() does
-not hold once the mutexes have been acquired, the function returns without
-changing anything. Otherwise seg_size and free_list_len of ibuf_data are
-decremented and the IBUF_BITMAP_IBUF bit of the page is cleared. */
-static
-void
-ibuf_remove_free_page(
-/*==================*/
- ulint space, /* in: space id; asserted to be 0 */
- ibuf_data_t* ibuf_data) /* in: ibuf data for the space */
-{
- mtr_t mtr;
- mtr_t mtr2;
- page_t* header_page;
- ulint page_no;
- page_t* page;
- page_t* root;
- page_t* bitmap_page;
-
- ut_a(space == 0);
-
- mtr_start(&mtr);
-
- /* Acquire the fsp latch before the ibuf header, obeying the latching
- order */
- mtr_x_lock(fil_space_get_latch(space), &mtr);
-
- header_page = ibuf_header_page_get(space, &mtr);
-
- /* Prevent pessimistic inserts to insert buffer trees for a while */
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- if (!ibuf_data_too_much_free(ibuf_data)) {
-
- /* Nothing to remove: release everything in the reverse order of
- acquisition and return */
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
- mtr_commit(&mtr);
-
- return;
- }
-
- mtr_start(&mtr2); /* second mtr, used only to read the last list node */
-
- root = ibuf_tree_root_get(ibuf_data, space, &mtr2);
-
- page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- &mtr2)
- .page;
-
- /* NOTE that we must release the latch on the ibuf tree root
- because in fseg_free_page we access level 1 pages, and the root
- is a level 2 page. */
-
- mtr_commit(&mtr2);
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- /* Since pessimistic inserts were prevented, we know that the
- page is still in the free list. NOTE that also deletes may take
- pages from the free list, but they take them from the start, and
- the free list was so long that they cannot have taken the last
- page from it. */
-
- fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- space, page_no, &mtr);
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- buf_page_reset_file_page_was_freed(space, page_no);
-#endif
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- root = ibuf_tree_root_get(ibuf_data, space, &mtr);
-
- ut_ad(page_no == flst_get_last(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, &mtr)
- .page);
-
- page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Remove the page from the free list and update the ibuf size data */
-
- flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
-
- ibuf_data->seg_size--;
- ibuf_data->free_list_len--;
-
- mutex_exit(&ibuf_pessimistic_insert_mutex); /* the page is off the list */
-
- /* Set the bit indicating that this page is no more an ibuf tree page
- (level 2 page) */
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
-
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
- FALSE, &mtr);
-#ifdef UNIV_DEBUG_FILE_ACCESSES
- buf_page_set_file_page_was_freed(space, page_no);
-#endif
- mtr_commit(&mtr);
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-}
-
-/***************************************************************************
-Trims surplus pages off the ibuf free list. Invoked when an OS thread asks
-the fsp services for a new file segment, or for a new page to a file
-segment, and the thread was not holding the fsp latch before that call.
-Only space 0 is accepted; for any other space id an error is printed to
-stderr and nothing is freed. */
-
-void
-ibuf_free_excess_pages(
-/*===================*/
-	ulint	space)	/* in: space id */
-{
-	ibuf_data_t*	data;
-	ulint		n_left;
-	ibool		excess;
-
-	if (space != 0) {
-		fprintf(stderr,
-			"InnoDB: Error: calling ibuf_free_excess_pages"
-			" for space %lu\n", (ulong) space);
-		return;
-	}
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(fil_space_get_latch(space), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(rw_lock_get_x_lock_count(fil_space_get_latch(space)) == 1);
-	ut_ad(!ibuf_inside());
-
-	/* NOTE: the thread is required not to have owned the latch earlier:
-	only then can the correct latching order for the ibuf latches be
-	obeyed */
-
-	data = fil_space_get_ibuf_data(space);
-
-	if (data == NULL) {
-		/* The ibuf data of the space is not initialized yet */
-
-		return;
-	}
-
-	/* Remove at most 4 pages per call, so that the service the caller
-	requested is not delayed too much */
-
-	for (n_left = 4; n_left > 0; n_left--) {
-
-		mutex_enter(&ibuf_mutex);
-
-		excess = ibuf_data_too_much_free(data);
-
-		mutex_exit(&ibuf_mutex);
-
-		if (!excess) {
-
-			return;
-		}
-
-		ibuf_remove_free_page(space, data);
-	}
-}
-
-/*************************************************************************
-Reads page numbers from a leaf in an ibuf tree. The scan first walks
-backwards from rec for as long as the records belong to the same space and
-to the same IBUF_MERGE_AREA-sized range of page numbers as the starting
-record, visiting at most 'limit' distinct pages, and then walks forwards from
-that point summing the record volume per page. A page is stored if it is the
-page of the starting record, or if contract is TRUE, or if its summed volume
-exceeds the threshold computed from IBUF_MERGE_THRESHOLD. If the leaf holds
-no user records, 0 is returned and *n_stored is 0. */
-static
-ulint
-ibuf_get_merge_page_nos(
-/*====================*/
- /* out: a lower limit for the combined volume
- of records which will be merged */
- ibool contract,/* in: TRUE if this function is called to
- contract the tree, FALSE if this is called
- when a single page becomes full and we look
- if it pays to read also nearby pages */
- rec_t* rec, /* in: record from which we read up and down
- in the chain of records */
- ulint* space_ids,/* in/out: space id's of the pages */
- ib_longlong* space_versions,/* in/out: tablespace version
- timestamps; used to prevent reading in old
- pages after DISCARD + IMPORT tablespace */
- ulint* page_nos,/* in/out: buffer for at least
- IBUF_MAX_N_PAGES_MERGED many page numbers;
- the page numbers are in an ascending order */
- ulint* n_stored)/* out: number of page numbers stored to
- page_nos in this function */
-{
- ulint prev_page_no;
- ulint prev_space_id;
- ulint first_page_no;
- ulint first_space_id;
- ulint rec_page_no;
- ulint rec_space_id;
- ulint sum_volumes;
- ulint volume_for_page;
- ulint rec_volume;
- ulint limit;
- ulint n_pages;
-
- *n_stored = 0;
-
- limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4);
-
- if (page_rec_is_supremum(rec)) {
-
- rec = page_rec_get_prev(rec);
- }
-
- if (page_rec_is_infimum(rec)) {
-
- rec = page_rec_get_next(rec);
- }
-
- if (page_rec_is_supremum(rec)) { /* no user record on the page */
-
- return(0);
- }
-
- first_page_no = ibuf_rec_get_page_no(rec);
- first_space_id = ibuf_rec_get_space(rec);
- n_pages = 0;
- prev_page_no = 0;
- prev_space_id = 0;
-
- /* Go backwards from the first rec until we reach the border of the
- 'merge area', or the page start or the limit of storeable pages is
- reached */
-
- while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
-
- rec_page_no = ibuf_rec_get_page_no(rec);
- rec_space_id = ibuf_rec_get_space(rec);
-
- if (rec_space_id != first_space_id
- || rec_page_no / IBUF_MERGE_AREA
- != first_page_no / IBUF_MERGE_AREA) {
-
- break;
- }
-
- if (rec_page_no != prev_page_no
- || rec_space_id != prev_space_id) {
- n_pages++; /* a page not seen on the previous record */
- }
-
- prev_page_no = rec_page_no;
- prev_space_id = rec_space_id;
-
- rec = page_rec_get_prev(rec);
- }
-
- rec = page_rec_get_next(rec); /* step back onto the first record to scan */
-
- /* At the loop start there is no prev page; we mark this with a pair
- of space id, page no (0, 0) for which there can never be entries in
- the insert buffer */
-
- prev_page_no = 0;
- prev_space_id = 0;
- sum_volumes = 0;
- volume_for_page = 0;
-
- while (*n_stored < limit) {
- if (page_rec_is_supremum(rec)) {
- /* When no more records available, mark this with
- another 'impossible' pair of space id, page no */
- rec_page_no = 1;
- rec_space_id = 0;
- } else {
- rec_page_no = ibuf_rec_get_page_no(rec);
- rec_space_id = ibuf_rec_get_space(rec);
- ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO);
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
-#endif
- /* The records of the previous page have all been seen when the
- (space id, page no) pair changes; the (0, 0) pair means that
- there is no previous page yet */
- if ((rec_space_id != prev_space_id
- || rec_page_no != prev_page_no)
- && (prev_space_id != 0 || prev_page_no != 0)) {
-
- if ((prev_page_no == first_page_no
- && prev_space_id == first_space_id)
- || contract
- || (volume_for_page
- > ((IBUF_MERGE_THRESHOLD - 1)
- * 4 * UNIV_PAGE_SIZE
- / IBUF_PAGE_SIZE_PER_FREE_SPACE)
- / IBUF_MERGE_THRESHOLD)) {
-
- space_ids[*n_stored] = prev_space_id;
- space_versions[*n_stored]
- = fil_space_get_version(prev_space_id);
- page_nos[*n_stored] = prev_page_no;
-
- (*n_stored)++;
-
- sum_volumes += volume_for_page;
- }
-
- if (rec_space_id != first_space_id
- || rec_page_no / IBUF_MERGE_AREA
- != first_page_no / IBUF_MERGE_AREA) {
-
- break;
- }
-
- volume_for_page = 0;
- }
-
- if (rec_page_no == 1 && rec_space_id == 0) {
- /* Supremum record */
-
- break;
- }
-
- rec_volume = ibuf_rec_get_volume(rec);
-
- volume_for_page += rec_volume;
-
- prev_page_no = rec_page_no;
- prev_space_id = rec_space_id;
-
- rec = page_rec_get_next(rec);
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
-#endif
-#if 0
- fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
- *n_stored, sum_volumes);
-#endif
- return(sum_volumes);
-}
-
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. A tree
-not marked empty is chosen, a cursor is opened at a random position of a
-random leaf of it, the page numbers to merge are collected with
-ibuf_get_merge_page_nos() and reads for them are issued with
-buf_read_ibuf_merge_pages(). If the chosen leaf contains no records, the
-tree is marked empty and the selection is retried from the start. */
-static
-ulint
-ibuf_contract_ext(
-/*==============*/
- /* out: a lower limit for the combined size in bytes
- of entries which will be merged from ibuf trees to the
- pages read, 0 if ibuf is empty */
- ulint* n_pages,/* out: number of pages to which merged; set
- to 0 if ibuf is empty */
- ibool sync) /* in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
-{
- ulint rnd_pos;
- ibuf_data_t* data;
- btr_pcur_t pcur;
- ulint space;
- ibool all_trees_empty;
- ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
- ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED];
- ulint n_stored;
- ulint sum_sizes;
- mtr_t mtr;
-
- *n_pages = 0;
-loop:
- ut_ad(!ibuf_inside());
-
- mutex_enter(&ibuf_mutex);
-
- ut_ad(ibuf_validate_low());
-
- /* Choose an ibuf tree at random (though there really is only one tree
- in the current implementation) */
- ibuf_rnd += 865558671;
-
- rnd_pos = ibuf_rnd % ibuf->size;
-
- all_trees_empty = TRUE;
-
- data = UT_LIST_GET_FIRST(ibuf->data_list);
-
- for (;;) {
- if (!data->empty) {
- all_trees_empty = FALSE;
-
- if (rnd_pos < data->size) {
-
- break;
- }
-
- rnd_pos -= data->size;
- }
-
- data = UT_LIST_GET_NEXT(data_list, data);
-
- if (data == NULL) { /* the end of the list was reached */
- if (all_trees_empty) {
- mutex_exit(&ibuf_mutex);
-
- return(0);
- }
-
- data = UT_LIST_GET_FIRST(ibuf->data_list);
- }
- }
-
- ut_ad(data);
-
- space = data->index->space;
-
- ut_a(space == 0); /* We currently only have an ibuf tree in
- space 0 */
- mtr_start(&mtr);
-
- ibuf_enter();
-
- /* Open a cursor to a randomly chosen leaf of the tree, at a random
- position within the leaf */
-
- btr_pcur_open_at_rnd_pos(data->index, BTR_SEARCH_LEAF, &pcur, &mtr);
-
- if (0 == page_get_n_recs(btr_pcur_get_page(&pcur))) {
-
- /* This tree is empty */
-
- data->empty = TRUE;
-
- ibuf_exit();
-
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- mutex_exit(&ibuf_mutex);
-
- goto loop; /* choose again; this tree is skipped from now on */
- }
-
- mutex_exit(&ibuf_mutex);
-
- sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
- space_ids, space_versions,
- page_nos, &n_stored);
-#if 0 /* defined UNIV_IBUF_DEBUG */
- fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
- sync, n_stored, sum_sizes);
-#endif
- ibuf_exit();
-
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
- n_stored);
- *n_pages = n_stored;
-
- return(sum_sizes + 1); /* + 1: the value 0 is returned only when all trees are empty */
-}
-
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. This is
-ibuf_contract_ext() with the count of pages discarded. */
-
-ulint
-ibuf_contract(
-/*==========*/
-			/* out: a lower limit for the combined size in bytes
-			of entries which will be merged from ibuf trees to
-			the pages read, 0 if ibuf is empty */
-	ibool	sync)	/* in: TRUE if the caller wants to wait for the
-			issued read with the highest tablespace address
-			to complete */
-{
-	ulint	n_pages_ignored;
-	ulint	volume;
-
-	volume = ibuf_contract_ext(&n_pages_ignored, sync);
-
-	return(volume);
-}
-
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool, calling
-ibuf_contract_ext() repeatedly until the requested number of pages has been
-reached or the insert buffer is reported empty. */
-
-ulint
-ibuf_contract_for_n_pages(
-/*======================*/
-			/* out: a lower limit for the combined size in bytes
-			of entries which will be merged from ibuf trees to
-			the pages read, 0 if ibuf is empty */
-	ibool	sync,	/* in: TRUE if the caller wants to wait for the
-			issued read with the highest tablespace address
-			to complete */
-	ulint	n_pages)/* in: try to read at least this many pages to
-			the buffer pool and merge the ibuf contents to
-			them */
-{
-	ulint	total_bytes	= 0;
-	ulint	total_pages	= 0;
-
-	while (total_pages < n_pages) {
-		ulint	batch_pages;
-		ulint	batch_bytes;
-
-		batch_bytes = ibuf_contract_ext(&batch_pages, sync);
-
-		if (batch_bytes == 0) {
-			/* Nothing is left to merge */
-
-			break;
-		}
-
-		total_pages += batch_pages;
-		total_bytes += batch_bytes;
-	}
-
-	return(total_bytes);
-}
-
-/*************************************************************************
-Contracts the insert buffer trees after an insert, if they have grown too
-big. Nothing is done while ibuf->size stays below ibuf->max_size
-+ IBUF_CONTRACT_ON_INSERT_NON_SYNC; the contraction is synchronous once
-ibuf->size reaches ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC. */
-UNIV_INLINE
-void
-ibuf_contract_after_insert(
-/*=======================*/
-	ulint	entry_size)	/* in: size of a record which was inserted
-				into an ibuf tree */
-{
-	ibool	below_limit;
-	ibool	sync;
-	ulint	contracted;
-	ulint	step;
-
-	/* Take both readings of the ibuf size under the mutex */
-
-	mutex_enter(&ibuf_mutex);
-
-	below_limit = (ibuf->size
-		       < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC);
-
-	if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
-		sync = TRUE;
-	} else {
-		sync = FALSE;
-	}
-
-	mutex_exit(&ibuf_mutex);
-
-	if (below_limit) {
-
-		return;
-	}
-
-	/* Contract at least entry_size many bytes, or until
-	ibuf_contract() reports that there is nothing left */
-
-	contracted = 0;
-
-	while (contracted < entry_size) {
-
-		step = ibuf_contract(sync);
-
-		if (step == 0) {
-
-			break;
-		}
-
-		contracted += step;
-	}
-}
-
-/*************************************************************************
-Gets an upper limit for the combined size of entries buffered in the insert
-buffer for a given page. The count covers the records for (space, page_no)
-around the cursor position on the cursor page and, if they reach a page
-boundary, on the immediately preceding and following leaf pages; the
-neighbour pages are fetched from space 0 with RW_X_LATCH within mtr. The
-scan never goes further than one page in either direction. */
-
-ulint
-ibuf_get_volume_buffered(
-/*=====================*/
- /* out: upper limit for the volume of
- buffered inserts for the index page, in bytes;
- we may also return UNIV_PAGE_SIZE, if the
- entries for the index page span on several
- pages in the insert buffer */
- btr_pcur_t* pcur, /* in: pcur positioned at a place in an
- insert buffer tree where we would insert an
- entry for the index page whose number is
- page_no, latch mode has to be BTR_MODIFY_PREV
- or BTR_MODIFY_TREE */
- ulint space, /* in: space id */
- ulint page_no,/* in: page number of an index page */
- mtr_t* mtr) /* in: mtr */
-{
- ulint volume;
- rec_t* rec;
- page_t* page;
- ulint prev_page_no;
- page_t* prev_page;
- ulint next_page_no;
- page_t* next_page;
-
- ut_a(trx_sys_multiple_tablespace_format);
-
- ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
- || (pcur->latch_mode == BTR_MODIFY_TREE));
-
- /* Count the volume of records earlier in the alphabetical order than
- pcur */
-
- volume = 0;
-
- rec = btr_pcur_get_rec(pcur);
-
- page = buf_frame_align(rec);
-
- if (page_rec_is_supremum(rec)) {
- rec = page_rec_get_prev(rec);
- }
-
- for (;;) {
- if (page_rec_is_infimum(rec)) {
-
- break; /* page start reached: continue on the previous page */
- }
-
- if (page_no != ibuf_rec_get_page_no(rec)
- || space != ibuf_rec_get_space(rec)) {
-
- goto count_later;
- }
-
- volume += ibuf_rec_get_volume(rec);
-
- rec = page_rec_get_prev(rec);
- }
-
- /* Look at the previous page */
-
- prev_page_no = btr_page_get_prev(page, mtr);
-
- if (prev_page_no == FIL_NULL) {
-
- goto count_later;
- }
-
- prev_page = buf_page_get(0, prev_page_no, RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(prev_page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
-
- rec = page_get_supremum_rec(prev_page);
- rec = page_rec_get_prev(rec);
-
- for (;;) {
- if (page_rec_is_infimum(rec)) {
-
- /* We cannot go to yet a previous page, because we
- do not have the x-latch on it, and cannot acquire one
- because of the latching order: we have to give up */
-
- return(UNIV_PAGE_SIZE);
- }
-
- if (page_no != ibuf_rec_get_page_no(rec)
- || space != ibuf_rec_get_space(rec)) {
-
- goto count_later;
- }
-
- volume += ibuf_rec_get_volume(rec);
-
- rec = page_rec_get_prev(rec);
- }
-
-count_later:
- /* Count the volume of records after the cursor position, starting
- again from the cursor record */
- rec = btr_pcur_get_rec(pcur);
-
- if (!page_rec_is_supremum(rec)) {
- rec = page_rec_get_next(rec);
- }
-
- for (;;) {
- if (page_rec_is_supremum(rec)) {
-
- break; /* page end reached: continue on the next page */
- }
-
- if (page_no != ibuf_rec_get_page_no(rec)
- || space != ibuf_rec_get_space(rec)) {
-
- return(volume);
- }
-
- volume += ibuf_rec_get_volume(rec);
-
- rec = page_rec_get_next(rec);
- }
-
- /* Look at the next page */
-
- next_page_no = btr_page_get_next(page, mtr);
-
- if (next_page_no == FIL_NULL) {
-
- return(volume);
- }
-
- next_page = buf_page_get(0, next_page_no, RW_X_LATCH, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(next_page, mtr)
- == buf_frame_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(next_page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
-
- rec = page_get_infimum_rec(next_page);
- rec = page_rec_get_next(rec);
-
- for (;;) {
- if (page_rec_is_supremum(rec)) {
-
- /* We give up: the entries continue past the end of the
- next page, and the scan goes no further than that */
-
- return(UNIV_PAGE_SIZE);
- }
-
- if (page_no != ibuf_rec_get_page_no(rec)
- || space != ibuf_rec_get_space(rec)) {
-
- return(volume);
- }
-
- volume += ibuf_rec_get_volume(rec);
-
- rec = page_rec_get_next(rec);
- }
-}
-
-/*************************************************************************
-Reads the biggest tablespace id from the high end of the insert buffer
-tree and passes it to fil_set_max_space_id_if_bigger(). If the tree is
-empty, the value passed is 0. */
-
-void
-ibuf_update_max_tablespace_id(void)
-/*===============================*/
-{
-	dict_index_t*	index;
-	btr_pcur_t	pcur;
-	mtr_t		mtr;
-	ulint		max_id	= 0;	/* stays 0 if the tree is empty */
-
-	index = fil_space_get_ibuf_data(0)->index;
-
-	ut_a(!dict_table_is_comp(index->table));
-
-	ibuf_enter();
-
-	mtr_start(&mtr);
-
-	/* Open the cursor at the high end of the index and step one
-	position backwards */
-
-	btr_pcur_open_at_index_side(FALSE, index, BTR_SEARCH_LEAF,
-				    &pcur, TRUE, &mtr);
-	btr_pcur_move_to_prev(&pcur, &mtr);
-
-	if (!btr_pcur_is_before_first_on_page(&pcur, &mtr)) {
-		rec_t*	last_rec;
-		byte*	id_field;
-		ulint	id_len;
-
-		/* Field 0 of the record holds the space id in 4 bytes */
-
-		last_rec = btr_pcur_get_rec(&pcur);
-
-		id_field = rec_get_nth_field_old(last_rec, 0, &id_len);
-
-		ut_a(id_len == 4);
-
-		max_id = mach_read_from_4(id_field);
-	}
-
-	mtr_commit(&mtr);
-	ibuf_exit();
-
-	fil_set_max_space_id_if_bigger(max_id);
-}
-
-/*************************************************************************
-Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible. DB_STRONG_FAIL is returned without inserting if
-the insert buffer is too big (a synchronous contract is run first), if
-ibuf_add_free_page() fails in BTR_MODIFY_TREE mode, if the index page is in
-the buffer pool or has explicit record locks, or if the entry may not fit in
-the free space recorded for the page in the ibuf bitmap. In the last case
-reads of nearby pages are also issued in order to merge their buffered
-entries. Otherwise the result is that of btr_cur_optimistic_insert() or
-btr_cur_pessimistic_insert(), depending on mode. */
-static
-ulint
-ibuf_insert_low(
-/*============*/
- /* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
- ulint mode, /* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
- dtuple_t* entry, /* in: index entry to insert */
- dict_index_t* index, /* in: index where to insert; must not be
- unique or clustered */
- ulint space, /* in: space id where to insert */
- ulint page_no,/* in: page number where to insert */
- que_thr_t* thr) /* in: query thread */
-{
- big_rec_t* dummy_big_rec;
- ulint entry_size;
- btr_pcur_t pcur;
- btr_cur_t* cursor;
- dtuple_t* ibuf_entry;
- mem_heap_t* heap;
- ulint buffered;
- rec_t* ins_rec;
- ibool old_bit_value;
- page_t* bitmap_page;
- ibuf_data_t* ibuf_data;
- dict_index_t* ibuf_index;
- page_t* root;
- ulint err;
- ibool do_merge;
- ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED];
- ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
- ulint n_stored;
- ulint bits;
- mtr_t mtr;
- mtr_t bitmap_mtr;
-
- ut_a(!(index->type & DICT_CLUSTERED));
- ut_ad(dtuple_check_typed(entry));
-
- ut_a(trx_sys_multiple_tablespace_format);
-
- do_merge = FALSE;
-
- /* Currently the insert buffer of space 0 takes care of inserts to all
- tablespaces */
-
- ibuf_data = fil_space_get_ibuf_data(0);
-
- ibuf_index = ibuf_data->index;
-
- mutex_enter(&ibuf_mutex);
-
- if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
- /* Insert buffer is now too big, contract it but do not try
- to insert */
-
- mutex_exit(&ibuf_mutex);
-
-#ifdef UNIV_IBUF_DEBUG
- fputs("Ibuf too big\n", stderr);
-#endif
- /* Use synchronous contract (== TRUE) */
- ibuf_contract(TRUE);
-
- return(DB_STRONG_FAIL);
- }
-
- mutex_exit(&ibuf_mutex);
-
- if (mode == BTR_MODIFY_TREE) {
- /* In this mode ibuf_pessimistic_insert_mutex and ibuf_mutex are
- held from here until function_exit */
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- while (!ibuf_data_enough_free_for_insert(ibuf_data)) {
-
- /* Release everything for the duration of the page
- allocation, then reacquire and check again */
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
- err = ibuf_add_free_page(0, ibuf_data);
-
- if (err == DB_STRONG_FAIL) {
-
- return(err);
- }
-
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
- }
- } else {
- ibuf_enter();
- }
-
- entry_size = rec_get_converted_size(index, entry);
-
- heap = mem_heap_create(512);
-
- /* Build the entry which contains the space id and the page number as
- the first fields and the type information for other fields, and which
- will be inserted to the insert buffer. */
-
- ibuf_entry = ibuf_entry_build(index, entry, space, page_no, heap);
-
- /* Open a cursor to the insert buffer tree to calculate if we can add
- the new entry to it without exceeding the free space limit for the
- page. */
-
- mtr_start(&mtr);
-
- btr_pcur_open(ibuf_index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
-
- /* Find out the volume of already buffered inserts for the same index
- page */
- buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a((buffered == 0) || ibuf_count_get(space, page_no));
-#endif
- mtr_start(&bitmap_mtr);
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &bitmap_mtr);
-
- /* We check if the index page is suitable for buffered entries */
-
- if (buf_page_peek(space, page_no)
- || lock_rec_expl_exist_on_page(space, page_no)) {
- err = DB_STRONG_FAIL;
-
- mtr_commit(&bitmap_mtr);
-
- goto function_exit;
- }
-
- bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
- IBUF_BITMAP_FREE, &bitmap_mtr);
-
- if (buffered + entry_size + page_dir_calc_reserved_space(1)
- > ibuf_index_page_calc_free_from_bits(bits)) {
- mtr_commit(&bitmap_mtr);
-
- /* It may not fit */
- err = DB_STRONG_FAIL;
-
- do_merge = TRUE;
-
- ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
- space_ids, space_versions,
- page_nos, &n_stored);
- goto function_exit;
- }
-
- /* Set the bitmap bit denoting that the insert buffer contains
- buffered entries for this index page, if the bit is not set yet */
-
- old_bit_value = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
- IBUF_BITMAP_BUFFERED,
- &bitmap_mtr);
- if (!old_bit_value) {
- ibuf_bitmap_page_set_bits(bitmap_page, page_no,
- IBUF_BITMAP_BUFFERED, TRUE,
- &bitmap_mtr);
- }
-
- mtr_commit(&bitmap_mtr);
-
- cursor = btr_pcur_get_btr_cur(&pcur);
-
- if (mode == BTR_MODIFY_PREV) {
- err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, thr,
- &mtr);
- if (err == DB_SUCCESS) {
- /* Update the page max trx id field */
- page_update_max_trx_id(buf_frame_align(ins_rec),
- thr_get_trx(thr)->id);
- }
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- /* We acquire an x-latch to the root page before the insert,
- because a pessimistic insert releases the tree x-latch,
- which would cause the x-latching of the root after that to
- break the latching order. */
-
- root = ibuf_tree_root_get(ibuf_data, 0, &mtr);
-
- err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG,
- cursor,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, thr,
- &mtr);
- if (err == DB_SUCCESS) {
- /* Update the page max trx id field */
- page_update_max_trx_id(buf_frame_align(ins_rec),
- thr_get_trx(thr)->id);
- }
-
- ibuf_data_sizes_update(ibuf_data, root, &mtr);
- }
-
-function_exit:
-#ifdef UNIV_IBUF_DEBUG
- if (err == DB_SUCCESS) {
- fprintf(stderr,
- "Incrementing ibuf count of space %lu page %lu\n"
- "from %lu by 1\n", space, page_no,
- ibuf_count_get(space, page_no));
-
- ibuf_count_set(space, page_no,
- ibuf_count_get(space, page_no) + 1);
- }
-#endif
- if (mode == BTR_MODIFY_TREE) {
- /* Release the two mutexes that only this mode acquired above */
- ut_ad(ibuf_validate_low());
-
- mutex_exit(&ibuf_mutex);
- mutex_exit(&ibuf_pessimistic_insert_mutex);
- }
-
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
- ibuf_exit();
-
- mem_heap_free(heap);
-
- mutex_enter(&ibuf_mutex); /* protects the ibuf_data updates below */
-
- if (err == DB_SUCCESS) {
- ibuf_data->empty = FALSE;
- ibuf_data->n_inserts++;
- }
-
- mutex_exit(&ibuf_mutex);
-
- if ((mode == BTR_MODIFY_TREE) && (err == DB_SUCCESS)) {
- ibuf_contract_after_insert(entry_size);
- }
-
- if (do_merge) { /* n_stored and the arrays were filled at "It may not fit" */
-#ifdef UNIV_IBUF_DEBUG
- ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
-#endif
- buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions,
- page_nos, n_stored);
- }
-
- return(err);
-}
-
-/*************************************************************************
-Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible. Does not do insert if the index is clustered
-or unique. An entry whose converted size is at least half of the free space
-of an empty page is never buffered. The insert is first tried in
-BTR_MODIFY_PREV mode and, if that returns DB_FAIL, in BTR_MODIFY_TREE
-mode. */
-
-ibool
-ibuf_insert(
-/*========*/
-				/* out: TRUE if success */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	dict_index_t*	index,	/* in: index where to insert */
-	ulint		space,	/* in: space id where to insert */
-	ulint		page_no,/* in: page number where to insert */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	ulint	result;
-
-	ut_a(trx_sys_multiple_tablespace_format);
-	ut_ad(dtuple_check_typed(entry));
-
-	ut_a(!(index->type & DICT_CLUSTERED));
-
-	if (rec_get_converted_size(index, entry)
-	    >= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
-		/ 2)) {
-		/* The entry is too big to be buffered */
-
-		return(FALSE);
-	}
-
-	result = ibuf_insert_low(BTR_MODIFY_PREV, entry, index, space,
-				 page_no, thr);
-
-	if (result == DB_FAIL) {
-		/* Retry in the pessimistic mode */
-
-		result = ibuf_insert_low(BTR_MODIFY_TREE, entry, index, space,
-					 page_no, thr);
-	}
-
-	if (result != DB_SUCCESS) {
-		ut_a(result == DB_STRONG_FAIL);
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/************************************************************************
-During merge, inserts to an index page a secondary index entry extracted
-from the insert buffer. If the page search matches the entry on all of its
-fields, btr_cur_del_unmark_for_ibuf() is called on the matching record
-instead of inserting. If the entry does not fit, the page is reorganized and
-the insert is retried once. If the 'compact' flag of the page or the number
-of fields of its first record does not match the entry, or if the retry also
-fails, diagnostics are printed to stderr and nothing is inserted. */
-static
-void
-ibuf_insert_to_index_page(
-/*======================*/
-	dtuple_t*	entry,	/* in: buffered entry to insert */
-	page_t*		page,	/* in: index page where the buffered entry
-				should be placed */
-	dict_index_t*	index,	/* in: record descriptor */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_cur_t	page_cur;
-	ulint		low_match;
-	rec_t*		rec;
-	page_t*		bitmap_page;
-	ulint		old_bits;
-
-	ut_ad(ibuf_inside());
-	ut_ad(dtuple_check_typed(entry));
-
-	if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
-			  != (ibool)!!page_is_comp(page))) {
-		fputs("InnoDB: Trying to insert a record from"
-		      " the insert buffer to an index page\n"
-		      "InnoDB: but the 'compact' flag does not match!\n",
-		      stderr);
-		goto dump;
-	}
-
-	/* Compare the field count of the first record on the page with
-	that of the entry */
-
-	rec = page_rec_get_next(page_get_infimum_rec(page));
-
-	if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
-			  != dtuple_get_n_fields(entry))) {
-		fputs("InnoDB: Trying to insert a record from"
-		      " the insert buffer to an index page\n"
-		      "InnoDB: but the number of fields does not match!\n",
-		      stderr);
-dump:
-		/* Both mismatch checks end up here: print the page and the
-		entry, and leave the page unmodified */
-
-		buf_page_print(page);
-
-		dtuple_print(stderr, entry);
-
-		fputs("InnoDB: The table where"
-		      " this index record belongs\n"
-		      "InnoDB: is now probably corrupt."
-		      " Please run CHECK TABLE on\n"
-		      "InnoDB: your tables.\n"
-		      "InnoDB: Submit a detailed bug report to"
-		      " http://bugs.mysql.com!\n", stderr);
-
-		return;
-	}
-
-	low_match = page_cur_search(page, index, entry,
-				    PAGE_CUR_LE, &page_cur);
-
-	if (low_match == dtuple_get_n_fields(entry)) {
-		/* A record matching on every field is already on the page */
-
-		rec = page_cur_get_rec(&page_cur);
-
-		btr_cur_del_unmark_for_ibuf(rec, mtr);
-	} else {
-		rec = page_cur_tuple_insert(&page_cur, entry, index, mtr);
-
-		if (rec == NULL) {
-			/* If the record did not fit, reorganize */
-
-			btr_page_reorganize(page, index, mtr);
-
-			/* Position the page cursor again before retrying */
-
-			page_cur_search(page, index, entry,
-					PAGE_CUR_LE, &page_cur);
-
-			/* This time the record must fit */
-			if (UNIV_UNLIKELY(!page_cur_tuple_insert(
-						  &page_cur, entry, index,
-						  mtr))) {
-
-				ut_print_timestamp(stderr);
-
-				fprintf(stderr,
-					" InnoDB: Error: Insert buffer insert"
-					" fails; page free %lu,"
-					" dtuple size %lu\n",
-					(ulong) page_get_max_insert_size(
-						page, 1),
-					(ulong) rec_get_converted_size(
-						index, entry));
-				fputs("InnoDB: Cannot insert index record ",
-				      stderr);
-				dtuple_print(stderr, entry);
-				fputs("\nInnoDB: The table where"
-				      " this index record belongs\n"
-				      "InnoDB: is now probably corrupt."
-				      " Please run CHECK TABLE on\n"
-				      "InnoDB: that table.\n", stderr);
-
-				/* Report the free space bits that the ibuf
-				bitmap holds for this page */
-
-				bitmap_page = ibuf_bitmap_get_map_page(
-					buf_frame_get_space_id(page),
-					buf_frame_get_page_no(page),
-					mtr);
-				old_bits = ibuf_bitmap_page_get_bits(
-					bitmap_page,
-					buf_frame_get_page_no(page),
-					IBUF_BITMAP_FREE, mtr);
-
-				fprintf(stderr, "InnoDB: Bitmap bits %lu\n",
-					(ulong) old_bits);
-
-				fputs("InnoDB: Submit a detailed bug report"
-				      " to http://bugs.mysql.com\n", stderr);
-			}
-		}
-	}
-}
-
-/*************************************************************************
-Deletes from ibuf the record on which pcur is positioned. If we have to
-resort to a pessimistic delete, this function commits mtr and closes
-the cursor. An optimistic delete is tried first; if it succeeds, FALSE is
-returned. Otherwise the cursor position is stored, mtr is committed and
-restarted under ibuf_mutex, and the position is restored in BTR_MODIFY_TREE
-mode. If the restoration fails and fil_space_get_version(space) is -1,
-nothing is deleted and no error is printed; if it fails otherwise,
-diagnostics are printed and the ibuf tree is validated, which ends in
-ut_error if the validation fails. TRUE is returned on every path that
-follows the failed optimistic delete. */
-static
-ibool
-ibuf_delete_rec(
-/*============*/
- /* out: TRUE if mtr was committed and pcur
- closed in this operation */
- ulint space, /* in: space id */
- ulint page_no,/* in: index page number where the record
- should belong */
- btr_pcur_t* pcur, /* in: pcur positioned on the record to
- delete, having latch mode BTR_MODIFY_LEAF */
- dtuple_t* search_tuple,
- /* in: search tuple for entries of page_no;
- used here only in the diagnostic output */
- mtr_t* mtr) /* in: mtr */
-{
- ibool success;
- ibuf_data_t* ibuf_data;
- page_t* root;
- ulint err;
-
- ut_ad(ibuf_inside());
-
- success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
-
- if (success) {
-#ifdef UNIV_IBUF_DEBUG
- fprintf(stderr,
- "Decrementing ibuf count of space %lu page %lu\n"
- "from %lu by 1\n", space, page_no,
- ibuf_count_get(space, page_no));
- ibuf_count_set(space, page_no,
- ibuf_count_get(space, page_no) - 1);
-#endif
- return(FALSE); /* mtr is not committed and pcur is not closed */
- }
-
- /* We have to resort to a pessimistic delete from ibuf */
- btr_pcur_store_position(pcur, mtr);
-
- btr_pcur_commit_specify_mtr(pcur, mtr);
-
- /* Currently the insert buffer of space 0 takes care of inserts to all
- tablespaces */
-
- ibuf_data = fil_space_get_ibuf_data(0);
-
- mutex_enter(&ibuf_mutex); /* held until func_exit */
-
- mtr_start(mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
-
- if (!success) {
- if (fil_space_get_version(space) == -1) {
- /* The tablespace has been dropped. It is possible
- that another thread has deleted the insert buffer
- entry. Do not complain. */
- goto commit_and_exit;
- }
-
- fprintf(stderr,
- "InnoDB: ERROR: Submit the output to"
- " http://bugs.mysql.com\n"
- "InnoDB: ibuf cursor restoration fails!\n"
- "InnoDB: ibuf record inserted to page %lu\n",
- (ulong) page_no);
- fflush(stderr);
-
- rec_print_old(stderr, btr_pcur_get_rec(pcur));
- rec_print_old(stderr, pcur->old_rec);
- dtuple_print(stderr, search_tuple);
-
- rec_print_old(stderr,
- page_rec_get_next(btr_pcur_get_rec(pcur)));
- fflush(stderr);
-
- btr_pcur_commit_specify_mtr(pcur, mtr);
-
- fputs("InnoDB: Validating insert buffer tree:\n", stderr);
- if (!btr_validate_index(ibuf_data->index, NULL)) {
- ut_error;
- }
-
- fprintf(stderr, "InnoDB: ibuf tree ok\n");
- fflush(stderr);
-
- goto func_exit; /* mtr was already committed above */
- }
-
- root = ibuf_tree_root_get(ibuf_data, 0, mtr);
-
- btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
- FALSE, mtr);
- ut_a(err == DB_SUCCESS);
-
-#ifdef UNIV_IBUF_DEBUG
- ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
-#endif
- ibuf_data_sizes_update(ibuf_data, root, mtr);
-
- ut_ad(ibuf_validate_low());
-
-commit_and_exit:
- btr_pcur_commit_specify_mtr(pcur, mtr);
-
-func_exit:
- btr_pcur_close(pcur);
-
- mutex_exit(&ibuf_mutex);
-
- return(TRUE);
-}
-
-/*************************************************************************
-When an index page is read from a disk to the buffer pool, this function
-inserts to the page the possible index entries buffered in the insert buffer.
-The entries are deleted from the insert buffer. If the page is not read, but
-created in the buffer pool, this function deletes its buffered entries from
-the insert buffer; there can exist entries for such a page if the page
-belonged to an index which subsequently was dropped. */
-/* Unless it returns early, n_merges and n_merged_recs are updated at the end. */
-void
-ibuf_merge_or_delete_for_page(
-/*==========================*/
- page_t* page, /* in: if page has been read from disk, pointer to
- the page x-latched, else NULL */
- ulint space, /* in: space id of the index page */
- ulint page_no,/* in: page number of the index page */
- ibool update_ibuf_bitmap)/* in: normally this is set to TRUE, but if
- we have deleted or are deleting the tablespace, then we
- naturally do not want to update a non-existent bitmap
- page */
-{
- mem_heap_t* heap;
- btr_pcur_t pcur;
- dtuple_t* entry;
- dtuple_t* search_tuple;
- rec_t* ibuf_rec;
- buf_block_t* block;
- page_t* bitmap_page;
- ibuf_data_t* ibuf_data;
- ulint n_inserts;
-#ifdef UNIV_IBUF_DEBUG
- ulint volume;
-#endif
- ibool tablespace_being_deleted = FALSE;
- ibool corruption_noticed = FALSE;
- mtr_t mtr;
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
- /* No merge is done at this forced recovery level */
- return;
- }
-
- if (ibuf_fixed_addr_page(space, page_no) || fsp_descr_page(page_no)
- || trx_sys_hdr_page(space, page_no)) {
- return;
- }
-
- if (update_ibuf_bitmap) {
- /* If the following returns FALSE, we get the counter
- incremented, and must decrement it when we leave this
- function. When the counter is > 0, that prevents tablespace
- from being dropped. */
-
- tablespace_being_deleted = fil_inc_pending_ibuf_merges(space);
-
- if (tablespace_being_deleted) {
- /* Do not try to read the bitmap page from space;
- just delete the ibuf records for the page */
-
- page = NULL;
- update_ibuf_bitmap = FALSE;
- }
- }
- /* Check the bitmap first: it tells whether anything is buffered */
- if (update_ibuf_bitmap) {
- mtr_start(&mtr);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
-
- if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
- IBUF_BITMAP_BUFFERED, &mtr)) {
- /* No inserts buffered for this page */
- mtr_commit(&mtr);
-
- if (!tablespace_being_deleted) {
- fil_decr_pending_ibuf_merges(space);
- }
-
- return;
- }
- mtr_commit(&mtr);
- }
-
- /* Currently the insert buffer of space 0 takes care of inserts to all
- tablespaces */
-
- ibuf_data = fil_space_get_ibuf_data(0);
-
- ibuf_enter();
-
- heap = mem_heap_create(512);
-
- if (!trx_sys_multiple_tablespace_format) {
- ut_a(trx_doublewrite_must_reset_space_ids);
- search_tuple = ibuf_search_tuple_build(space, page_no, heap);
- } else {
- search_tuple = ibuf_new_search_tuple_build(space, page_no,
- heap);
- }
-
- if (page) {
- /* Move the ownership of the x-latch on the page to this OS
- thread, so that we can acquire a second x-latch on it. This
- is needed for the insert operations to the index page to pass
- the debug checks. */
-
- block = buf_block_align(page);
- rw_lock_x_lock_move_ownership(&(block->lock));
-
- if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
- /* The buffered records are discarded below, not merged */
- corruption_noticed = TRUE;
-
- ut_print_timestamp(stderr);
-
- mtr_start(&mtr);
-
- fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
- stderr);
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
- &mtr);
- buf_page_print(bitmap_page);
-
- mtr_commit(&mtr);
-
- fputs("\nInnoDB: Dump of the page:\n", stderr);
-
- buf_page_print(page);
-
- fprintf(stderr,
- "InnoDB: Error: corruption in the tablespace."
- " Bitmap shows insert\n"
- "InnoDB: buffer records to page n:o %lu"
- " though the page\n"
- "InnoDB: type is %lu, which is"
- " not an index page!\n"
- "InnoDB: We try to resolve the problem"
- " by skipping the insert buffer\n"
- "InnoDB: merge for this page."
- " Please run CHECK TABLE on your tables\n"
- "InnoDB: to determine if they are corrupt"
- " after this.\n\n"
- "InnoDB: Please submit a detailed bug report"
- " to http://bugs.mysql.com\n\n",
- (ulong) page_no,
- (ulong) fil_page_get_type(page));
- }
- }
-
- n_inserts = 0;
-#ifdef UNIV_IBUF_DEBUG
- volume = 0;
-#endif
-loop:
- mtr_start(&mtr);
-
- if (page) {
- ibool success = buf_page_get_known_nowait(RW_X_LATCH, page,
- BUF_KEEP_OLD,
- __FILE__, __LINE__,
- &mtr);
- ut_a(success);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TREE_NODE);
-#endif /* UNIV_SYNC_DEBUG */
- }
-
- /* Position pcur in the insert buffer at the first entry for this
- index page */
- btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &pcur, &mtr);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
- goto reset_bit;
- }
- /* Handle the entries for this page one at a time */
- for (;;) {
- ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
-
- ibuf_rec = btr_pcur_get_rec(&pcur);
-
- /* Check if the entry is for this index page */
- if (ibuf_rec_get_page_no(ibuf_rec) != page_no
- || ibuf_rec_get_space(ibuf_rec) != space) {
- if (page) {
- page_header_reset_last_insert(page, &mtr);
- }
- goto reset_bit;
- }
-
- if (corruption_noticed) {
- fputs("InnoDB: Discarding record\n ", stderr);
- rec_print_old(stderr, ibuf_rec);
- fputs("\n from the insert buffer!\n\n", stderr);
- } else if (page) {
- /* Now we have at pcur a record which should be
- inserted to the index page; NOTE that the call below
- copies pointers to fields in ibuf_rec, and we must
- keep the latch to the ibuf_rec page until the
- insertion is finished! */
- dict_index_t* dummy_index;
- dulint max_trx_id = page_get_max_trx_id(
- buf_frame_align(ibuf_rec));
- page_update_max_trx_id(page, max_trx_id);
-
- entry = ibuf_build_entry_from_ibuf_rec(
- ibuf_rec, heap, &dummy_index);
-#ifdef UNIV_IBUF_DEBUG
- volume += rec_get_converted_size(dummy_index, entry)
- + page_dir_calc_reserved_space(1);
- ut_a(volume <= 4 * UNIV_PAGE_SIZE
- / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-#endif
- ibuf_insert_to_index_page(entry, page,
- dummy_index, &mtr);
- ibuf_dummy_index_free(dummy_index);
- }
- /* Counted for every record deleted, whether it was merged or not */
- n_inserts++;
-
- /* Delete the record from ibuf */
- if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
- &mtr)) {
- /* Deletion was pessimistic and mtr was committed:
- we start from the beginning again */
-
- goto loop;
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- goto loop;
- }
- }
- /* Reached when no entries remain for this page */
-reset_bit:
-#ifdef UNIV_IBUF_DEBUG
- if (ibuf_count_get(space, page_no) > 0) {
- /* btr_print_tree(ibuf_data->index->tree, 100);
- ibuf_print(); */
- }
-#endif
- if (update_ibuf_bitmap) {
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
- ibuf_bitmap_page_set_bits(bitmap_page, page_no,
- IBUF_BITMAP_BUFFERED, FALSE, &mtr);
- if (page) {
- ulint old_bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, IBUF_BITMAP_FREE, &mtr);
- ulint new_bits = ibuf_index_page_calc_free(page);
-#if 0 /* defined UNIV_IBUF_DEBUG */
- fprintf(stderr, "Old bits %lu new bits %lu"
- " max size %lu\n",
- old_bits, new_bits,
- page_get_max_insert_size_after_reorganize(
- page, 1));
-#endif
- if (old_bits != new_bits) {
- ibuf_bitmap_page_set_bits(bitmap_page, page_no,
- IBUF_BITMAP_FREE,
- new_bits, &mtr);
- }
- }
- }
-#if 0 /* defined UNIV_IBUF_DEBUG */
- fprintf(stderr,
- "Ibuf merge %lu records volume %lu to page no %lu\n",
- n_inserts, volume, page_no);
-#endif
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
- mem_heap_free(heap);
-
- /* Protect our statistics keeping from race conditions */
- mutex_enter(&ibuf_mutex);
-
- ibuf_data->n_merges++;
- ibuf_data->n_merged_recs += n_inserts;
-
- mutex_exit(&ibuf_mutex);
-
- if (update_ibuf_bitmap && !tablespace_being_deleted) {
-
- fil_decr_pending_ibuf_merges(space);
- }
-
- ibuf_exit();
-#ifdef UNIV_IBUF_DEBUG
- ut_a(ibuf_count_get(space, page_no) == 0);
-#endif
-}
-
-/*************************************************************************
-Deletes all entries in the insert buffer for a given space id. This is used
-in DISCARD TABLESPACE and IMPORT TABLESPACE.
-NOTE: this does not update the page free bitmaps in the space. The space will
-become CORRUPT when you call this function! */
-/* Every deleted record is added to n_merged_recs of the ibuf data. */
-void
-ibuf_delete_for_discarded_space(
-/*============================*/
- ulint space) /* in: space id */
-{
- mem_heap_t* heap;
- btr_pcur_t pcur;
- dtuple_t* search_tuple;
- rec_t* ibuf_rec;
- ulint page_no;
- ibool closed;
- ibuf_data_t* ibuf_data;
- ulint n_inserts;
- mtr_t mtr;
-
- /* Currently the insert buffer of space 0 takes care of inserts to all
- tablespaces */
-
- ibuf_data = fil_space_get_ibuf_data(0);
-
- heap = mem_heap_create(512);
-
- /* Use page number 0 to build the search tuple so that we get the
- cursor positioned at the first entry for this space id */
-
- search_tuple = ibuf_new_search_tuple_build(space, 0, heap);
-
- n_inserts = 0;
-loop:
- ibuf_enter();
-
- mtr_start(&mtr);
-
- /* Position pcur in the insert buffer at the first entry for the
- space */
- btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &pcur, &mtr);
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
- goto leave_loop;
- }
-
- for (;;) {
- ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
-
- ibuf_rec = btr_pcur_get_rec(&pcur);
-
- /* Check if the entry is for this space */
- if (ibuf_rec_get_space(ibuf_rec) != space) {
- /* The cursor has reached an entry of another space */
- goto leave_loop;
- }
-
- page_no = ibuf_rec_get_page_no(ibuf_rec);
-
- n_inserts++;
-
- /* Delete the record from ibuf */
- closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple,
- &mtr);
- if (closed) {
- /* Deletion was pessimistic and mtr was committed:
- we start from the beginning again */
- /* Balance the ibuf_enter() done at label loop */
- ibuf_exit();
-
- goto loop;
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
- /* Balance the ibuf_enter() done at label loop */
- ibuf_exit();
-
- goto loop;
- }
- }
-
-leave_loop:
- mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- /* Protect our statistics keeping from race conditions */
- mutex_enter(&ibuf_mutex);
-
- ibuf_data->n_merges++;
- ibuf_data->n_merged_recs += n_inserts;
-
- mutex_exit(&ibuf_mutex);
- /*
- fprintf(stderr,
- "InnoDB: Discarded %lu ibuf entries for space %lu\n",
- (ulong) n_inserts, (ulong) space);
- */
- ibuf_exit();
-
- mem_heap_free(heap);
-}
-
-
-/**********************************************************************
-Validates the ibuf data structures; the caller must own ibuf_mutex. */
-
-ibool
-ibuf_validate_low(void)
-/*===================*/
-			/* out: TRUE if ok */
-{
-	ibuf_data_t*	ibuf_data;
-	ulint		sum_sizes = 0;
-
-	ut_ad(mutex_own(&ibuf_mutex));
-
-	/* Add up the size field of every struct on ibuf->data_list */
-	for (ibuf_data = UT_LIST_GET_FIRST(ibuf->data_list);
-	     ibuf_data != NULL;
-	     ibuf_data = UT_LIST_GET_NEXT(data_list, ibuf_data)) {
-
-		sum_sizes += ibuf_data->size;
-	}
-
-	/* The per-struct sizes must add up to the total recorded in
-	ibuf->size */
-	ut_a(sum_sizes == ibuf->size);
-
-	return(TRUE);
-}
-
-/**********************************************************************
-Checks whether the root page of the insert buffer tree has no records. */
-
-ibool
-ibuf_is_empty(void)
-/*===============*/
-			/* out: TRUE if empty */
-{
-	ibuf_data_t*	data;
-	page_t*		root;
-	ibool		is_empty;
-	mtr_t		mtr;
-
-	ibuf_enter();
-
-	mutex_enter(&ibuf_mutex);
-
-	data = UT_LIST_GET_FIRST(ibuf->data_list);
-
-	mtr_start(&mtr);
-	root = ibuf_tree_root_get(data, 0, &mtr);
-
-	is_empty = (page_get_n_recs(root) == 0) ? TRUE : FALSE;
-
-	if (is_empty && data->empty == FALSE) {
-		/* The tree is empty but the flag has not caught up */
-		fprintf(stderr,
-			"InnoDB: Warning: insert buffer tree is empty"
-			" but the data struct does not\n"
-			"InnoDB: know it. This condition is legal"
-			" if the master thread has not yet\n"
-			"InnoDB: run to completion.\n");
-	}
-
-	if (!is_empty) {
-		/* A non-empty tree must never be flagged as empty */
-		ut_a(data->empty == FALSE);
-	}
-
-	mtr_commit(&mtr);
-
-	/* The first struct on the list is expected to be the one
-	for space 0 */
-	ut_a(data->space == 0);
-
-	mutex_exit(&ibuf_mutex);
-
-	ibuf_exit();
-
-	return(is_empty);
-}
-
-/**********************************************************************
-Prints the counters of every ibuf data struct while holding ibuf_mutex. */
-
-void
-ibuf_print(
-/*=======*/
-	FILE*	file)	/* in: file where to print */
-{
-	ibuf_data_t*	ibuf_data;
-#ifdef UNIV_IBUF_DEBUG
-	ulint		page_idx;
-#endif
-
-	mutex_enter(&ibuf_mutex);
-
-	/* Walk ibuf->data_list and report each struct in turn */
-	for (ibuf_data = UT_LIST_GET_FIRST(ibuf->data_list);
-	     ibuf_data != NULL;
-	     ibuf_data = UT_LIST_GET_NEXT(data_list, ibuf_data)) {
-		fprintf(file,
-			"Ibuf: size %lu, free list len %lu, seg size %lu,\n"
-			"%lu inserts, %lu merged recs, %lu merges\n",
-			(ulong) ibuf_data->size,
-			(ulong) ibuf_data->free_list_len,
-			(ulong) ibuf_data->seg_size,
-			(ulong) ibuf_data->n_inserts,
-			(ulong) ibuf_data->n_merged_recs,
-			(ulong) ibuf_data->n_merges);
-#ifdef UNIV_IBUF_DEBUG
-		/* The per-page debug counts are written to stderr, not file */
-		for (page_idx = 0; page_idx < IBUF_COUNT_N_PAGES; page_idx++) {
-			if (ibuf_count_get(ibuf_data->space, page_idx) > 0) {
-
-				fprintf(stderr,
-					"Ibuf count for page %lu is %lu\n",
-					(ulong) page_idx,
-					(ulong)
-					ibuf_count_get(ibuf_data->space,
-						       page_idx));
-			}
-		}
-#endif
-	}
-
-	mutex_exit(&ibuf_mutex);
-}
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
deleted file mode 100644
index 1573de7e818..00000000000
--- a/storage/innobase/include/btr0btr.h
+++ /dev/null
@@ -1,451 +0,0 @@
-/******************************************************
-The B-tree
-
-(c) 1994-1996 Innobase Oy
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0btr_h
-#define btr0btr_h
-
-#include "univ.i"
-
-#include "dict0dict.h"
-#include "data0data.h"
-#include "page0cur.h"
-#include "rem0rec.h"
-#include "mtr0mtr.h"
-#include "btr0types.h"
-
-/* Maximum record size which can be stored on a page, without using the
-special big record storage structure */
-
-#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
-
-/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as
-such; none of the tree operations avoid producing trees bigger than this. It
-is instead a "max depth that other code must work with", useful for e.g.
-fixed-size arrays that must store some information about each level in a
-tree. In other words: if a B-tree with bigger depth than this is
-encountered, it is not acceptable for it to lead to mysterious memory
-corruption, but it is acceptable for the program to die with a clear assert
-failure. */
-#define BTR_MAX_LEVELS 100
-
-/* Latching modes for btr_cur_search_to_nth_level(); the flags below are ORed to them. */
-#define BTR_SEARCH_LEAF RW_S_LATCH
-#define BTR_MODIFY_LEAF RW_X_LATCH
-#define BTR_NO_LATCHES RW_NO_LATCH
-#define BTR_MODIFY_TREE 33
-#define BTR_CONT_MODIFY_TREE 34
-#define BTR_SEARCH_PREV 35
-#define BTR_MODIFY_PREV 36
-
-/* If this is ORed to the latch mode, it means that the search tuple will be
-inserted to the index, at the searched position */
-#define BTR_INSERT 512
-
-/* This flag ORed to latch mode says that we do the search in query
-optimization */
-#define BTR_ESTIMATE 1024
-
-/* This flag ORed to latch mode says that we can ignore possible
-UNIQUE definition on secondary indexes when we decide if we can use the
-insert buffer to speed up inserts */
-#define BTR_IGNORE_SEC_UNIQUE 2048
-
-/******************************************************************
-Gets the root node of a tree and x-latches it. */
-
-page_t*
-btr_root_get(
-/*=========*/
- /* out: root page, x-latched */
- dict_index_t* index, /* in: index tree */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space, /* in: space id */
- ulint page_no, /* in: page number */
- ulint mode, /* in: latch mode */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Gets the index id field of a page. */
-UNIV_INLINE
-dulint
-btr_page_get_index_id(
-/*==================*/
- /* out: index id */
- page_t* page); /* in: index page */
-/************************************************************
-Gets the node level field in an index page. */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
- /* out: level, leaf level == 0 */
- page_t* page); /* in: index page */
-/************************************************************
-Gets the node level field in an index page. */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
- /* out: level, leaf level == 0 */
- page_t* page, /* in: index page */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Gets the next index page number. */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
- /* out: next page number */
- page_t* page, /* in: index page */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Gets the previous index page number. */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
- /* out: prev page number */
- page_t* page, /* in: index page */
- mtr_t* mtr); /* in: mini-transaction handle */
-/*****************************************************************
-Gets pointer to the previous user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor. */
-
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
- /* out: previous user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr); /* in: mtr holding a latch on the page, and if
- needed, also to the previous page */
-/*****************************************************************
-Gets pointer to the next user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor. */
-
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
- /* out: next user record, NULL if there is none */
- rec_t* rec, /* in: record on leaf level */
- mtr_t* mtr); /* in: mtr holding a latch on the page, and if
- needed, also to the next page */
-/******************************************************************
-Releases the latch on a leaf page and buffer-unfixes it. */
-UNIV_INLINE
-void
-btr_leaf_page_release(
-/*==================*/
- page_t* page, /* in: page */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Gets the child node file address in a node pointer. */
-UNIV_INLINE
-ulint
-btr_node_ptr_get_child_page_no(
-/*===========================*/
- /* out: child node address */
- rec_t* rec, /* in: node pointer record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/****************************************************************
-Creates the root node for a new index tree. */
-
-ulint
-btr_create(
-/*=======*/
- /* out: page number of the created root, FIL_NULL if
- did not succeed */
- ulint type, /* in: type of the index */
- ulint space, /* in: space where created */
- dulint index_id,/* in: index id */
- ulint comp, /* in: nonzero=compact page format */
- mtr_t* mtr); /* in: mini-transaction handle */
-/****************************************************************
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-
-void
-btr_free_but_not_root(
-/*==================*/
- ulint space, /* in: space where created */
- ulint root_page_no); /* in: root page number */
-/****************************************************************
-Frees the B-tree root page. The rest of the tree MUST already have been freed. */
-
-void
-btr_free_root(
-/*==========*/
- ulint space, /* in: space where created */
- ulint root_page_no, /* in: root page number */
- mtr_t* mtr); /* in: a mini-transaction which has already
- been started */
-/*****************************************************************
-Makes tree one level higher by splitting the root, and inserts
-the tuple. It is assumed that mtr contains an x-latch on the tree.
-NOTE that the operation of this function must always succeed,
-we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called. */
-
-rec_t*
-btr_root_raise_and_insert(
-/*======================*/
- /* out: inserted record */
- btr_cur_t* cursor, /* in: cursor at which to insert: must be
- on the root page; when the function returns,
- the cursor is positioned on the predecessor
- of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Reorganizes an index page. */
-
-void
-btr_page_reorganize(
-/*================*/
- page_t* page, /* in: page to be reorganized */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Decides if the page should be split at the convergence point of
-inserts converging to left. */
-
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec);/* out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
-/*****************************************************************
-Decides if the page should be split at the convergence point of
-inserts converging to right. */
-
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
- /* out: TRUE if split recommended */
- btr_cur_t* cursor, /* in: cursor at which to insert */
- rec_t** split_rec);/* out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
-/*****************************************************************
-Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
-is released within this function! NOTE that the operation of this
-function must always succeed, we cannot reverse it: therefore
-enough free disk space must be guaranteed to be available before
-this function is called. */
-
-rec_t*
-btr_page_split_and_insert(
-/*======================*/
- /* out: inserted record; NOTE: the tree
- x-latch is released! NOTE: 2 free disk
- pages must be available! */
- btr_cur_t* cursor, /* in: cursor at which to insert; when the
- function returns, the cursor is positioned
- on the predecessor of the inserted record */
- dtuple_t* tuple, /* in: tuple to insert */
- mtr_t* mtr); /* in: mtr */
-/***********************************************************
-Inserts a data tuple to a tree on a non-leaf level. It is assumed
-that mtr holds an x-latch on the tree. */
-
-void
-btr_insert_on_non_leaf_level(
-/*=========================*/
- dict_index_t* index, /* in: index */
- ulint level, /* in: level, must be > 0 */
- dtuple_t* tuple, /* in: the record to be inserted */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
-Sets a record as the predefined minimum record. */
-
-void
-btr_set_min_rec_mark(
-/*=================*/
- rec_t* rec, /* in: record */
- ulint comp, /* in: nonzero=compact page format */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Deletes on the upper level the node pointer to a page. */
-
-void
-btr_node_ptr_delete(
-/*================*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page whose node pointer is deleted */
- mtr_t* mtr); /* in: mtr */
-#ifdef UNIV_DEBUG
-/****************************************************************
-Checks that the node pointer to a page is appropriate. */
-
-ibool
-btr_check_node_ptr(
-/*===============*/
- /* out: TRUE */
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: index page */
- mtr_t* mtr); /* in: mtr */
-#endif /* UNIV_DEBUG */
-/*****************************************************************
-Tries to merge the page first to the left immediate brother if such a
-brother exists, and the node pointers to the current page and to the
-brother reside on the same page. If the left brother does not satisfy these
-conditions, looks at the right brother. If the page is the only one on that
-level lifts the records of the page to the father page, thus reducing the
-tree height. It is assumed that mtr holds an x-latch on the tree and on the
-page. If cursor is on the leaf level, mtr must also hold x-latches to
-the brothers, if they exist. NOTE: it is assumed that the caller has reserved
-enough free extents so that the compression will always succeed if done! */
-void
-btr_compress(
-/*=========*/
- btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
- the page must not be empty: in record delete
- use btr_discard_page if the page would become
- empty */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Discards a page from a B-tree. This is used to remove the last record from
-a B-tree page: the whole page must be removed at the same time. This cannot
-be used for the root page, which is allowed to be empty. */
-
-void
-btr_discard_page(
-/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to discard: not on
- the root page */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
-Parses the redo log record for setting an index record as the predefined
-minimum record. */
-
-byte*
-btr_parse_set_min_rec_mark(
-/*=======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- ulint comp, /* in: nonzero=compact page format */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses a redo log record of reorganizing a page. */
-
-byte*
-btr_parse_page_reorganize(
-/*======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/******************************************************************
-Gets the number of pages in a B-tree. */
-
-ulint
-btr_get_size(
-/*=========*/
- /* out: number of pages */
- dict_index_t* index, /* in: index */
- ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
-/******************************************************************
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents! */
-
-page_t*
-btr_page_alloc(
-/*===========*/
- /* out: new allocated page, x-latched;
- NULL if out of space */
- dict_index_t* index, /* in: index tree */
- ulint hint_page_no, /* in: hint of a good page */
- byte file_direction, /* in: direction where a possible
- page split is made */
- ulint level, /* in: level where the page is placed
- in the tree */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-
-void
-btr_page_free(
-/*==========*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Frees a file page used in an index tree. Can also be used for BLOB
-external storage pages, because the page level (0) can be given as an
-argument. */
-
-void
-btr_page_free_low(
-/*==============*/
- dict_index_t* index, /* in: index tree */
- page_t* page, /* in: page to be freed, x-latched */
- ulint level, /* in: page level */
- mtr_t* mtr); /* in: mtr */
-#ifdef UNIV_BTR_PRINT
-/*****************************************************************
-Prints size info of a B-tree. */
-
-void
-btr_print_size(
-/*===========*/
- dict_index_t* index); /* in: index tree */
-/******************************************************************
-Prints directories and other info of all nodes in the index. */
-
-void
-btr_print_index(
-/*============*/
- dict_index_t* index, /* in: index */
- ulint width); /* in: print this many entries from start
- and end */
-#endif /* UNIV_BTR_PRINT */
-/****************************************************************
-Checks the size and number of fields in a record based on the definition of
-the index. */
-
-ibool
-btr_index_rec_validate(
-/*===================*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: index record */
- dict_index_t* index, /* in: index */
- ibool dump_on_error); /* in: TRUE if the function
- should print hex dump of record
- and page on error */
-/******************************************************************
-Checks the consistency of an index tree. */
-
-ibool
-btr_validate_index(
-/*===============*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- trx_t* trx); /* in: transaction or NULL */
-
-#define BTR_N_LEAF_PAGES 1
-#define BTR_TOTAL_SIZE 2
-
-#ifndef UNIV_NONINL
-#include "btr0btr.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
deleted file mode 100644
index 4a88f58b318..00000000000
--- a/storage/innobase/include/btr0btr.ic
+++ /dev/null
@@ -1,234 +0,0 @@
-/******************************************************
-The B-tree
-
-(c) 1994-1996 Innobase Oy
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-
-#define BTR_MAX_NODE_LEVEL 50 /* used in debug checking */
-
-/******************************************************************
-Fetches a page through buf_page_get(). When UNIV_SYNC_DEBUG is defined
-the page is additionally registered at latching order level
-SYNC_TREE_NODE, unless the caller asked for RW_NO_LATCH. */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
-				/* out: page returned by buf_page_get() */
-	ulint	space,		/* in: space id */
-	ulint	page_no,	/* in: page number */
-	ulint	mode,		/* in: latch mode */
-	mtr_t*	mtr)		/* in: mtr */
-{
-	page_t*	frame = buf_page_get(space, page_no, mode, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
-	if (RW_NO_LATCH != mode) {
-		/* Only a latched page gets a latching order level */
-		buf_page_dbg_add_level(frame, SYNC_TREE_NODE);
-	}
-#endif
-	return(frame);
-}
-
-/******************************************************************
-Writes the index id into the PAGE_INDEX_ID field of the page header
-by calling mlog_write_dulint() with the given mtr. */
-UNIV_INLINE
-void
-btr_page_set_index_id(
-/*==================*/
-	page_t*	page,	/* in: page to be created */
-	dulint	id,	/* in: index id */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	byte*	id_field = page + PAGE_HEADER + PAGE_INDEX_ID;
-
-	mlog_write_dulint(id_field, id, mtr);
-}
-
-/******************************************************************
-Reads the 8-byte index id stored at PAGE_HEADER + PAGE_INDEX_ID of
-a page. */
-UNIV_INLINE
-dulint
-btr_page_get_index_id(
-/*==================*/
-				/* out: index id */
-	page_t*	page)		/* in: index page */
-{
-	byte*	id_field = page + PAGE_HEADER + PAGE_INDEX_ID;
-
-	return(mach_read_from_8(id_field));
-}
-
-/************************************************************
-Reads the 2-byte node level stored at PAGE_HEADER + PAGE_LEVEL of an
-index page. The value is checked against BTR_MAX_NODE_LEVEL with
-ut_ad(). */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
-				/* out: level, leaf level == 0 */
-	page_t*	page)		/* in: index page */
-{
-	ulint	node_level;
-
-	ut_ad(page);
-
-	node_level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
-	ut_ad(node_level <= BTR_MAX_NODE_LEVEL);
-
-	return(node_level);
-}
-
-/************************************************************
-Gets the node level field in an index page. This is a wrapper around
-btr_page_get_level_low(); the mtr argument is only referenced inside
-ut_ad(). */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
-				/* out: level, leaf level == 0 */
-	page_t*	page,		/* in: index page */
-	mtr_t*	mtr __attribute__((unused)))
-				/* in: mini-transaction handle */
-{
-	ulint	node_level;
-
-	ut_ad(page && mtr);
-
-	node_level = btr_page_get_level_low(page);
-
-	return(node_level);
-}
-
-/************************************************************
-Stores the node level in the 2-byte PAGE_LEVEL field of the page header
-by calling mlog_write_ulint() with the given mtr. */
-UNIV_INLINE
-void
-btr_page_set_level(
-/*===============*/
-	page_t*	page,	/* in: index page */
-	ulint	level,	/* in: level, leaf level == 0 */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
-{
-	byte*	level_field;
-
-	ut_ad(page && mtr);
-	ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
-	level_field = page + PAGE_HEADER + PAGE_LEVEL;
-
-	mlog_write_ulint(level_field, level, MLOG_2BYTES, mtr);
-}
-
-/************************************************************
-Reads the 4-byte next page number stored at FIL_PAGE_NEXT of an index
-page. A ut_ad() check requires that the mtr memo contains an x-fix or
-an s-fix on the page's buffer block. */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
-				/* out: next page number */
-	page_t*	page,		/* in: index page */
-	mtr_t*	mtr __attribute__((unused)))
-				/* in: mini-transaction handle */
-{
-	ulint	next_no;
-
-	ut_ad(page && mtr);
-	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
-				MTR_MEMO_PAGE_X_FIX)
-	      || mtr_memo_contains(mtr, buf_block_align(page),
-				   MTR_MEMO_PAGE_S_FIX));
-
-	next_no = mach_read_from_4(page + FIL_PAGE_NEXT);
-
-	return(next_no);
-}
-
-/************************************************************
-Stores the next page number in the 4-byte FIL_PAGE_NEXT field of an
-index page by calling mlog_write_ulint() with the given mtr. */
-UNIV_INLINE
-void
-btr_page_set_next(
-/*==============*/
-	page_t*	page,	/* in: index page */
-	ulint	next,	/* in: next page number */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
-{
-	byte*	next_field;
-
-	ut_ad(page && mtr);
-
-	next_field = page + FIL_PAGE_NEXT;
-
-	mlog_write_ulint(next_field, next, MLOG_4BYTES, mtr);
-}
-
-/************************************************************
-Reads the 4-byte previous page number stored at FIL_PAGE_PREV of an
-index page. The mtr argument is only referenced inside ut_ad(). */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
-				/* out: prev page number */
-	page_t*	page,		/* in: index page */
-	mtr_t*	mtr __attribute__((unused)))
-				/* in: mini-transaction handle */
-{
-	ulint	prev_no;
-
-	ut_ad(page && mtr);
-
-	prev_no = mach_read_from_4(page + FIL_PAGE_PREV);
-
-	return(prev_no);
-}
-
-/************************************************************
-Stores the previous page number in the 4-byte FIL_PAGE_PREV field of
-an index page by calling mlog_write_ulint() with the given mtr. */
-UNIV_INLINE
-void
-btr_page_set_prev(
-/*==============*/
-	page_t*	page,	/* in: index page */
-	ulint	prev,	/* in: previous page number */
-	mtr_t*	mtr)	/* in: mini-transaction handle */
-{
-	byte*	prev_field;
-
-	ut_ad(page && mtr);
-
-	prev_field = page + FIL_PAGE_PREV;
-
-	mlog_write_ulint(prev_field, prev, MLOG_4BYTES, mtr);
-}
-
-/******************************************************************
-Reads the child page number from the last field of a node pointer
-record. A page number of 0 is reported on stderr together with a print
-of the page, but it is still returned to the caller. */
-UNIV_INLINE
-ulint
-btr_node_ptr_get_child_page_no(
-/*===========================*/
-				/* out: child node address */
-	rec_t*		rec,	/* in: node pointer record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	last_field_no;
-	ulint	field_len;
-	ulint	child_no;
-	byte*	child_ref;
-
-	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
-
-	/* The node pointer keeps the child address in its last field */
-	last_field_no = rec_offs_n_fields(offsets) - 1;
-	child_ref = rec_get_nth_field(rec, offsets, last_field_no,
-				      &field_len);
-
-	ut_ad(field_len == 4);
-
-	child_no = mach_read_from_4(child_ref);
-
-	if (UNIV_UNLIKELY(child_no == 0)) {
-		/* Report the bad value; the caller still receives it */
-		fprintf(stderr,
-			"InnoDB: a nonsensical page number 0"
-			" in a node ptr record at offset %lu\n",
-			(ulong) page_offset(rec));
-		buf_page_print(buf_frame_align(rec));
-	}
-
-	return(child_no);
-}
-
-/******************************************************************
-Releases the latch on a leaf page and buffer-unfixes it, by releasing
-the matching page fix entry from the mtr memo: an s-fix for
-BTR_SEARCH_LEAF, an x-fix otherwise. */
-UNIV_INLINE
-void
-btr_leaf_page_release(
-/*==================*/
-	page_t*	page,		/* in: page */
-	ulint	latch_mode,	/* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
-	mtr_t*	mtr)		/* in: mtr */
-{
-	ulint	fix_type;
-
-	ut_ad(!mtr_memo_contains(mtr, buf_block_align(page),
-				 MTR_MEMO_MODIFY));
-
-	if (latch_mode == BTR_SEARCH_LEAF) {
-		fix_type = MTR_MEMO_PAGE_S_FIX;
-	} else {
-		ut_ad(latch_mode == BTR_MODIFY_LEAF);
-		fix_type = MTR_MEMO_PAGE_X_FIX;
-	}
-
-	mtr_memo_release(mtr, buf_block_align(page), fix_type);
-}
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
deleted file mode 100644
index 213dcb7f568..00000000000
--- a/storage/innobase/include/btr0cur.h
+++ /dev/null
@@ -1,706 +0,0 @@
-/******************************************************
-The index tree cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0cur_h
-#define btr0cur_h
-
-#include "univ.i"
-#include "dict0dict.h"
-#include "data0data.h"
-#include "page0cur.h"
-#include "btr0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "ha0ha.h"
-
-/* Mode flags for btr_cur operations; these can be ORed */
-#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
-#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
-#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
- update vector or inserted entry */
-
-#define BTR_CUR_ADAPT
-#define BTR_CUR_HASH_ADAPT
-
-/*************************************************************
-Returns the page cursor component of a tree cursor. */
-UNIV_INLINE
-page_cur_t*
-btr_cur_get_page_cur(
-/*=================*/
- /* out: pointer to page cursor component */
- btr_cur_t* cursor);/* in: tree cursor */
-/*************************************************************
-Returns the record pointer of a tree cursor. */
-UNIV_INLINE
-rec_t*
-btr_cur_get_rec(
-/*============*/
- /* out: pointer to record */
- btr_cur_t* cursor);/* in: tree cursor */
-/*************************************************************
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
- btr_cur_t* cursor);/* in: tree cursor */
-/*************************************************************
-Returns the page of a tree cursor. */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
- /* out: pointer to page */
- btr_cur_t* cursor);/* in: tree cursor */
-/*************************************************************
-Returns the index of a cursor. */
-UNIV_INLINE
-dict_index_t*
-btr_cur_get_index(
-/*==============*/
- /* out: index */
- btr_cur_t* cursor);/* in: B-tree cursor */
-/*************************************************************
-Positions a tree cursor at a given record. */
-UNIV_INLINE
-void
-btr_cur_position(
-/*=============*/
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in tree */
- btr_cur_t* cursor);/* in: cursor */
-/************************************************************************
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
-
-void
-btr_cur_search_to_nth_level(
-/*========================*/
- dict_index_t* index, /* in: index */
- ulint level, /* in: the tree level of search */
- dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in
- tuple must be set so that it cannot get
- compared to the node ptr page number field! */
- ulint mode, /* in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be PAGE_CUR_LE,
- not PAGE_CUR_GE, as the latter may end up on
- the previous page of the record! Inserts
- should always be made using PAGE_CUR_LE to
- search the position! */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
- BTR_INSERT and BTR_ESTIMATE;
- cursor->left_page is used to store a pointer
- to the left neighbor page, in the cases
- BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /* in/out: tree cursor; the cursor page is
- s- or x-latched, but see also above! */
- ulint has_search_latch,/* in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
-Opens a cursor at either end of an index. */
-
-void
-btr_cur_open_at_index_side(
-/*=======================*/
- ibool from_left, /* in: TRUE if open to the low end,
- FALSE if to the high end */
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: latch mode */
- btr_cur_t* cursor, /* in: cursor */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Positions a cursor at a randomly chosen position within a B-tree. */
-
-void
-btr_cur_open_at_rnd_pos(
-/*====================*/
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /* in/out: B-tree cursor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Tries to perform an insert to a page in an index tree, next to cursor.
-It is assumed that mtr holds an x-latch on the page. The operation does
-not succeed if there is too little space on the page. If there is just
-one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record. */
-
-ulint
-btr_cur_optimistic_insert(
-/*======================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_FAIL, or error number */
- ulint flags, /* in: undo logging and locking flags: if not
- zero, the parameters index and thr should be
- specified */
- btr_cur_t* cursor, /* in: cursor on page after which to insert;
- cursor stays valid */
- dtuple_t* entry, /* in: entry to insert */
- rec_t** rec, /* out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
- be stored externally by the caller, or
- NULL */
- que_thr_t* thr, /* in: query thread or NULL */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Performs an insert on a page of an index tree. It is assumed that mtr
-holds an x-latch on the tree and on the cursor page. If the insert is
-made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist. */
-
-ulint
-btr_cur_pessimistic_insert(
-/*=======================*/
- /* out: DB_SUCCESS or error number */
- ulint flags, /* in: undo logging and locking flags: if not
- zero, the parameter thr should be
- specified; if no undo logging is specified,
- then the caller must have reserved enough
- free extents in the file space so that the
- insertion will certainly succeed */
- btr_cur_t* cursor, /* in: cursor after which to insert;
- cursor stays valid */
- dtuple_t* entry, /* in: entry to insert */
- rec_t** rec, /* out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
- be stored externally by the caller, or
- NULL */
- que_thr_t* thr, /* in: query thread or NULL */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Updates a record when the update causes no size changes in its fields. */
-
-ulint
-btr_cur_update_in_place(
-/*====================*/
- /* out: DB_SUCCESS or error number */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- upd_t* update, /* in: update vector */
- ulint cmpl_info,/* in: compiler info on secondary index
- updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Tries to update a record on a page in an index tree. It is assumed that mtr
-holds an x-latch on the page. The operation does not succeed if there is too
-little space on the page or if the update would result in too empty a page,
-so that tree compression is recommended. */
-
-ulint
-btr_cur_optimistic_update(
-/*======================*/
- /* out: DB_SUCCESS, or DB_OVERFLOW if the
- updated record does not fit, DB_UNDERFLOW
- if the page would become too empty */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- upd_t* update, /* in: update vector; this must also
- contain trx id and roll ptr fields */
- ulint cmpl_info,/* in: compiler info on secondary index
- updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Performs an update of a record on a page of a tree. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. If the
-update is made on the leaf level, to avoid deadlocks, mtr must also
-own x-latches to brothers of page, if those brothers exist. */
-
-ulint
-btr_cur_pessimistic_update(
-/*=======================*/
- /* out: DB_SUCCESS or error code */
- ulint flags, /* in: undo logging, locking, and rollback
- flags */
- btr_cur_t* cursor, /* in: cursor on the record to update */
- big_rec_t** big_rec,/* out: big rec vector whose fields have to
- be stored externally by the caller, or NULL */
- upd_t* update, /* in: update vector; this is also allowed to
- contain trx id and roll ptr fields, but
- the values in update vector have no effect */
- ulint cmpl_info,/* in: compiler info on secondary index
- updates */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
-Marks a clustered index record deleted. Writes an undo log record to
-undo log on this delete marking. Writes in the trx id field the id
-of the deleting transaction, and in the roll ptr field pointer to the
-undo log record created. */
-
-ulint
-btr_cur_del_mark_set_clust_rec(
-/*===========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- number */
- ulint flags, /* in: undo logging and locking flags */
- btr_cur_t* cursor, /* in: cursor */
- ibool val, /* in: value to set */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
-Sets a secondary index record delete mark to TRUE or FALSE. */
-
-ulint
-btr_cur_del_mark_set_sec_rec(
-/*=========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
- number */
- ulint flags, /* in: locking flag */
- btr_cur_t* cursor, /* in: cursor */
- ibool val, /* in: value to set */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
-Sets a secondary index record delete mark to FALSE. This function is
-only used by the insert buffer insert merge mechanism. */
-
-void
-btr_cur_del_unmark_for_ibuf(
-/*========================*/
- rec_t* rec, /* in: record to delete unmark */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Tries to compress a page of the tree on the leaf level. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-void
-btr_cur_compress(
-/*=============*/
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Tries to compress a page of the tree if it seems useful. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done! */
-
-ibool
-btr_cur_compress_if_useful(
-/*=======================*/
- /* out: TRUE if compression occurred */
- btr_cur_t* cursor, /* in: cursor on the page to compress;
- cursor does not stay valid if compression
- occurs */
- mtr_t* mtr); /* in: mtr */
-/***********************************************************
-Removes the record on which the tree cursor is positioned. It is assumed
-that the mtr has an x-latch on the page where the cursor is positioned,
-but no latch on the whole tree. */
-
-ibool
-btr_cur_optimistic_delete(
-/*======================*/
- /* out: TRUE if success, i.e., the page
- did not become too empty */
- btr_cur_t* cursor, /* in: cursor on the record to delete;
- cursor stays valid: if deletion succeeds,
- on function exit it points to the successor
- of the deleted record */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Removes the record on which the tree cursor is positioned. Tries
-to compress the page if its fillfactor drops below a threshold
-or if it is the only page on the level. It is assumed that mtr holds
-an x-latch on the tree and on the cursor page. To avoid deadlocks,
-mtr must also own x-latches to brothers of page, if those brothers
-exist. */
-
-ibool
-btr_cur_pessimistic_delete(
-/*=======================*/
- /* out: TRUE if compression occurred */
- ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
- the latter may occur because we may have
- to update node pointers on upper levels,
- and in the case of variable length keys
- these may actually grow in size */
- ibool has_reserved_extents, /* in: TRUE if the
- caller has already reserved enough free
- extents so that he knows that the operation
- will succeed */
- btr_cur_t* cursor, /* in: cursor on the record to delete;
- if compression does not occur, the cursor
- stays valid: it points to successor of
- deleted record on function exit */
- ibool in_rollback,/* in: TRUE if called in rollback */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
-Parses a redo log record of updating a record in-place. */
-
-byte*
-btr_cur_parse_update_in_place(
-/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- dict_index_t* index); /* in: index corresponding to page */
-/********************************************************************
-Parses the redo log record for delete marking or unmarking of a clustered
-index record. */
-
-byte*
-btr_cur_parse_del_mark_set_clust_rec(
-/*=================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: index corresponding to page */
- page_t* page); /* in: page or NULL */
-/********************************************************************
-Parses the redo log record for delete marking or unmarking of a secondary
-index record. */
-
-byte*
-btr_cur_parse_del_mark_set_sec_rec(
-/*===============================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
-/***********************************************************************
-Estimates the number of rows in a given index range. */
-
-ib_longlong
-btr_estimate_n_rows_in_range(
-/*=========================*/
- /* out: estimated number of rows */
- dict_index_t* index, /* in: index */
- dtuple_t* tuple1, /* in: range start, may also be empty tuple */
- ulint mode1, /* in: search mode for range start */
- dtuple_t* tuple2, /* in: range end, may also be empty tuple */
- ulint mode2); /* in: search mode for range end */
-/***********************************************************************
-Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals. */
-
-void
-btr_estimate_number_of_different_key_vals(
-/*======================================*/
- dict_index_t* index); /* in: index */
-/***********************************************************************
-Marks not updated extern fields as not-owned by this record. The ownership
-is transferred to the updated record which is inserted elsewhere in the
-index tree. In purge only the owner of externally stored field is allowed
-to free the field. */
-
-void
-btr_cur_mark_extern_inherited_fields(
-/*=================================*/
- rec_t* rec, /* in: record in a clustered index */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- upd_t* update, /* in: update vector */
- mtr_t* mtr); /* in: mtr */
-/***********************************************************************
-The complement of the previous function: in an update entry may inherit
-some externally stored fields from a record. We must mark them as inherited
-in entry, so that they are not freed in a rollback. */
-
-void
-btr_cur_mark_dtuple_inherited_extern(
-/*=================================*/
- dtuple_t* entry, /* in: updated entry to be inserted to
- clustered index */
- ulint* ext_vec, /* in: array of extern fields in the
- original record */
- ulint n_ext_vec, /* in: number of elements in ext_vec */
- upd_t* update); /* in: update vector */
-/***********************************************************************
-Marks all extern fields in a dtuple as owned by the record. */
-
-void
-btr_cur_unmark_dtuple_extern_fields(
-/*================================*/
- dtuple_t* entry, /* in: clustered index entry */
- ulint* ext_vec, /* in: array of numbers of fields
- which have been stored externally */
- ulint n_ext_vec); /* in: number of elements in ext_vec */
-/***********************************************************************
-Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The fields are stored on pages allocated from leaf node
-file segment of the index tree. */
-
-ulint
-btr_store_big_rec_extern_fields(
-/*============================*/
- /* out: DB_SUCCESS or error */
- dict_index_t* index, /* in: index of rec; the index tree
- MUST be X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets, /* in: rec_get_offsets(rec, index);
- the "external storage" flags in offsets
- will not correspond to rec when
- this function returns */
- big_rec_t* big_rec_vec, /* in: vector containing fields
- to be stored externally */
- mtr_t* local_mtr); /* in: mtr containing the latch to
- rec and to the tree */
-/***********************************************************************
-Frees the space in an externally stored field to the file space
-management if the field in data owns the externally stored field;
-in a rollback we may have the additional condition that the field must
-not be inherited. */
-
-void
-btr_free_externally_stored_field(
-/*=============================*/
- dict_index_t* index, /* in: index of the data, the index
- tree MUST be X-latched; if the tree
- height is 1, then also the root page
- must be X-latched! (this is relevant
- in the case this function is called
- from purge where 'data' is located on
- an undo log page, not an index
- page) */
- byte* data, /* in: internally stored data
- + reference to the externally
- stored part */
- ulint local_len, /* in: length of data */
- ibool do_not_free_inherited,/* in: TRUE if called in a
- rollback and we do not want to free
- inherited fields */
- mtr_t* local_mtr); /* in: mtr containing the latch to
- data and an X-latch to the index
- tree */
-/***************************************************************
-Frees the externally stored fields for a record. */
-
-void
-btr_rec_free_externally_stored_fields(
-/*==================================*/
- dict_index_t* index, /* in: index of the data, the index
- tree MUST be X-latched */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ibool do_not_free_inherited,/* in: TRUE if called in a
- rollback and we do not want to free
- inherited fields */
- mtr_t* mtr); /* in: mini-transaction handle which contains
- an X-latch to record page and to the index
- tree */
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. */
-
-byte*
-btr_rec_copy_externally_stored_field(
-/*=================================*/
- /* out: the field copied to heap */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint no, /* in: field number */
- ulint* len, /* out: length of the field */
- mem_heap_t* heap); /* in: mem heap */
-/***********************************************************************
-Copies an externally stored field of a record to mem heap. Parameter
-data contains a pointer to 'internally' stored part of the field:
-possibly some data, and the reference to the externally stored part in
-the last 20 bytes of data. */
-
-byte*
-btr_copy_externally_stored_field(
-/*=============================*/
- /* out: the whole field copied to heap */
- ulint* len, /* out: length of the whole field */
- byte* data, /* in: 'internally' stored part of the
- field containing also the reference to
- the external part */
- ulint local_len,/* in: length of data */
- mem_heap_t* heap); /* in: mem heap */
-/***********************************************************************
-Stores the positions of the fields marked as extern storage in the update
-vector, and also those fields which are marked as extern storage in rec
-and not mentioned in updated fields. We use this function to remember
-which fields we must mark as extern storage in a record inserted for an
-update. */
-
-ulint
-btr_push_update_extern_fields(
-/*==========================*/
- /* out: number of values stored in ext_vect */
- ulint* ext_vect,/* in: array of ulints, must be preallocated
- to have space for all fields in rec */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- upd_t* update);/* in: update vector or NULL */
-
-
-/*######################################################################*/
-
-/* In the pessimistic delete, if the page data size drops below this
-limit, merging it to a neighbor is tried */
-
-#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
-
-/* A slot in the path array. The array records a search path down the
-tree, one slot per tree level. The tree cursor refers to such an array
-through its path_arr field. */
-
-typedef struct btr_path_struct btr_path_t;
-struct btr_path_struct{
- ulint nth_rec; /* index of the record
- where the page cursor stopped on
- this level (index in alphabetical
- order); the value ULINT_UNDEFINED
- marks the end of the array */
- ulint n_recs; /* number of records on the page */
-};
-
-#define BTR_PATH_ARRAY_N_SLOTS 250 /* size of path array (in slots) */
-
-/* The tree cursor: the definition appears here only for the compiler
-to know struct size! */
-
-struct btr_cur_struct {
- dict_index_t* index; /* index where positioned */
- page_cur_t page_cur; /* page cursor */
- page_t* left_page; /* this field is used to store
- a pointer to the left neighbor
- page, in the cases
- BTR_SEARCH_PREV and
- BTR_MODIFY_PREV */
- /*------------------------------*/
- que_thr_t* thr; /* this field is only used when
- btr_cur_search_... is called for an
- index entry insertion: the calling
- query thread is passed here to be
- used in the insert buffer */
- /*------------------------------*/
- /* The following fields are used in btr_cur_search... to pass
- information: */
- ulint flag; /* BTR_CUR_HASH, BTR_CUR_HASH_FAIL,
- BTR_CUR_BINARY, or
- BTR_CUR_INSERT_TO_IBUF */
- ulint tree_height; /* Tree height if the search is done
- for a pessimistic insert or update
- operation */
- ulint up_match; /* If the search mode was PAGE_CUR_LE,
- the number of matched fields to the
- first user record to the right of
- the cursor record after
- btr_cur_search_...;
- for the mode PAGE_CUR_GE, the matched
- fields to the first user record AT THE
- CURSOR or to the right of it;
- NOTE that the up_match and low_match
- values may exceed the correct values
- for comparison to the adjacent user
- record if that record is on a
- different leaf page! (See the note in
- row_ins_duplicate_key.) */
- ulint up_bytes; /* number of matched bytes to the
- right at the time cursor positioned;
- only used internally in searches: not
- defined after the search */
- ulint low_match; /* if search mode was PAGE_CUR_LE,
- the number of matched fields to the
- first user record AT THE CURSOR or
- to the left of it after
- btr_cur_search_...;
- NOT defined for PAGE_CUR_GE or any
- other search modes; see also the NOTE
- in up_match! */
- ulint low_bytes; /* number of matched bytes to the
- right at the time cursor positioned;
- only used internally in searches: not
- defined after the search */
- ulint n_fields; /* prefix length used in a hash
- search if hash_node != NULL */
- ulint n_bytes; /* hash prefix bytes if hash_node !=
- NULL */
- ulint fold; /* fold value used in the search if
- flag is BTR_CUR_HASH */
- /*------------------------------*/
- btr_path_t* path_arr; /* in estimating the number of
- rows in range, we store in this array
- information of the path through
- the tree */
-};
-
-/* Values for the flag documenting the used search method */
-#define BTR_CUR_HASH 1 /* successful shortcut using the hash
- index */
-#define BTR_CUR_HASH_FAIL 2 /* failure using hash, success using
- binary search: the misleading hash
- reference is stored in the field
- hash_node, and might be necessary to
- update */
-#define BTR_CUR_BINARY 3 /* success using the binary search */
-#define BTR_CUR_INSERT_TO_IBUF 4 /* performed the intended insert to
- the insert buffer */
-
-/* If pessimistic delete fails because of lack of file space,
-there is still a good chance of success a little later: try this many times,
-and sleep this many microseconds in between */
-#define BTR_CUR_RETRY_DELETE_N_TIMES 100
-#define BTR_CUR_RETRY_SLEEP_TIME 50000
-
-/* The reference in a field for which data is stored on a different page.
-The reference is at the end of the 'locally' stored part of the field.
-'Locally' means storage in the index record.
-We store locally a long enough prefix of each column so that we can determine
-the ordering parts of each index record without looking into the externally
-stored part. */
-
-/*--------------------------------------*/
-#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */
-#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */
-#define BTR_EXTERN_OFFSET 8 /* offset of BLOB header
- on that page */
-#define BTR_EXTERN_LEN 12 /* 8 bytes containing the
- length of the externally
- stored part of the BLOB.
- The 2 highest bits are
- reserved to the flags below. */
-/*--------------------------------------*/
-#define BTR_EXTERN_FIELD_REF_SIZE 20
-
-/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte
-at lowest address) is set to 1 if this field does not 'own' the externally
-stored field; only the owner field is allowed to free the field in purge!
-If the 2nd highest bit is 1 then it means that the externally stored field
-was inherited from an earlier version of the row. In rollback we are not
-allowed to free an inherited external field. */
-
-#define BTR_EXTERN_OWNER_FLAG 128
-#define BTR_EXTERN_INHERITED_FLAG 64
-
-extern ulint btr_cur_n_non_sea;
-extern ulint btr_cur_n_sea;
-extern ulint btr_cur_n_non_sea_old;
-extern ulint btr_cur_n_sea_old;
-
-#ifndef UNIV_NONINL
-#include "btr0cur.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
deleted file mode 100644
index bd2c46eb734..00000000000
--- a/storage/innobase/include/btr0cur.ic
+++ /dev/null
@@ -1,154 +0,0 @@
-/******************************************************
-The index tree cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0btr.h"
-
-/*************************************************************
-Returns the page cursor component of a tree cursor. */
-UNIV_INLINE
-page_cur_t*
-btr_cur_get_page_cur(
-/*=================*/
- /* out: pointer to page cursor component */
- btr_cur_t* cursor) /* in: tree cursor */
-{
- return(&(cursor->page_cur));
-}
-
-/*************************************************************
-Returns the record pointer of a tree cursor. */
-UNIV_INLINE
-rec_t*
-btr_cur_get_rec(
-/*============*/
- /* out: pointer to record */
- btr_cur_t* cursor) /* in: tree cursor */
-{
- return(page_cur_get_rec(&(cursor->page_cur)));
-}
-
-/*************************************************************
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
- btr_cur_t* cursor) /* in: tree cursor */
-{
- page_cur_invalidate(&(cursor->page_cur));
-}
-
-/*************************************************************
-Returns the page of a tree cursor. */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
- /* out: pointer to page */
- btr_cur_t* cursor) /* in: tree cursor */
-{
- return(buf_frame_align(page_cur_get_rec(&(cursor->page_cur))));
-}
-
-/*************************************************************
-Returns the index of a cursor. */
-UNIV_INLINE
-dict_index_t*
-btr_cur_get_index(
-/*==============*/
- /* out: index */
- btr_cur_t* cursor) /* in: B-tree cursor */
-{
- return(cursor->index);
-}
-
-/*************************************************************
-Positions a tree cursor at a given record. */
-UNIV_INLINE
-void
-btr_cur_position(
-/*=============*/
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in tree */
- btr_cur_t* cursor) /* in: cursor */
-{
- page_cur_position(rec, btr_cur_get_page_cur(cursor));
-
- cursor->index = index;
-}
-
-/*************************************************************************
-Checks if compressing an index page where a btr cursor is placed makes
-sense. */
-UNIV_INLINE
-ibool
-btr_cur_compress_recommendation(
-/*============================*/
- /* out: TRUE if compression is recommended */
- btr_cur_t* cursor, /* in: btr cursor */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
- MTR_MEMO_PAGE_X_FIX));
-
- page = btr_cur_get_page(cursor);
-
- if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
-
- /* The page fillfactor has dropped below a predefined
- minimum value OR the level in the B-tree contains just
- one page: we recommend compression if this is not the
- root page. */
-
- return(dict_index_get_page(cursor->index)
- != buf_frame_get_page_no(page));
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Checks if the record on which the cursor is placed can be deleted without
-making tree compression necessary (or, recommended). */
-UNIV_INLINE
-ibool
-btr_cur_can_delete_without_compress(
-/*================================*/
- /* out: TRUE if can be deleted without
- recommended compression */
- btr_cur_t* cursor, /* in: btr cursor */
- ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
- MTR_MEMO_PAGE_X_FIX));
-
- page = btr_cur_get_page(cursor);
-
- if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))
- || (page_get_n_recs(page) < 2)) {
-
- /* The page fillfactor will drop below a predefined
- minimum value, OR the level in the B-tree contains just
- one page, OR the page will become empty: we recommend
- compression if this is not the root page. */
-
- return(dict_index_get_page(cursor->index)
- == buf_frame_get_page_no(page));
- }
-
- return(TRUE);
-}
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
deleted file mode 100644
index ee40e905544..00000000000
--- a/storage/innobase/include/btr0pcur.h
+++ /dev/null
@@ -1,520 +0,0 @@
-/******************************************************
-The index tree persistent cursor
-
-(c) 1996 Innobase Oy
-
-Created 2/23/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0pcur_h
-#define btr0pcur_h
-
-#include "univ.i"
-#include "dict0dict.h"
-#include "data0data.h"
-#include "mtr0mtr.h"
-#include "page0cur.h"
-#include "btr0cur.h"
-#include "btr0btr.h"
-#include "btr0types.h"
-
-/* Relative positions for a stored cursor position */
-#define BTR_PCUR_ON 1
-#define BTR_PCUR_BEFORE 2
-#define BTR_PCUR_AFTER 3
-/* Note that if the tree is not empty, btr_pcur_store_position does not
-use the following, but only uses the above three alternatives, where the
-position is stored relative to a specific record: this makes implementation
-of a scroll cursor easier */
-#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */
-#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */
-
-/******************************************************************
-Allocates memory for a persistent cursor object and initializes the cursor. */
-
-btr_pcur_t*
-btr_pcur_create_for_mysql(void);
-/*============================*/
- /* out, own: persistent cursor */
-/******************************************************************
-Frees the memory for a persistent cursor object. */
-
-void
-btr_pcur_free_for_mysql(
-/*====================*/
- btr_pcur_t* cursor); /* in, own: persistent cursor */
-/******************************************************************
-Copies the stored position of a pcur to another pcur. */
-
-void
-btr_pcur_copy_stored_position(
-/*==========================*/
- btr_pcur_t* pcur_receive, /* in: pcur which will receive the
- position info */
- btr_pcur_t* pcur_donate); /* in: pcur from which the info is
- copied */
-/******************************************************************
-Sets the old_rec_buf field to NULL. */
-UNIV_INLINE
-void
-btr_pcur_init(
-/*==========*/
- btr_pcur_t* pcur); /* in: persistent cursor */
-/******************************************************************
-Initializes and opens a persistent cursor to an index tree. It should be
-closed with btr_pcur_close. */
-UNIV_INLINE
-void
-btr_pcur_open(
-/*==========*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Opens an persistent cursor to an index tree without initializing the
-cursor. */
-UNIV_INLINE
-void
-btr_pcur_open_with_no_init(
-/*=======================*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page of the
- record! */
- ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
- btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
- ulint has_search_latch,/* in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
-Opens a persistent cursor at either end of an index. */
-UNIV_INLINE
-void
-btr_pcur_open_at_index_side(
-/*========================*/
- ibool from_left, /* in: TRUE if open to the low end,
- FALSE if to the high end */
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: latch mode */
- btr_pcur_t* pcur, /* in: cursor */
- ibool do_init, /* in: TRUE if should be initialized */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Gets the up_match value for a pcur after a search. */
-UNIV_INLINE
-ulint
-btr_pcur_get_up_match(
-/*==================*/
- /* out: number of matched fields at the cursor
- or to the right if search mode was PAGE_CUR_GE,
- otherwise undefined */
- btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
-/******************************************************************
-Gets the low_match value for a pcur after a search. */
-UNIV_INLINE
-ulint
-btr_pcur_get_low_match(
-/*===================*/
- /* out: number of matched fields at the cursor
- or to the right if search mode was PAGE_CUR_LE,
- otherwise undefined */
- btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
-/******************************************************************
-If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
-user record satisfying the search condition, in the case PAGE_CUR_L or
-PAGE_CUR_LE, on the last user record. If no such user record exists, then
-in the first case sets the cursor after last in tree, and in the latter case
-before first in tree. The latching mode must be BTR_SEARCH_LEAF or
-BTR_MODIFY_LEAF. */
-
-void
-btr_pcur_open_on_user_rec(
-/*======================*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ... */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF or
- BTR_MODIFY_LEAF */
- btr_pcur_t* cursor, /* in: memory buffer for persistent
- cursor */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INLINE
-void
-btr_pcur_open_at_rnd_pos(
-/*=====================*/
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in/out: B-tree pcur */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Frees the possible old_rec_buf buffer of a persistent cursor and sets the
-latch mode of the persistent cursor to BTR_NO_LATCHES. */
-UNIV_INLINE
-void
-btr_pcur_close(
-/*===========*/
- btr_pcur_t* cursor); /* in: persistent cursor */
-/******************************************************************
-The position of the cursor is stored by taking an initial segment of the
-record the cursor is positioned on, before, or after, and copying it to the
-cursor data structure, or just setting a flag if the cursor id before the
-first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
-page where the cursor is positioned must not be empty if the index tree is
-not totally empty! */
-
-void
-btr_pcur_store_position(
-/*====================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-Restores the stored position of a persistent cursor bufferfixing the page and
-obtaining the specified latches. If the cursor position was saved when the
-(1) cursor was positioned on a user record: this function restores the position
-to the last record LESS OR EQUAL to the stored record;
-(2) cursor was positioned on a page infimum record: restores the position to
-the last record LESS than the user record which was the successor of the page
-infimum;
-(3) cursor was positioned on the page supremum: restores to the first record
-GREATER than the user record which was the predecessor of the supremum.
-(4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree. */
-
-ibool
-btr_pcur_restore_position(
-/*======================*/
- /* out: TRUE if the cursor position
- was stored when it was on a user record
- and it can be restored on a user record
- whose ordering fields are identical to
- the ones of the original user record */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in: detached persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/******************************************************************
-If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
-releases the page latch and bufferfix reserved by the cursor.
-NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
-made by the current mini-transaction to the data protected by the
-cursor latch, as then the latch must not be released until mtr_commit. */
-
-void
-btr_pcur_release_leaf(
-/*==================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Gets the rel_pos field for a cursor whose position has been stored. */
-UNIV_INLINE
-ulint
-btr_pcur_get_rel_pos(
-/*=================*/
- /* out: BTR_PCUR_ON, ... */
- btr_pcur_t* cursor);/* in: persistent cursor */
-/*************************************************************
-Sets the mtr field for a pcur. */
-UNIV_INLINE
-void
-btr_pcur_set_mtr(
-/*=============*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in, own: mtr */
-/*************************************************************
-Gets the mtr field for a pcur. */
-UNIV_INLINE
-mtr_t*
-btr_pcur_get_mtr(
-/*=============*/
- /* out: mtr */
- btr_pcur_t* cursor); /* in: persistent cursor */
-/******************************************************************
-Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
-that is, the cursor becomes detached. If there have been modifications
-to the page where pcur is positioned, this can be used instead of
-btr_pcur_release_leaf. Function btr_pcur_store_position should be used
-before calling this, if restoration of cursor is wanted later. */
-UNIV_INLINE
-void
-btr_pcur_commit(
-/*============*/
- btr_pcur_t* pcur); /* in: persistent cursor */
-/******************************************************************
-Differs from btr_pcur_commit in that we can specify the mtr to commit. */
-UNIV_INLINE
-void
-btr_pcur_commit_specify_mtr(
-/*========================*/
- btr_pcur_t* pcur, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr to commit */
-/******************************************************************
-Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
-UNIV_INLINE
-ibool
-btr_pcur_is_detached(
-/*=================*/
- /* out: TRUE if detached */
- btr_pcur_t* pcur); /* in: persistent cursor */
-/*************************************************************
-Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'. */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next(
-/*==================*/
- /* out: TRUE if the cursor was not after last
- in tree */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Moves the persistent cursor to the previous record in the tree. If no records
-are left, the cursor stays 'before first in tree'. */
-
-ibool
-btr_pcur_move_to_prev(
-/*==================*/
- /* out: TRUE if the cursor was not before first
- in tree */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Moves the persistent cursor to the last record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_last_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'. */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next_user_rec(
-/*===========================*/
- /* out: TRUE if the cursor moved forward,
- ending on a user record */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Moves the persistent cursor to the first record on the next page.
-Releases the latch on the current page, and bufferunfixes it.
-Note that there must not be modifications on the current page,
-as then the x-latch can be released only in mtr_commit. */
-
-void
-btr_pcur_move_to_next_page(
-/*=======================*/
- btr_pcur_t* cursor, /* in: persistent cursor; must be on the
- last record of the current page */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Moves the persistent cursor backward if it is on the first record
-of the page. Releases the latch on the current page, and bufferunfixes
-it. Note that to prevent a possible deadlock, the operation first
-stores the position of the cursor, releases the leaf latch, acquires
-necessary latches and restores the cursor position again before returning.
-The alphabetical position of the cursor is guaranteed to be sensible
-on return, but it may happen that the cursor is not positioned on the
-last record of any page, because the structure of the tree may have
-changed while the cursor had no latches. */
-
-void
-btr_pcur_move_backward_from_page(
-/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor, must be on the
- first record of the current page */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Returns the btr cursor component of a persistent cursor. */
-UNIV_INLINE
-btr_cur_t*
-btr_pcur_get_btr_cur(
-/*=================*/
- /* out: pointer to btr cursor component */
- btr_pcur_t* cursor); /* in: persistent cursor */
-/*************************************************************
-Returns the page cursor component of a persistent cursor. */
-UNIV_INLINE
-page_cur_t*
-btr_pcur_get_page_cur(
-/*==================*/
- /* out: pointer to page cursor component */
- btr_pcur_t* cursor); /* in: persistent cursor */
-/*************************************************************
-Returns the page of a persistent cursor. */
-UNIV_INLINE
-page_t*
-btr_pcur_get_page(
-/*==============*/
- /* out: pointer to the page */
- btr_pcur_t* cursor);/* in: persistent cursor */
-/*************************************************************
-Returns the record of a persistent cursor. */
-UNIV_INLINE
-rec_t*
-btr_pcur_get_rec(
-/*=============*/
- /* out: pointer to the record */
- btr_pcur_t* cursor);/* in: persistent cursor */
-/*************************************************************
-Checks if the persistent cursor is on a user record. */
-UNIV_INLINE
-ibool
-btr_pcur_is_on_user_rec(
-/*====================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Checks if the persistent cursor is after the last user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_on_page(
-/*===========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Checks if the persistent cursor is before the first user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_on_page(
-/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Checks if the persistent cursor is before the first user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Checks if the persistent cursor is after the last user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Moves the persistent cursor to the next record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_next_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************
-Moves the persistent cursor to the previous record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_prev_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr); /* in: mtr */
-
-
-/* The persistent B-tree cursor structure. This is used mainly for SQL
-selects, updates, and deletes. */
-
-struct btr_pcur_struct{
- btr_cur_t btr_cur; /* a B-tree cursor */
- ulint latch_mode; /* see TODO note below!
- BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
- BTR_MODIFY_TREE, or BTR_NO_LATCHES,
- depending on the latching state of
- the page and tree where the cursor is
- positioned; the last value means that
- the cursor is not currently positioned:
- we say then that the cursor is
- detached; it can be restored to
- attached if the old position was
- stored in old_rec */
- ulint old_stored; /* BTR_PCUR_OLD_STORED
- or BTR_PCUR_OLD_NOT_STORED */
- rec_t* old_rec; /* if cursor position is stored,
- contains an initial segment of the
- latest record cursor was positioned
- either on, before, or after */
- ulint old_n_fields; /* number of fields in old_rec */
- ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
- BTR_PCUR_AFTER, depending on whether
- cursor was on, before, or after the
- old_rec record */
- buf_block_t* block_when_stored;/* buffer block when the position was
- stored; note that if AWE is on, frames
- may move */
- dulint modify_clock; /* the modify clock value of the
- buffer block when the cursor position
- was stored */
- ulint pos_state; /* see TODO note below!
- BTR_PCUR_IS_POSITIONED,
- BTR_PCUR_WAS_POSITIONED,
- BTR_PCUR_NOT_POSITIONED */
- ulint search_mode; /* PAGE_CUR_G, ... */
- trx_t* trx_if_known; /* the transaction, if we know it;
- otherwise this field is not defined;
- can ONLY BE USED in error prints in
- fatal assertion failures! */
- /*-----------------------------*/
- /* NOTE that the following fields may possess dynamically allocated
- memory which should be freed if not needed anymore! */
-
- mtr_t* mtr; /* NULL, or this field may contain
- a mini-transaction which holds the
- latch on the cursor page */
- byte* old_rec_buf; /* NULL, or a dynamically allocated
- buffer for old_rec */
- ulint buf_size; /* old_rec_buf size if old_rec_buf
- is not NULL */
-};
-
-#define BTR_PCUR_IS_POSITIONED 1997660512 /* TODO: currently, the state
- can be BTR_PCUR_IS_POSITIONED,
- though it really should be
- BTR_PCUR_WAS_POSITIONED,
- because we have no obligation
- to commit the cursor with
- mtr; similarly latch_mode may
- be out of date. This can
- lead to problems if btr_pcur
- is not used the right way;
- all current code should be
- ok. */
-#define BTR_PCUR_WAS_POSITIONED 1187549791
-#define BTR_PCUR_NOT_POSITIONED 1328997689
-
-#define BTR_PCUR_OLD_STORED 908467085
-#define BTR_PCUR_OLD_NOT_STORED 122766467
-
-#ifndef UNIV_NONINL
-#include "btr0pcur.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
deleted file mode 100644
index 66462530716..00000000000
--- a/storage/innobase/include/btr0pcur.ic
+++ /dev/null
@@ -1,630 +0,0 @@
-/******************************************************
-The index tree persistent cursor
-
-(c) 1996 Innobase Oy
-
-Created 2/23/1996 Heikki Tuuri
-*******************************************************/
-
-
-/*************************************************************
-Gets the rel_pos field for a cursor whose position has been stored. */
-UNIV_INLINE
-ulint
-btr_pcur_get_rel_pos(
-/*=================*/
- /* out: BTR_PCUR_ON, ... */
- btr_pcur_t* cursor) /* in: persistent cursor */
-{
- ut_ad(cursor);
- ut_ad(cursor->old_rec);
- ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
- ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
- || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-
- return(cursor->rel_pos);
-}
-
-/*************************************************************
-Sets the mtr field for a pcur. */
-UNIV_INLINE
-void
-btr_pcur_set_mtr(
-/*=============*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in, own: mtr */
-{
- ut_ad(cursor);
-
- cursor->mtr = mtr;
-}
-
-/*************************************************************
-Gets the mtr field for a pcur. */
-UNIV_INLINE
-mtr_t*
-btr_pcur_get_mtr(
-/*=============*/
- /* out: mtr */
- btr_pcur_t* cursor) /* in: persistent cursor */
-{
- ut_ad(cursor);
-
- return(cursor->mtr);
-}
-
-/*************************************************************
-Returns the btr cursor component of a persistent cursor. */
-UNIV_INLINE
-btr_cur_t*
-btr_pcur_get_btr_cur(
-/*=================*/
- /* out: pointer to btr cursor component */
- btr_pcur_t* cursor) /* in: persistent cursor */
-{
- return(&(cursor->btr_cur));
-}
-
-/*************************************************************
-Returns the page cursor component of a persistent cursor. */
-UNIV_INLINE
-page_cur_t*
-btr_pcur_get_page_cur(
-/*==================*/
- /* out: pointer to page cursor component */
- btr_pcur_t* cursor) /* in: persistent cursor */
-{
- return(btr_cur_get_page_cur(&(cursor->btr_cur)));
-}
-
-/*************************************************************
-Returns the page of a persistent cursor. */
-UNIV_INLINE
-page_t*
-btr_pcur_get_page(
-/*==============*/
- /* out: pointer to the page */
- btr_pcur_t* cursor) /* in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-
- return(page_cur_get_page(btr_pcur_get_page_cur(cursor)));
-}
-
-/*************************************************************
-Returns the record of a persistent cursor. */
-UNIV_INLINE
-rec_t*
-btr_pcur_get_rec(
-/*=============*/
- /* out: pointer to the record */
- btr_pcur_t* cursor) /* in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- return(page_cur_get_rec(btr_pcur_get_page_cur(cursor)));
-}
-
-/******************************************************************
-Gets the up_match value for a pcur after a search. */
-UNIV_INLINE
-ulint
-btr_pcur_get_up_match(
-/*==================*/
- /* out: number of matched fields at the cursor
- or to the right if search mode was PAGE_CUR_GE,
- otherwise undefined */
- btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */
-{
- btr_cur_t* btr_cursor;
-
- ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
- || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
-
- ut_ad(btr_cursor->up_match != ULINT_UNDEFINED);
-
- return(btr_cursor->up_match);
-}
-
-/******************************************************************
-Gets the low_match value for a pcur after a search. */
-UNIV_INLINE
-ulint
-btr_pcur_get_low_match(
-/*===================*/
- /* out: number of matched fields at the cursor
- or to the right if search mode was PAGE_CUR_LE,
- otherwise undefined */
- btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */
-{
- btr_cur_t* btr_cursor;
-
- ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
- || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
- ut_ad(btr_cursor->low_match != ULINT_UNDEFINED);
-
- return(btr_cursor->low_match);
-}
-
-/*************************************************************
-Checks if the persistent cursor is after the last user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_on_page(
-/*===========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- UT_NOT_USED(mtr);
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
-}
-
-/*************************************************************
-Checks if the persistent cursor is before the first user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_on_page(
-/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- UT_NOT_USED(mtr);
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
-}
-
-/*************************************************************
-Checks if the persistent cursor is on a user record. */
-UNIV_INLINE
-ibool
-btr_pcur_is_on_user_rec(
-/*====================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- if ((btr_pcur_is_before_first_on_page(cursor, mtr))
- || (btr_pcur_is_after_last_on_page(cursor, mtr))) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*************************************************************
-Checks if the persistent cursor is before the first user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
-}
-
-/*************************************************************
-Checks if the persistent cursor is after the last user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
-}
-
-/*************************************************************
-Moves the persistent cursor to the next record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_next_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- UT_NOT_USED(mtr);
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*************************************************************
-Moves the persistent cursor to the previous record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_prev_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- UT_NOT_USED(mtr);
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*************************************************************
-Moves the persistent cursor to the last record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_last_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- UT_NOT_USED(mtr);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_set_after_last(buf_frame_align(btr_pcur_get_rec(cursor)),
- btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*************************************************************
-Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'. */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next_user_rec(
-/*===========================*/
- /* out: TRUE if the cursor moved forward,
- ending on a user record */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-loop:
- if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
- return(FALSE);
- }
-
- btr_pcur_move_to_next_page(cursor, mtr);
- } else {
- btr_pcur_move_to_next_on_page(cursor, mtr);
- }
-
- if (btr_pcur_is_on_user_rec(cursor, mtr)) {
-
- return(TRUE);
- }
-
- goto loop;
-}
-
-/*************************************************************
-Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'. */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next(
-/*==================*/
- /* out: TRUE if the cursor was not after last
- in tree */
- btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
- return(FALSE);
- }
-
- btr_pcur_move_to_next_page(cursor, mtr);
-
- return(TRUE);
- }
-
- btr_pcur_move_to_next_on_page(cursor, mtr);
-
- return(TRUE);
-}
-
-/******************************************************************
-Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
-that is, the cursor becomes detached. If there have been modifications
-to the page where pcur is positioned, this can be used instead of
-btr_pcur_release_leaf. Function btr_pcur_store_position should be used
-before calling this, if restoration of cursor is wanted later. */
-UNIV_INLINE
-void
-btr_pcur_commit(
-/*============*/
- btr_pcur_t* pcur) /* in: persistent cursor */
-{
- ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
-
- pcur->latch_mode = BTR_NO_LATCHES;
-
- mtr_commit(pcur->mtr);
-
- pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/******************************************************************
-Differs from btr_pcur_commit in that we can specify the mtr to commit. */
-UNIV_INLINE
-void
-btr_pcur_commit_specify_mtr(
-/*========================*/
- btr_pcur_t* pcur, /* in: persistent cursor */
- mtr_t* mtr) /* in: mtr to commit */
-{
- ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
-
- pcur->latch_mode = BTR_NO_LATCHES;
-
- mtr_commit(mtr);
-
- pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/******************************************************************
-Sets the pcur latch mode to BTR_NO_LATCHES. */
-UNIV_INLINE
-void
-btr_pcur_detach(
-/*============*/
- btr_pcur_t* pcur) /* in: persistent cursor */
-{
- ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
-
- pcur->latch_mode = BTR_NO_LATCHES;
-
- pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/******************************************************************
-Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
-UNIV_INLINE
-ibool
-btr_pcur_is_detached(
-/*=================*/
- /* out: TRUE if detached */
- btr_pcur_t* pcur) /* in: persistent cursor */
-{
- if (pcur->latch_mode == BTR_NO_LATCHES) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/******************************************************************
-Sets the old_rec_buf field to NULL. */
-UNIV_INLINE
-void
-btr_pcur_init(
-/*==========*/
- btr_pcur_t* pcur) /* in: persistent cursor */
-{
- pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
- pcur->old_rec_buf = NULL;
- pcur->old_rec = NULL;
-}
-
-/******************************************************************
-Initializes and opens a persistent cursor to an index tree. It should be
-closed with btr_pcur_close. */
-UNIV_INLINE
-void
-btr_pcur_open(
-/*==========*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
- mtr_t* mtr) /* in: mtr */
-{
- btr_cur_t* btr_cursor;
-
- /* Initialize the cursor */
-
- btr_pcur_init(cursor);
-
- cursor->latch_mode = latch_mode;
- cursor->search_mode = mode;
-
- /* Search with the tree cursor */
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
-
- btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- btr_cursor, 0, mtr);
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-
- cursor->trx_if_known = NULL;
-}
-
-/******************************************************************
-Opens an persistent cursor to an index tree without initializing the
-cursor. */
-UNIV_INLINE
-void
-btr_pcur_open_with_no_init(
-/*=======================*/
- dict_index_t* index, /* in: index */
- dtuple_t* tuple, /* in: tuple on which search done */
- ulint mode, /* in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page of the
- record! */
- ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
- btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
- ulint has_search_latch,/* in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- mtr_t* mtr) /* in: mtr */
-{
- btr_cur_t* btr_cursor;
-
- cursor->latch_mode = latch_mode;
- cursor->search_mode = mode;
-
- /* Search with the tree cursor */
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
-
- btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- btr_cursor, has_search_latch, mtr);
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->trx_if_known = NULL;
-}
-
-/*********************************************************************
-Opens a persistent cursor at either end of an index. */
-UNIV_INLINE
-void
-btr_pcur_open_at_index_side(
-/*========================*/
- ibool from_left, /* in: TRUE if open to the low end,
- FALSE if to the high end */
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: latch mode */
- btr_pcur_t* pcur, /* in: cursor */
- ibool do_init, /* in: TRUE if should be initialized */
- mtr_t* mtr) /* in: mtr */
-{
- pcur->latch_mode = latch_mode;
-
- if (from_left) {
- pcur->search_mode = PAGE_CUR_G;
- } else {
- pcur->search_mode = PAGE_CUR_L;
- }
-
- if (do_init) {
- btr_pcur_init(pcur);
- }
-
- btr_cur_open_at_index_side(from_left, index, latch_mode,
- btr_pcur_get_btr_cur(pcur), mtr);
- pcur->pos_state = BTR_PCUR_IS_POSITIONED;
-
- pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- pcur->trx_if_known = NULL;
-}
-
-/**************************************************************************
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INLINE
-void
-btr_pcur_open_at_rnd_pos(
-/*=====================*/
- dict_index_t* index, /* in: index */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /* in/out: B-tree pcur */
- mtr_t* mtr) /* in: mtr */
-{
- /* Initialize the cursor */
-
- cursor->latch_mode = latch_mode;
- cursor->search_mode = PAGE_CUR_G;
-
- btr_pcur_init(cursor);
-
- btr_cur_open_at_rnd_pos(index, latch_mode,
- btr_pcur_get_btr_cur(cursor), mtr);
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->trx_if_known = NULL;
-}
-
-/******************************************************************
-Frees the possible memory heap of a persistent cursor and sets the latch
-mode of the persistent cursor to BTR_NO_LATCHES. */
-UNIV_INLINE
-void
-btr_pcur_close(
-/*===========*/
- btr_pcur_t* cursor) /* in: persistent cursor */
-{
- if (cursor->old_rec_buf != NULL) {
-
- mem_free(cursor->old_rec_buf);
-
- cursor->old_rec = NULL;
- cursor->old_rec_buf = NULL;
- }
-
- cursor->btr_cur.page_cur.rec = NULL;
- cursor->old_rec = NULL;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->latch_mode = BTR_NO_LATCHES;
- cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
-
- cursor->trx_if_known = NULL;
-}
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
deleted file mode 100644
index 6d1c2bb86d3..00000000000
--- a/storage/innobase/include/btr0sea.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/************************************************************************
-The index tree adaptive search
-
-(c) 1996 Innobase Oy
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#ifndef btr0sea_h
-#define btr0sea_h
-
-#include "univ.i"
-
-#include "rem0rec.h"
-#include "dict0dict.h"
-#include "btr0types.h"
-#include "mtr0mtr.h"
-#include "ha0ha.h"
-
-/*********************************************************************
-Creates and initializes the adaptive search system at a database start. */
-
-void
-btr_search_sys_create(
-/*==================*/
- ulint hash_size); /* in: hash index hash table size */
-/************************************************************************
-Returns search info for an index. */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(
-/*================*/
- /* out: search info; search mutex reserved */
- dict_index_t* index); /* in: index */
-/*********************************************************************
-Creates and initializes a search info struct. */
-
-btr_search_t*
-btr_search_info_create(
-/*===================*/
- /* out, own: search info struct */
- mem_heap_t* heap); /* in: heap where created */
-/*********************************************************************
-Returns the value of ref_count. The value is protected by
-btr_search_latch. */
-ulint
-btr_search_info_get_ref_count(
-/*==========================*/
- /* out: ref_count value. */
- btr_search_t* info); /* in: search info. */
-/*************************************************************************
-Updates the search info. */
-UNIV_INLINE
-void
-btr_search_info_update(
-/*===================*/
- dict_index_t* index, /* in: index of the cursor */
- btr_cur_t* cursor);/* in: cursor which was just positioned */
-/**********************************************************************
-Tries to guess the right search position based on the hash search info
-of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
-and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values. */
-
-ibool
-btr_search_guess_on_hash(
-/*=====================*/
- /* out: TRUE if succeeded */
- dict_index_t* index, /* in: index */
- btr_search_t* info, /* in: index search info */
- dtuple_t* tuple, /* in: logical record */
- ulint mode, /* in: PAGE_CUR_L, ... */
- ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /* out: tree cursor */
- ulint has_search_latch,/* in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split). */
-
-void
-btr_search_move_or_delete_hash_entries(
-/*===================================*/
- page_t* new_page, /* in: records are copied
- to this page */
- page_t* page, /* in: index page */
- dict_index_t* index); /* in: record descriptor */
-/************************************************************************
-Drops a page hash index. */
-
-void
-btr_search_drop_page_hash_index(
-/*============================*/
- page_t* page); /* in: index page, s- or x-latched */
-/************************************************************************
-Drops a page hash index when a page is freed from a fseg to the file system.
-Drops possible hash index if the page happens to be in the buffer pool. */
-
-void
-btr_search_drop_page_hash_when_freed(
-/*=================================*/
- ulint space, /* in: space id */
- ulint page_no); /* in: page number */
-/************************************************************************
-Updates the page hash index when a single record is inserted on a page. */
-
-void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
- btr_cur_t* cursor);/* in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-/************************************************************************
-Updates the page hash index when a single record is inserted on a page. */
-
-void
-btr_search_update_hash_on_insert(
-/*=============================*/
- btr_cur_t* cursor);/* in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-/************************************************************************
-Updates the page hash index when a single record is deleted from a page. */
-
-void
-btr_search_update_hash_on_delete(
-/*=============================*/
- btr_cur_t* cursor);/* in: cursor which was positioned on the
- record to delete using btr_cur_search_...,
- the record is not yet deleted */
-/************************************************************************
-Validates the search system. */
-
-ibool
-btr_search_validate(void);
-/*======================*/
- /* out: TRUE if ok */
-
-/* The search info struct in an index */
-
-struct btr_search_struct{
- ulint ref_count; /* Number of blocks in this index tree
- that have search index built
- i.e. block->index points to this index.
- Protected by btr_search_latch except
- when during initialization in
- btr_search_info_create(). */
-
- /* The following fields are not protected by any latch.
- Unfortunately, this means that they must be aligned to
- the machine word, i.e., they cannot be turned into bit-fields. */
- page_t* root_guess; /* the root page frame when it was last time
- fetched, or NULL */
- ulint hash_analysis; /* when this exceeds BTR_SEARCH_HASH_ANALYSIS,
- the hash analysis starts; this is reset if no
- success noticed */
- ibool last_hash_succ; /* TRUE if the last search would have
- succeeded, or did succeed, using the hash
- index; NOTE that the value here is not exact:
- it is not calculated for every search, and the
- calculation itself is not always accurate! */
- ulint n_hash_potential;
- /* number of consecutive searches
- which would have succeeded, or did succeed,
- using the hash index;
- the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
- /*----------------------*/
- ulint n_fields; /* recommended prefix length for hash search:
- number of full fields */
- ulint n_bytes; /* recommended prefix: number of bytes in
- an incomplete field;
- see also BTR_PAGE_MAX_REC_SIZE */
- ibool left_side; /* TRUE or FALSE, depending on whether
- the leftmost record of several records with
- the same prefix should be indexed in the
- hash index */
- /*----------------------*/
-#ifdef UNIV_SEARCH_PERF_STAT
- ulint n_hash_succ; /* number of successful hash searches thus
- far */
- ulint n_hash_fail; /* number of failed hash searches */
- ulint n_patt_succ; /* number of successful pattern searches thus
- far */
- ulint n_searches; /* number of searches */
-#endif /* UNIV_SEARCH_PERF_STAT */
-#ifdef UNIV_DEBUG
- ulint magic_n; /* magic number */
-# define BTR_SEARCH_MAGIC_N 1112765
-#endif /* UNIV_DEBUG */
-};
-
-/* The hash index system */
-
-typedef struct btr_search_sys_struct btr_search_sys_t;
-
-struct btr_search_sys_struct{
- hash_table_t* hash_index;
-};
-
-extern btr_search_sys_t* btr_search_sys;
-
-/* The latch protecting the adaptive search system: this latch protects the
-(1) hash index;
-(2) columns of a record to which we have a pointer in the hash index;
-
-but does NOT protect:
-
-(3) next record offset field in a record;
-(4) next or previous records on the same page.
-
-Bear in mind (3) and (4) when using the hash index.
-*/
-
-extern rw_lock_t* btr_search_latch_temp;
-
-#define btr_search_latch (*btr_search_latch_temp)
-
-#ifdef UNIV_SEARCH_PERF_STAT
-extern ulint btr_search_n_succ;
-extern ulint btr_search_n_hash_fail;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
-/* After change in n_fields or n_bytes in info, this many rounds are waited
-before starting the hash analysis again: this is to save CPU time when there
-is no hope in building a hash index. */
-
-#define BTR_SEARCH_HASH_ANALYSIS 17
-
-/* Limit of consecutive searches for trying a search shortcut on the search
-pattern */
-
-#define BTR_SEARCH_ON_PATTERN_LIMIT 3
-
-/* Limit of consecutive searches for trying a search shortcut using the hash
-index */
-
-#define BTR_SEARCH_ON_HASH_LIMIT 3
-
-/* We do this many searches before trying to keep the search latch over calls
-from MySQL. If we notice someone waiting for the latch, we again set this
-much timeout. This is to reduce contention. */
-
-#define BTR_SEA_TIMEOUT 10000
-
-#ifndef UNIV_NONINL
-#include "btr0sea.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
deleted file mode 100644
index f4e33027c25..00000000000
--- a/storage/innobase/include/btr0sea.ic
+++ /dev/null
@@ -1,67 +0,0 @@
-/************************************************************************
-The index tree adaptive search
-
-(c) 1996 Innobase Oy
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "dict0mem.h"
-#include "btr0cur.h"
-#include "buf0buf.h"
-
-/*************************************************************************
-Updates the search info. */
-
-void
-btr_search_info_update_slow(
-/*========================*/
- btr_search_t* info, /* in/out: search info */
- btr_cur_t* cursor);/* in: cursor which was just positioned */
-
-/************************************************************************
-Returns search info for an index. */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(
-/*================*/
- /* out: search info; search mutex reserved */
- dict_index_t* index) /* in: index */
-{
- ut_ad(index);
-
- return(index->search_info);
-}
-
-/*************************************************************************
-Updates the search info. */
-UNIV_INLINE
-void
-btr_search_info_update(
-/*===================*/
- dict_index_t* index, /* in: index of the cursor */
- btr_cur_t* cursor) /* in: cursor which was just positioned */
-{
- btr_search_t* info;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- info = btr_search_get_info(index);
-
- info->hash_analysis++;
-
- if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) {
-
- /* Do nothing */
-
- return;
-
- }
-
- ut_ad(cursor->flag != BTR_CUR_HASH);
-
- btr_search_info_update_slow(info, cursor);
-}
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
deleted file mode 100644
index 8fa0bf0602d..00000000000
--- a/storage/innobase/include/btr0types.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/************************************************************************
-The index tree general types
-
-(c) 1996 Innobase Oy
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#ifndef btr0types_h
-#define btr0types_h
-
-#include "univ.i"
-
-#include "rem0types.h"
-#include "page0types.h"
-
-typedef struct btr_pcur_struct btr_pcur_t;
-typedef struct btr_cur_struct btr_cur_t;
-typedef struct btr_search_struct btr_search_t;
-
-#endif
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
deleted file mode 100644
index 3e8972d9182..00000000000
--- a/storage/innobase/include/buf0buf.h
+++ /dev/null
@@ -1,1074 +0,0 @@
-/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License 2
- as published by the Free Software Foundation in June 1991.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License 2
- along with this program (in file COPYING); if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-/******************************************************
-The database buffer pool high-level routines
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0buf_h
-#define buf0buf_h
-
-#include "univ.i"
-#include "fil0fil.h"
-#include "mtr0types.h"
-#include "buf0types.h"
-#include "sync0rw.h"
-#include "hash0hash.h"
-#include "ut0byte.h"
-#include "os0proc.h"
-
-/* Flags for flush types */
-#define BUF_FLUSH_LRU 1
-#define BUF_FLUSH_SINGLE_PAGE 2
-#define BUF_FLUSH_LIST 3 /* An array in the pool struct
- has size BUF_FLUSH_LIST + 1: if you
- add more flush types, put them in
- the middle! */
-/* Modes for buf_page_get_gen */
-#define BUF_GET 10 /* get always */
-#define BUF_GET_IF_IN_POOL 11 /* get if in pool */
-#define BUF_GET_NOWAIT 12 /* get if can set the latch without
- waiting */
-#define BUF_GET_NO_LATCH 14 /* get and bufferfix, but set no latch;
- we have separated this case, because
- it is error-prone programming not to
- set a latch, and it should be used
- with care */
-/* Modes for buf_page_get_known_nowait */
-#define BUF_MAKE_YOUNG 51
-#define BUF_KEEP_OLD 52
-/* Magic value to use instead of checksums when they are disabled */
-#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-
-extern buf_pool_t* buf_pool; /* The buffer pool of the database */
-#ifdef UNIV_DEBUG
-extern ibool buf_debug_prints;/* If this is set TRUE, the program
- prints info whenever read or flush
- occurs */
-#endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_write_requests; /* variable to count write request
- issued */
-
-/************************************************************************
-Creates the buffer pool. */
-
-buf_pool_t*
-buf_pool_init(
-/*==========*/
- /* out, own: buf_pool object, NULL if not
- enough memory or error */
- ulint max_size, /* in: maximum size of the buf_pool in
- blocks */
- ulint curr_size, /* in: current size to use, must be <=
- max_size, currently must be equal to
- max_size */
- ulint n_frames); /* in: number of frames; if AWE is used,
- this is the size of the address space window
- where physical memory pages are mapped; if
- AWE is not used then this must be the same
- as max_size */
-/*************************************************************************
-Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void);
-/*========================*/
- /* out: size in bytes */
-/*************************************************************************
-Gets the maximum size of buffer pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
-UNIV_INLINE
-ulint
-buf_pool_get_max_size(void);
-/*=======================*/
- /* out: size in bytes */
-/************************************************************************
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-ut_dulint_zero if all modified pages have been flushed to disk. */
-UNIV_INLINE
-dulint
-buf_pool_get_oldest_modification(void);
-/*==================================*/
- /* out: oldest modification in pool,
- ut_dulint_zero if none */
-/*************************************************************************
-Allocates a buffer frame. */
-
-buf_frame_t*
-buf_frame_alloc(void);
-/*==================*/
- /* out: buffer frame */
-/*************************************************************************
-Frees a buffer frame which does not contain a file page. */
-
-void
-buf_frame_free(
-/*===========*/
- buf_frame_t* frame); /* in: buffer frame */
-/*************************************************************************
-Copies contents of a buffer frame to a given buffer. */
-UNIV_INLINE
-byte*
-buf_frame_copy(
-/*===========*/
- /* out: buf */
- byte* buf, /* in: buffer to copy to */
- buf_frame_t* frame); /* in: buffer frame */
-/******************************************************************
-NOTE! The following macros should be used instead of buf_page_get_gen,
-to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
-in LA! */
-#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\
- SP, OF, LA, NULL,\
- BUF_GET, __FILE__, __LINE__, MTR)
-/******************************************************************
-Use these macros to bufferfix a page with no latching. Remember not to
-read the contents of the page unless you know it is safe. Do not modify
-the contents of the page! We have separated this case, because it is
-error-prone programming not to set a latch, and it should be used
-with care. */
-#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\
- SP, OF, RW_NO_LATCH, NULL,\
- BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
-/******************************************************************
-NOTE! The following macros should be used instead of buf_page_get_gen, to
-improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
-#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\
- SP, OF, LA, NULL,\
- BUF_GET_NOWAIT, __FILE__, __LINE__, MTR)
-/******************************************************************
-NOTE! The following macros should be used instead of
-buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
-RW_X_LATCH are allowed as LA! */
-#define buf_page_optimistic_get(LA, BL, G, MC, MTR) \
- buf_page_optimistic_get_func(LA, BL, G, MC, __FILE__, __LINE__, MTR)
-/************************************************************************
-This is the general function used to get optimistic access to a database
-page. */
-
-ibool
-buf_page_optimistic_get_func(
-/*=========================*/
- /* out: TRUE if success */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /* in: guessed block */
- buf_frame_t* guess, /* in: guessed frame; note that AWE may move
- frames */
- dulint modify_clock,/* in: modify clock value if mode is
- ..._GUESS_ON_CLOCK */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr); /* in: mini-transaction */
-/************************************************************************
-Tries to get the page, but if file io is required, releases all latches
-in mtr down to the given savepoint. If io is required, this function
-retrieves the page to buffer buf_pool, but does not bufferfix it or latch
-it. */
-UNIV_INLINE
-buf_frame_t*
-buf_page_get_release_on_io(
-/*=======================*/
- /* out: pointer to the frame, or NULL
- if not in buffer buf_pool */
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space
- in units of a page */
- buf_frame_t* guess, /* in: guessed frame or NULL */
- ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH,
- or RW_NO_LATCH */
- ulint savepoint, /* in: mtr savepoint */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
-This is used to get access to a known database page, when no waiting can be
-done. */
-
-ibool
-buf_page_get_known_nowait(
-/*======================*/
- /* out: TRUE if success */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
- buf_frame_t* guess, /* in: the known page frame */
- ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr); /* in: mini-transaction */
-/************************************************************************
-This is the general function used to get access to a database page. */
-
-buf_frame_t*
-buf_page_get_gen(
-/*=============*/
- /* out: pointer to the frame or NULL */
- ulint space, /* in: space id */
- ulint offset, /* in: page number */
- ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_frame_t* guess, /* in: guessed frame or NULL */
- ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_GET_NO_LATCH */
- const char* file, /* in: file name */
- ulint line, /* in: line where called */
- mtr_t* mtr); /* in: mini-transaction */
-/************************************************************************
-Initializes a page to the buffer buf_pool. The page is usually not read
-from a file even if it cannot be found in the buffer buf_pool. This is one
-of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_init_for_read above). */
-
-buf_frame_t*
-buf_page_create(
-/*============*/
- /* out: pointer to the frame, page bufferfixed */
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space in units of
- a page */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
-
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space
- in units of a page */
- buf_block_t* block); /* in: block to init */
-/************************************************************************
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
-UNIV_INLINE
-void
-buf_page_release(
-/*=============*/
- buf_block_t* block, /* in: buffer block */
- ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
- RW_NO_LATCH */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from from slipping out of
-the buffer pool. */
-
-void
-buf_page_make_young(
-/*================*/
- buf_frame_t* frame); /* in: buffer frame of a file page */
-/************************************************************************
-Returns TRUE if the page can be found in the buffer pool hash table. NOTE
-that it is possible that the page is not yet read from disk, though. */
-
-ibool
-buf_page_peek(
-/*==========*/
- /* out: TRUE if found from page hash table,
- NOTE that the page is not necessarily yet read
- from disk! */
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
-Returns the buffer control block if the page can be found in the buffer
-pool. NOTE that it is possible that the page is not yet read
-from disk, though. This is a very low-level function: use with care! */
-
-buf_block_t*
-buf_page_peek_block(
-/*================*/
- /* out: control block if found from page hash table,
- otherwise NULL; NOTE that the page is not necessarily
- yet read from disk! */
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated. */
-
-buf_block_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
- /* out: control block if found from page hash table,
- otherwise NULL */
- ulint space, /* in: space id */
- ulint offset); /* in: page number */
-/************************************************************************
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated. */
-
-buf_block_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
- /* out: control block if found from page hash table,
- otherwise NULL */
- ulint space, /* in: space id */
- ulint offset); /* in: page number */
-/************************************************************************
-Recommends a move of a block to the start of the LRU list if there is danger
-of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex. */
-UNIV_INLINE
-ibool
-buf_block_peek_if_too_old(
-/*======================*/
- /* out: TRUE if should be made younger */
- buf_block_t* block); /* in: block to make younger */
-/************************************************************************
-Returns the current state of is_hashed of a page. FALSE if the page is
-not in the pool. NOTE that this operation does not fix the page in the
-pool if it is found there. */
-
-ibool
-buf_page_peek_if_search_hashed(
-/*===========================*/
- /* out: TRUE if page hash index is built in search
- system */
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
-Gets the youngest modification log sequence number for a frame.
-Returns zero if not file page or no modification occurred yet. */
-UNIV_INLINE
-dulint
-buf_frame_get_newest_modification(
-/*==============================*/
- /* out: newest modification to page */
- buf_frame_t* frame); /* in: pointer to a frame */
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-dulint
-buf_frame_modify_clock_inc(
-/*=======================*/
- /* out: new value */
- buf_frame_t* frame); /* in: pointer to a frame */
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-dulint
-buf_block_modify_clock_inc(
-/*=======================*/
- /* out: new value */
- buf_block_t* block); /* in: block */
-/************************************************************************
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block. */
-UNIV_INLINE
-dulint
-buf_block_get_modify_clock(
-/*=======================*/
- /* out: value */
- buf_block_t* block); /* in: block */
-/************************************************************************
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value
-on 32-bit and 64-bit architectures. */
-
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
- /* out: checksum */
- byte* page); /* in: buffer page */
-/************************************************************************
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
-NOTE: we must first store the new formula checksum to
-FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input! */
-
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
- /* out: checksum */
- byte* page); /* in: buffer page */
-/************************************************************************
-Checks if a page is corrupt. */
-
-ibool
-buf_page_is_corrupted(
-/*==================*/
- /* out: TRUE if corrupted */
- byte* read_buf); /* in: a database page */
-/**************************************************************************
-Gets the page number of a pointer pointing within a buffer frame containing
-a file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_page_no(
-/*==================*/
- /* out: page number */
- byte* ptr); /* in: pointer to within a buffer frame */
-/**************************************************************************
-Gets the space id of a pointer pointing within a buffer frame containing a
-file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_space_id(
-/*===================*/
- /* out: space id */
- byte* ptr); /* in: pointer to within a buffer frame */
-/**************************************************************************
-Gets the space id, page offset, and byte offset within page of a
-pointer pointing to a buffer frame containing a file page. */
-UNIV_INLINE
-void
-buf_ptr_get_fsp_addr(
-/*=================*/
- byte* ptr, /* in: pointer to a buffer frame */
- ulint* space, /* out: space id */
- fil_addr_t* addr); /* out: page offset and byte offset */
-/**************************************************************************
-Gets the hash value of the page the pointer is pointing to. This can be used
-in searches in the lock hash table. */
-UNIV_INLINE
-ulint
-buf_frame_get_lock_hash_val(
-/*========================*/
- /* out: lock hash value */
- byte* ptr); /* in: pointer to within a buffer frame */
-/**************************************************************************
-Gets the mutex number protecting the page record lock hash chain in the lock
-table. */
-UNIV_INLINE
-mutex_t*
-buf_frame_get_mutex(
-/*================*/
- /* out: mutex */
- byte* ptr); /* in: pointer to within a buffer frame */
-/***********************************************************************
-Gets the frame the pointer is pointing to. */
-UNIV_INLINE
-buf_frame_t*
-buf_frame_align(
-/*============*/
- /* out: pointer to frame */
- byte* ptr); /* in: pointer to a frame */
-/***********************************************************************
-Checks if a pointer points to the block array of the buffer pool (blocks, not
-the frames). */
-UNIV_INLINE
-ibool
-buf_pool_is_block(
-/*==============*/
- /* out: TRUE if pointer to block */
- void* ptr); /* in: pointer to memory */
-#ifdef UNIV_DEBUG
-/*************************************************************************
-Validates the buffer pool data structure. */
-
-ibool
-buf_validate(void);
-/*==============*/
-/*************************************************************************
-Prints info of the buffer pool data structure. */
-
-void
-buf_print(void);
-/*============*/
-
-/*************************************************************************
-Returns the number of latched pages in the buffer pool. */
-
-ulint
-buf_get_latched_pages_number(void);
-/*==============================*/
-#endif /* UNIV_DEBUG */
-
-/************************************************************************
-Prints a page to stderr. */
-
-void
-buf_page_print(
-/*===========*/
- byte* read_buf); /* in: a database page */
-
-/*************************************************************************
-Returns the number of pending buf pool ios. */
-
-ulint
-buf_get_n_pending_ios(void);
-/*=======================*/
-/*************************************************************************
-Prints info of the buffer i/o. */
-
-void
-buf_print_io(
-/*=========*/
- FILE* file); /* in: file where to print */
-/*************************************************************************
-Returns the ratio in percents of modified pages in the buffer pool /
-database pages in the buffer pool. */
-
-ulint
-buf_get_modified_ratio_pct(void);
-/*============================*/
-/**************************************************************************
-Refreshes the statistics used to print per-second averages. */
-
-void
-buf_refresh_io_stats(void);
-/*======================*/
-/*************************************************************************
-Checks that all file pages in the buffer are in a replaceable state. */
-
-ibool
-buf_all_freed(void);
-/*===============*/
-/*************************************************************************
-Checks that there currently are no pending i/o-operations for the buffer
-pool. */
-
-ibool
-buf_pool_check_no_pending_io(void);
-/*==============================*/
- /* out: TRUE if there is no pending i/o */
-/*************************************************************************
-Invalidates the file pages in the buffer pool when an archive recovery is
-completed. All the file pages buffered must be in a replaceable state when
-this function is called: not latched and not modified. */
-
-void
-buf_pool_invalidate(void);
-/*=====================*/
-
-/*========================================================================
---------------------------- LOWER LEVEL ROUTINES -------------------------
-=========================================================================*/
-
-/************************************************************************
-Maps the page of block to a frame, if not mapped yet. Unmaps some page
-from the end of the awe_LRU_free_mapped. */
-
-void
-buf_awe_map_page_to_frame(
-/*======================*/
- buf_block_t* block, /* in: block whose page should be
- mapped to a frame */
- ibool add_to_mapped_list);/* in: TRUE if we in the case
- we need to map the page should also
- add the block to the
- awe_LRU_free_mapped list */
-#ifdef UNIV_SYNC_DEBUG
-/*************************************************************************
-Adds latch level info for the rw-lock protecting the buffer frame. This
-should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch. */
-UNIV_INLINE
-void
-buf_page_dbg_add_level(
-/*===================*/
- buf_frame_t* frame, /* in: buffer page where we have acquired
- a latch */
- ulint level); /* in: latching order level */
-#endif /* UNIV_SYNC_DEBUG */
-/*************************************************************************
-Gets a pointer to the memory frame of a block. */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
- /* out: pointer to the frame */
- buf_block_t* block); /* in: pointer to the control block */
-/*************************************************************************
-Gets the space id of a block. */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
- /* out: space id */
- buf_block_t* block); /* in: pointer to the control block */
-/*************************************************************************
-Gets the page number of a block. */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
- /* out: page number */
- buf_block_t* block); /* in: pointer to the control block */
-/***********************************************************************
-Gets the block to whose frame the pointer is pointing to. */
-UNIV_INLINE
-buf_block_t*
-buf_block_align(
-/*============*/
- /* out: pointer to block */
- byte* ptr); /* in: pointer to a frame */
-/************************************************************************
-This function is used to get info if there is an io operation
-going on on a buffer page. */
-UNIV_INLINE
-ibool
-buf_page_io_query(
-/*==============*/
- /* out: TRUE if io going on */
- buf_block_t* block); /* in: pool block, must be bufferfixed */
-/***********************************************************************
-Accessor function for block array. */
-UNIV_INLINE
-buf_block_t*
-buf_pool_get_nth_block(
-/*===================*/
- /* out: pointer to block */
- buf_pool_t* pool, /* in: pool */
- ulint i); /* in: index of the block */
-/************************************************************************
-Function which inits a page for read to the buffer buf_pool. If the page is
-(1) already in buf_pool, or
-(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
-(3) if the space is deleted or being deleted,
-then this function does nothing.
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
-on the buffer frame. The io-handler must take care that the flag is cleared
-and the lock released later. This is one of the functions which perform the
-state transition NOT_USED => FILE_PAGE to a block (the other is
-buf_page_create). */
-
-buf_block_t*
-buf_page_init_for_read(
-/*===================*/
- /* out: pointer to the block or NULL */
- ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /* in: space id */
- ib_longlong tablespace_version,/* in: prevents reading from a wrong
- version of the tablespace in case we have done
- DISCARD + IMPORT */
- ulint offset);/* in: page number */
-/************************************************************************
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool. */
-
-void
-buf_page_io_complete(
-/*=================*/
- buf_block_t* block); /* in: pointer to the block in question */
-/************************************************************************
-Calculates a folded value of a file page address to use in the page hash
-table. */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
- /* out: the folded value */
- ulint space, /* in: space id */
- ulint offset);/* in: offset of the page within space */
-/**********************************************************************
-Returns the control block of a file page, NULL if not found. */
-UNIV_INLINE
-buf_block_t*
-buf_page_hash_get(
-/*==============*/
- /* out: block, NULL if not found */
- ulint space, /* in: space id */
- ulint offset);/* in: offset of the page within space */
-/***********************************************************************
-Increments the pool clock by one and returns its new value. Remember that
-in the 32 bit version the clock wraps around at 4 billion! */
-UNIV_INLINE
-ulint
-buf_pool_clock_tic(void);
-/*====================*/
- /* out: new clock value */
-/*************************************************************************
-Gets the current length of the free list of buffer blocks. */
-
-ulint
-buf_get_free_list_len(void);
-/*=======================*/
-
-
-
-/* The buffer control block structure */
-
-struct buf_block_struct{
-
- /* 1. General fields */
-
- ulint magic_n; /* magic number to check */
- ulint state; /* state of the control block:
- BUF_BLOCK_NOT_USED, ...; changing
- this is only allowed when a thread
- has BOTH the buffer pool mutex AND
- block->mutex locked */
- byte* frame; /* pointer to buffer frame which
- is of size UNIV_PAGE_SIZE, and
- aligned to an address divisible by
- UNIV_PAGE_SIZE; if AWE is used, this
- will be NULL for the pages which are
- currently not mapped into the virtual
- address space window of the buffer
- pool */
- os_awe_t* awe_info; /* if AWE is used, then an array of
- awe page infos for
- UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
- (normally = 4) physical memory
- pages; otherwise NULL */
- ulint space; /* space id of the page */
- ulint offset; /* page number within the space */
- ulint lock_hash_val; /* hashed value of the page address
- in the record lock hash table */
- mutex_t mutex; /* mutex protecting this block:
- state (also protected by the buffer
- pool mutex), io_fix, buf_fix_count,
- and accessed; we introduce this new
- mutex in InnoDB-5.1 to relieve
- contention on the buffer pool mutex */
- rw_lock_t lock; /* read-write lock of the buffer
- frame */
- buf_block_t* hash; /* node used in chaining to the page
- hash table */
- ibool check_index_page_at_flush;
- /* TRUE if we know that this is
- an index page, and want the database
- to check its consistency before flush;
- note that there may be pages in the
- buffer pool which are index pages,
- but this flag is not set because
- we do not keep track of all pages */
- /* 2. Page flushing fields */
-
- UT_LIST_NODE_T(buf_block_t) flush_list;
- /* node of the modified, not yet
- flushed blocks list */
- dulint newest_modification;
- /* log sequence number of the youngest
- modification to this block, zero if
- not modified */
- dulint oldest_modification;
- /* log sequence number of the START of
- the log entry written of the oldest
- modification to this block which has
- not yet been flushed on disk; zero if
- all modifications are on disk */
- ulint flush_type; /* if this block is currently being
- flushed to disk, this tells the
- flush_type: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST */
-
- /* 3. LRU replacement algorithm fields */
-
- UT_LIST_NODE_T(buf_block_t) free;
- /* node of the free block list */
- ibool in_free_list; /* TRUE if in the free list; used in
- debugging */
- UT_LIST_NODE_T(buf_block_t) LRU;
- /* node of the LRU list */
- UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
- /* in the AWE version node in the
- list of free and LRU blocks which are
- mapped to a frame */
- ibool in_LRU_list; /* TRUE of the page is in the LRU list;
- used in debugging */
- ulint LRU_position; /* value which monotonically
- decreases (or may stay constant if
- the block is in the old blocks) toward
- the end of the LRU list, if the pool
- ulint_clock has not wrapped around:
- NOTE that this value can only be used
- in heuristic algorithms, because of
- the possibility of a wrap-around! */
- ulint freed_page_clock;/* the value of freed_page_clock
- of the buffer pool when this block was
- the last time put to the head of the
- LRU list; a thread is allowed to
- read this for heuristic purposes
- without holding any mutex or latch */
- ibool old; /* TRUE if the block is in the old
- blocks in the LRU list */
- ibool accessed; /* TRUE if the page has been accessed
- while in the buffer pool: read-ahead
- may read in pages which have not been
- accessed yet; this is protected by
- block->mutex; a thread is allowed to
- read this for heuristic purposes
- without holding any mutex or latch */
- ulint buf_fix_count; /* count of how manyfold this block
- is currently bufferfixed; this is
- protected by block->mutex */
- ulint io_fix; /* if a read is pending to the frame,
- io_fix is BUF_IO_READ, in the case
- of a write BUF_IO_WRITE, otherwise 0;
- this is protected by block->mutex */
- /* 4. Optimistic search field */
-
- dulint modify_clock; /* this clock is incremented every
- time a pointer to a record on the
- page may become obsolete; this is
- used in the optimistic cursor
- positioning: if the modify clock has
- not changed, we know that the pointer
- is still valid; this field may be
- changed if the thread (1) owns the
- pool mutex and the page is not
- bufferfixed, or (2) the thread has an
- x-latch on the block */
-
- /* 5. Hash search fields: NOTE that the first 4 fields are NOT
- protected by any semaphore! */
-
- ulint n_hash_helps; /* counter which controls building
- of a new hash index for the page */
- ulint n_fields; /* recommended prefix length for hash
- search: number of full fields */
- ulint n_bytes; /* recommended prefix: number of bytes
- in an incomplete field */
- ibool left_side; /* TRUE or FALSE, depending on
- whether the leftmost record of several
- records with the same prefix should be
- indexed in the hash index */
-
- /* These 6 fields may only be modified when we have
- an x-latch on btr_search_latch AND
- a) we are holding an s-latch or x-latch on block->lock or
- b) we know that block->buf_fix_count == 0.
-
- An exception to this is when we init or create a page
- in the buffer pool in buf0buf.c. */
-
- ibool is_hashed; /* TRUE if hash index has already been
- built on this page; note that it does
- not guarantee that the index is
- complete, though: there may have been
- hash collisions, record deletions,
- etc. */
- ulint n_pointers; /* used in debugging: the number of
- pointers in the adaptive hash index
- pointing to this frame */
- ulint curr_n_fields; /* prefix length for hash indexing:
- number of full fields */
- ulint curr_n_bytes; /* number of bytes in hash indexing */
- ibool curr_left_side; /* TRUE or FALSE in hash indexing */
- dict_index_t* index; /* Index for which the adaptive
- hash index has been created. */
- /* 6. Debug fields */
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_t debug_latch; /* in the debug version, each thread
- which bufferfixes the block acquires
- an s-latch here; so we can use the
- debug utilities in sync0rw */
-#endif
- ibool file_page_was_freed;
- /* this is set to TRUE when fsp
- frees a page in buffer pool */
-};
-
-#define BUF_BLOCK_MAGIC_N 41526563
-
-/* The buffer pool structure. NOTE! The definition appears here only for
-other modules of this directory (buf) to see it. Do not use from outside! */
-
-struct buf_pool_struct{
-
- /* 1. General fields */
-
- mutex_t mutex; /* mutex protecting the buffer pool
- struct and control blocks, except the
- read-write lock in them */
- byte* frame_mem; /* pointer to the memory area which
- was allocated for the frames; in AWE
- this is the virtual address space
- window where we map pages stored
- in physical memory */
- byte* frame_zero; /* pointer to the first buffer frame:
- this may differ from frame_mem, because
- this is aligned by the frame size */
- byte* high_end; /* pointer to the end of the buffer
- frames */
- ulint n_frames; /* number of frames */
- buf_block_t* blocks; /* array of buffer control blocks */
- buf_block_t** blocks_of_frames;/* inverse mapping which can be used
- to retrieve the buffer control block
- of a frame; this is an array which
- lists the blocks of frames in the
- order frame_zero,
- frame_zero + UNIV_PAGE_SIZE, ...
- a control block is always assigned
- for each frame, even if the frame does
- not contain any data; note that in AWE
- there are more control blocks than
- buffer frames */
- os_awe_t* awe_info; /* if AWE is used, AWE info for the
- physical 4 kB memory pages associated
- with buffer frames */
- ulint max_size; /* number of control blocks ==
- maximum pool size in pages */
- ulint curr_size; /* current pool size in pages;
- currently always the same as
- max_size */
- hash_table_t* page_hash; /* hash table of the file pages */
-
- ulint n_pend_reads; /* number of pending read operations */
-
- time_t last_printout_time; /* when buf_print was last time
- called */
- ulint n_pages_read; /* number read operations */
- ulint n_pages_written;/* number write operations */
- ulint n_pages_created;/* number of pages created in the pool
- with no read */
- ulint n_page_gets; /* number of page gets performed;
- also successful searches through
- the adaptive hash index are
- counted as page gets; this field
- is NOT protected by the buffer
- pool mutex */
- ulint n_pages_awe_remapped; /* if AWE is enabled, the
- number of remaps of blocks to
- buffer frames */
- ulint n_page_gets_old;/* n_page_gets when buf_print was
- last time called: used to calculate
- hit rate */
- ulint n_pages_read_old;/* n_pages_read when buf_print was
- last time called */
- ulint n_pages_written_old;/* number write operations */
- ulint n_pages_created_old;/* number of pages created in
- the pool with no read */
- ulint n_pages_awe_remapped_old;
- /* 2. Page flushing algorithm fields */
-
- UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
- /* base node of the modified block
- list */
- ibool init_flush[BUF_FLUSH_LIST + 1];
- /* this is TRUE when a flush of the
- given type is being initialized */
- ulint n_flush[BUF_FLUSH_LIST + 1];
- /* this is the number of pending
- writes in the given flush type */
- os_event_t no_flush[BUF_FLUSH_LIST + 1];
- /* this is in the set state when there
- is no flush batch of the given type
- running */
- ulint ulint_clock; /* a sequence number used to count
- time. NOTE! This counter wraps
- around at 4 billion (if ulint ==
- 32 bits)! */
- ulint freed_page_clock;/* a sequence number used to count the
- number of buffer blocks removed from
- the end of the LRU list; NOTE that
- this counter may wrap around at 4
- billion! A thread is allowed to
- read this for heuristic purposes
- without holding any mutex or latch */
- ulint LRU_flush_ended;/* when an LRU flush ends for a page,
- this is incremented by one; this is
- set to zero when a buffer block is
- allocated */
-
- /* 3. LRU replacement algorithm fields */
-
- UT_LIST_BASE_NODE_T(buf_block_t) free;
- /* base node of the free block list;
- in the case of AWE, at the start are
- always free blocks for which the
- physical memory is mapped to a frame */
- UT_LIST_BASE_NODE_T(buf_block_t) LRU;
- /* base node of the LRU list */
- buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
- blocks in the LRU list; NULL if LRU
- length less than BUF_LRU_OLD_MIN_LEN */
- ulint LRU_old_len; /* length of the LRU list from
- the block to which LRU_old points
- onward, including that block;
- see buf0lru.c for the restrictions
- on this value; not defined if
- LRU_old == NULL */
- UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
- /* list of those blocks which are
- in the LRU list or the free list, and
- where the page is mapped to a frame;
- thus, frames allocated, e.g., to the
- locki table, are not in this list */
-};
-
-/* States of a control block */
-#define BUF_BLOCK_NOT_USED 211 /* is in the free list */
-#define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns
- a block, it is in this state */
-#define BUF_BLOCK_FILE_PAGE 213 /* contains a buffered file page */
-#define BUF_BLOCK_MEMORY 214 /* contains some main memory object */
-#define BUF_BLOCK_REMOVE_HASH 215 /* hash index should be removed
- before putting to the free list */
-
-/* Io_fix states of a control block; these must be != 0 */
-#define BUF_IO_READ 561
-#define BUF_IO_WRITE 562
-
-/************************************************************************
-Let us list the consistency conditions for different control block states.
-
-NOT_USED: is in free list, not in LRU list, not in flush list, nor
- page hash table
-READY_FOR_USE: is not in free list, LRU list, or flush list, nor page
- hash table
-MEMORY: is not in free list, LRU list, or flush list, nor page
- hash table
-FILE_PAGE: space and offset are defined, is in page hash table
- if io_fix == BUF_IO_WRITE,
- pool: no_flush[block->flush_type] is in reset state,
- pool: n_flush[block->flush_type] > 0
-
- (1) if buf_fix_count == 0, then
- is in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- is x-locked,
- if and only if io_fix == BUF_IO_READ
- is s-locked,
- if and only if io_fix == BUF_IO_WRITE
-
- (2) if buf_fix_count > 0, then
- is not in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- if io_fix == BUF_IO_READ,
- is x-locked
- if io_fix == BUF_IO_WRITE,
- is s-locked
-
-State transitions:
-
-NOT_USED => READY_FOR_USE
-READY_FOR_USE => MEMORY
-READY_FOR_USE => FILE_PAGE
-MEMORY => NOT_USED
-FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
- (1) buf_fix_count == 0,
- (2) oldest_modification == 0, and
- (3) io_fix == 0.
-*/
-
-#ifndef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
deleted file mode 100644
index 4e96e13b8dc..00000000000
--- a/storage/innobase/include/buf0buf.ic
+++ /dev/null
@@ -1,665 +0,0 @@
-/******************************************************
-The database buffer buf_pool
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-#include "mtr0mtr.h"
-
-#ifdef UNIV_DEBUG
-extern ulint buf_dbg_counter; /* This is used to insert validation
- operations in execution in the
- debug version */
-#endif /* UNIV_DEBUG */
-/************************************************************************
-Recommends a move of a block to the start of the LRU list if there is danger
-of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex. */
-UNIV_INLINE
-ibool
-buf_block_peek_if_too_old(
-/*======================*/
- /* out: TRUE if should be made younger */
- buf_block_t* block) /* in: block to make younger */
-{
- return(buf_pool->freed_page_clock >= block->freed_page_clock
- + 1 + (buf_pool->curr_size / 4));
-}
-
-/*************************************************************************
-Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void)
-/*========================*/
- /* out: size in bytes */
-{
- return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
-}
-
-/*************************************************************************
-Gets the maximum size of buffer buf_pool in bytes. In the case of AWE, the
-size of AWE window (= the frames). */
-UNIV_INLINE
-ulint
-buf_pool_get_max_size(void)
-/*=======================*/
- /* out: size in bytes */
-{
- return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
-}
-
-/***********************************************************************
-Accessor function for block array. */
-UNIV_INLINE
-buf_block_t*
-buf_pool_get_nth_block(
-/*===================*/
- /* out: pointer to block */
- buf_pool_t* buf_pool,/* in: buf_pool */
- ulint i) /* in: index of the block */
-{
- ut_ad(buf_pool);
- ut_ad(i < buf_pool->max_size);
-
- return(i + buf_pool->blocks);
-}
-
-/***********************************************************************
-Checks if a pointer points to the block array of the buffer pool (blocks, not
-the frames). */
-UNIV_INLINE
-ibool
-buf_pool_is_block(
-/*==============*/
- /* out: TRUE if pointer to block */
- void* ptr) /* in: pointer to memory */
-{
- if ((buf_pool->blocks <= (buf_block_t*)ptr)
- && ((buf_block_t*)ptr < buf_pool->blocks
- + buf_pool->max_size)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/************************************************************************
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-ut_dulint_zero if all modified pages have been flushed to disk. */
-UNIV_INLINE
-dulint
-buf_pool_get_oldest_modification(void)
-/*==================================*/
- /* out: oldest modification in pool,
- ut_dulint_zero if none */
-{
- buf_block_t* block;
- dulint lsn;
-
- mutex_enter(&(buf_pool->mutex));
-
- block = UT_LIST_GET_LAST(buf_pool->flush_list);
-
- if (block == NULL) {
- lsn = ut_dulint_zero;
- } else {
- lsn = block->oldest_modification;
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- return(lsn);
-}
-
-/***********************************************************************
-Increments the buf_pool clock by one and returns its new value. Remember
-that in the 32 bit version the clock wraps around at 4 billion! */
-UNIV_INLINE
-ulint
-buf_pool_clock_tic(void)
-/*====================*/
- /* out: new clock value */
-{
- ut_ad(mutex_own(&(buf_pool->mutex)));
-
- buf_pool->ulint_clock++;
-
- return(buf_pool->ulint_clock);
-}
-
-/*************************************************************************
-Gets a pointer to the memory frame of a block. */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
- /* out: pointer to the frame */
- buf_block_t* block) /* in: pointer to the control block */
-{
- ut_ad(block);
- ut_ad(block >= buf_pool->blocks);
- ut_ad(block < buf_pool->blocks + buf_pool->max_size);
- ut_ad(block->state != BUF_BLOCK_NOT_USED);
- ut_ad((block->state != BUF_BLOCK_FILE_PAGE)
- || (block->buf_fix_count > 0));
-
- return(block->frame);
-}
-
-/*************************************************************************
-Gets the space id of a block. */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
- /* out: space id */
- buf_block_t* block) /* in: pointer to the control block */
-{
- ut_ad(block);
- ut_ad(block >= buf_pool->blocks);
- ut_ad(block < buf_pool->blocks + buf_pool->max_size);
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
-
- return(block->space);
-}
-
-/*************************************************************************
-Gets the page number of a block. */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
- /* out: page number */
- buf_block_t* block) /* in: pointer to the control block */
-{
- ut_ad(block);
- ut_ad(block >= buf_pool->blocks);
- ut_ad(block < buf_pool->blocks + buf_pool->max_size);
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
-
- return(block->offset);
-}
-
-/***********************************************************************
-Gets the block to whose frame the pointer is pointing to. */
-UNIV_INLINE
-buf_block_t*
-buf_block_align(
-/*============*/
- /* out: pointer to block */
- byte* ptr) /* in: pointer to a frame */
-{
- buf_block_t* block;
- buf_frame_t* frame_zero;
-
- ut_ad(ptr);
-
- frame_zero = buf_pool->frame_zero;
-
- if (UNIV_UNLIKELY((ulint)ptr < (ulint)frame_zero)
- || UNIV_UNLIKELY((ulint)ptr > (ulint)(buf_pool->high_end))) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Error: trying to access a stray pointer %p\n"
- "InnoDB: buf pool start is at %p, end at %p\n"
- "InnoDB: Probable reason is database corruption"
- " or memory\n"
- "InnoDB: corruption. If this happens in an"
- " InnoDB database recovery, see\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
- "InnoDB: how to force recovery.\n",
- ptr, frame_zero,
- buf_pool->high_end);
- ut_error;
- }
-
- block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
- >> UNIV_PAGE_SIZE_SHIFT));
- return(block);
-}
-
-/***********************************************************************
-Gets the frame the pointer is pointing to. */
-UNIV_INLINE
-buf_frame_t*
-buf_frame_align(
-/*============*/
- /* out: pointer to frame */
- byte* ptr) /* in: pointer to a frame */
-{
- buf_frame_t* frame;
-
- ut_ad(ptr);
-
- frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
-
- if (UNIV_UNLIKELY((ulint)frame < (ulint)(buf_pool->frame_zero))
- || UNIV_UNLIKELY((ulint)frame >= (ulint)(buf_pool->high_end))) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Error: trying to access a stray pointer %p\n"
- "InnoDB: buf pool start is at %p, end at %p\n"
- "InnoDB: Probable reason is database corruption"
- " or memory\n"
- "InnoDB: corruption. If this happens in an"
- " InnoDB database recovery, see\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
- "InnoDB: how to force recovery.\n",
- ptr, buf_pool->frame_zero,
- buf_pool->high_end);
- ut_error;
- }
-
- return(frame);
-}
-
-/**************************************************************************
-Gets the page number of a pointer pointing within a buffer frame containing
-a file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_page_no(
-/*==================*/
- /* out: page number */
- byte* ptr) /* in: pointer to within a buffer frame */
-{
- return(buf_block_get_page_no(buf_block_align(ptr)));
-}
-
-/**************************************************************************
-Gets the space id of a pointer pointing within a buffer frame containing a
-file page. */
-UNIV_INLINE
-ulint
-buf_frame_get_space_id(
-/*===================*/
- /* out: space id */
- byte* ptr) /* in: pointer to within a buffer frame */
-{
- return(buf_block_get_space(buf_block_align(ptr)));
-}
-
-/**************************************************************************
-Gets the space id, page offset, and byte offset within page of a
-pointer pointing to a buffer frame containing a file page. */
-UNIV_INLINE
-void
-buf_ptr_get_fsp_addr(
-/*=================*/
- byte* ptr, /* in: pointer to a buffer frame */
- ulint* space, /* out: space id */
- fil_addr_t* addr) /* out: page offset and byte offset */
-{
- buf_block_t* block;
-
- block = buf_block_align(ptr);
-
- *space = buf_block_get_space(block);
- addr->page = buf_block_get_page_no(block);
- addr->boffset = ptr - buf_frame_align(ptr);
-}
-
-/**************************************************************************
-Gets the hash value of the page the pointer is pointing to. This can be used
-in searches in the lock hash table. */
-UNIV_INLINE
-ulint
-buf_frame_get_lock_hash_val(
-/*========================*/
- /* out: lock hash value */
- byte* ptr) /* in: pointer to within a buffer frame */
-{
- buf_block_t* block;
-
- block = buf_block_align(ptr);
-
- return(block->lock_hash_val);
-}
-
-/**************************************************************************
-Gets the mutex number protecting the page record lock hash chain in the lock
-table. */
-UNIV_INLINE
-mutex_t*
-buf_frame_get_mutex(
-/*================*/
- /* out: mutex */
- byte* ptr) /* in: pointer to within a buffer frame */
-{
- buf_block_t* block;
-
- block = buf_block_align(ptr);
-
- return(&block->mutex);
-}
-
-/*************************************************************************
-Copies contents of a buffer frame to a given buffer. */
-UNIV_INLINE
-byte*
-buf_frame_copy(
-/*===========*/
- /* out: buf */
- byte* buf, /* in: buffer to copy to */
- buf_frame_t* frame) /* in: buffer frame */
-{
- ut_ad(buf && frame);
-
- ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
-
- return(buf);
-}
-
-/************************************************************************
-Calculates a folded value of a file page address to use in the page hash
-table. */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
- /* out: the folded value */
- ulint space, /* in: space id */
- ulint offset) /* in: offset of the page within space */
-{
- return((space << 20) + space + offset);
-}
-
-/************************************************************************
-This function is used to get info if there is an io operation
-going on on a buffer page. */
-UNIV_INLINE
-ibool
-buf_page_io_query(
-/*==============*/
- /* out: TRUE if io going on */
- buf_block_t* block) /* in: buf_pool block, must be bufferfixed */
-{
- mutex_enter(&(buf_pool->mutex));
-
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
-
- if (block->io_fix != 0) {
- mutex_exit(&(buf_pool->mutex));
-
- return(TRUE);
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- return(FALSE);
-}
-
-/************************************************************************
-Gets the youngest modification log sequence number for a frame. Returns zero
-if not a file page or no modification occurred yet. */
-UNIV_INLINE
-dulint
-buf_frame_get_newest_modification(
-/*==============================*/
- /* out: newest modification to the page */
- buf_frame_t* frame) /* in: pointer to a frame */
-{
- buf_block_t* block;
- dulint lsn;
-
- ut_ad(frame);
-
- block = buf_block_align(frame);
-
- mutex_enter(&(buf_pool->mutex));
-
- if (block->state == BUF_BLOCK_FILE_PAGE) {
- lsn = block->newest_modification;
- } else {
- lsn = ut_dulint_zero;
- }
-
- mutex_exit(&(buf_pool->mutex));
-
- return(lsn);
-}
-
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-dulint
-buf_frame_modify_clock_inc(
-/*=======================*/
- /* out: new value */
- buf_frame_t* frame) /* in: pointer to a frame */
-{
- buf_block_t* block;
-
- ut_ad(frame);
-
- block = buf_block_align(frame);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
- UT_DULINT_INC(block->modify_clock);
-
- return(block->modify_clock);
-}
-
-/************************************************************************
-Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-dulint
-buf_block_modify_clock_inc(
-/*=======================*/
- /* out: new value */
- buf_block_t* block) /* in: block */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
- UT_DULINT_INC(block->modify_clock);
-
- return(block->modify_clock);
-}
-
-/************************************************************************
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block. */
-UNIV_INLINE
-dulint
-buf_block_get_modify_clock(
-/*=======================*/
- /* out: value */
- buf_block_t* block) /* in: block */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
- return(block->modify_clock);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/***********************************************************************
-Increments the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_buf_fix_inc_debug(
-/*========================*/
- buf_block_t* block, /* in: block to bufferfix */
- const char* file __attribute__ ((unused)), /* in: file name */
- ulint line __attribute__ ((unused))) /* in: line */
-{
- ibool ret;
-
- ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
-
- ut_ad(ret == TRUE);
- ut_ad(mutex_own(&block->mutex));
- block->buf_fix_count++;
-}
-#else /* UNIV_SYNC_DEBUG */
-/***********************************************************************
-Increments the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_buf_fix_inc(
-/*==================*/
- buf_block_t* block) /* in: block to bufferfix */
-{
- ut_ad(mutex_own(&block->mutex));
-
- block->buf_fix_count++;
-}
-#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
-Returns the control block of a file page, NULL if not found. */
-UNIV_INLINE
-buf_block_t*
-buf_page_hash_get(
-/*==============*/
- /* out: block, NULL if not found */
- ulint space, /* in: space id */
- ulint offset) /* in: offset of the page within space */
-{
- buf_block_t* block;
- ulint fold;
-
- ut_ad(buf_pool);
- ut_ad(mutex_own(&(buf_pool->mutex)));
-
- /* Look for the page in the hash table */
-
- fold = buf_page_address_fold(space, offset);
-
- HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
- (block->space == space) && (block->offset == offset));
- ut_a(block == NULL || block->state == BUF_BLOCK_FILE_PAGE);
-
- return(block);
-}
-
-/************************************************************************
-Tries to get the page, but if file io is required, releases all latches
-in mtr down to the given savepoint. If io is required, this function
-retrieves the page to buffer buf_pool, but does not bufferfix it or latch
-it. */
-UNIV_INLINE
-buf_frame_t*
-buf_page_get_release_on_io(
-/*=======================*/
- /* out: pointer to the frame, or NULL
- if not in buffer buf_pool */
- ulint space, /* in: space id */
- ulint offset, /* in: offset of the page within space
- in units of a page */
- buf_frame_t* guess, /* in: guessed frame or NULL */
- ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH,
- or RW_NO_LATCH */
- ulint savepoint, /* in: mtr savepoint */
- mtr_t* mtr) /* in: mtr */
-{
- buf_frame_t* frame;
-
- frame = buf_page_get_gen(space, offset, rw_latch, guess,
- BUF_GET_IF_IN_POOL,
- __FILE__, __LINE__,
- mtr);
- if (frame != NULL) {
-
- return(frame);
- }
-
- /* The page was not in the buffer buf_pool: release the latches
- down to the savepoint */
-
- mtr_rollback_to_savepoint(mtr, savepoint);
-
- buf_page_get(space, offset, RW_S_LATCH, mtr);
-
- /* When we get here, the page is in buffer, but we release
- the latches again down to the savepoint, before returning */
-
- mtr_rollback_to_savepoint(mtr, savepoint);
-
- return(NULL);
-}
-
-/************************************************************************
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
-UNIV_INLINE
-void
-buf_page_release(
-/*=============*/
- buf_block_t* block, /* in: buffer block */
- ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
- RW_NO_LATCH */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(block);
-
- ut_a(block->state == BUF_BLOCK_FILE_PAGE);
- ut_a(block->buf_fix_count > 0);
-
- if (rw_latch == RW_X_LATCH && mtr->modifications) {
- mutex_enter(&buf_pool->mutex);
- buf_flush_note_modification(block, mtr);
- mutex_exit(&buf_pool->mutex);
- }
-
- mutex_enter(&block->mutex);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_s_unlock(&(block->debug_latch));
-#endif
- block->buf_fix_count--;
-
- mutex_exit(&block->mutex);
-
- if (rw_latch == RW_S_LATCH) {
- rw_lock_s_unlock(&(block->lock));
- } else if (rw_latch == RW_X_LATCH) {
- rw_lock_x_unlock(&(block->lock));
- }
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/*************************************************************************
-Adds latch level info for the rw-lock protecting the buffer frame. This
-should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch. If
-UNIV_SYNC_DEBUG is not defined, compiles to an empty function. */
-UNIV_INLINE
-void
-buf_page_dbg_add_level(
-/*===================*/
- buf_frame_t* frame __attribute__((unused)), /* in: buffer page
- where we have acquired latch */
- ulint level __attribute__((unused))) /* in: latching order
- level */
-{
- sync_thread_add_level(&(buf_block_align(frame)->lock), level);
-}
-#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
deleted file mode 100644
index 322848509f4..00000000000
--- a/storage/innobase/include/buf0flu.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/******************************************************
-The database buffer pool flush algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0flu_h
-#define buf0flu_h
-
-#include "univ.i"
-#include "buf0types.h"
-#include "ut0byte.h"
-#include "mtr0types.h"
-
-/************************************************************************
-Updates the flush system data structures when a write is completed. */
-
-void
-buf_flush_write_complete(
-/*=====================*/
- buf_block_t* block); /* in: pointer to the block in question */
-/*************************************************************************
-Flushes pages from the end of the LRU list if there is too small
-a margin of replaceable pages there. */
-
-void
-buf_flush_free_margin(void);
-/*=======================*/
-/************************************************************************
-Initializes a page for writing to the tablespace. */
-
-void
-buf_flush_init_for_writing(
-/*=======================*/
- byte* page, /* in: page */
- dulint newest_lsn, /* in: newest modification lsn to the page */
- ulint space, /* in: space id */
- ulint page_no); /* in: page number */
-/***********************************************************************
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-
-ulint
-buf_flush_batch(
-/*============*/
- /* out: number of blocks for which the write
- request was queued */
- ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
- BUF_FLUSH_LIST, then the caller must not own
- any latches on pages */
- ulint min_n, /* in: wished minimum mumber of blocks flushed
- (it is not guaranteed that the actual number
- is that big, though) */
- dulint lsn_limit); /* in the case BUF_FLUSH_LIST all blocks whose
- oldest_modification is smaller than this
- should be flushed (if their number does not
- exceed min_n), otherwise ignored */
-/**********************************************************************
-Waits until a flush batch of the given type ends */
-
-void
-buf_flush_wait_batch_end(
-/*=====================*/
- ulint type); /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-/************************************************************************
-This function should be called at a mini-transaction commit, if a page was
-modified in it. Puts the block to the list of modified blocks, if it not
-already in it. */
-UNIV_INLINE
-void
-buf_flush_note_modification(
-/*========================*/
- buf_block_t* block, /* in: block which is modified */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
- buf_block_t* block, /* in: block which is modified */
- dulint start_lsn, /* in: start lsn of the first mtr in a
- set of mtr's */
- dulint end_lsn); /* in: end lsn of the last mtr in the
- set of mtr's */
-/************************************************************************
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., transition FILE_PAGE => NOT_USED allowed. */
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
- /* out: TRUE if can replace immediately */
- buf_block_t* block); /* in: buffer control block, must be in state
- BUF_BLOCK_FILE_PAGE and in the LRU list */
-/**********************************************************************
-Validates the flush list. */
-
-ibool
-buf_flush_validate(void);
-/*====================*/
- /* out: TRUE if ok */
-
-/* When buf_flush_free_margin is called, it tries to make this many blocks
-available to replacement in the free list and at the end of the LRU list (to
-make sure that a read-ahead batch can be read efficiently in a single
-sweep). */
-
-#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
-#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
-
-#ifndef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic
deleted file mode 100644
index ae873c42088..00000000000
--- a/storage/innobase/include/buf0flu.ic
+++ /dev/null
@@ -1,106 +0,0 @@
-/******************************************************
-The database buffer pool flush algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0buf.h"
-#include "mtr0mtr.h"
-
-/************************************************************************
-Inserts a modified block into the flush list. */
-
-void
-buf_flush_insert_into_flush_list(
-/*=============================*/
- buf_block_t* block); /* in: block which is modified */
-/************************************************************************
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
- buf_block_t* block); /* in: block which is modified */
-
-/************************************************************************
-This function should be called at a mini-transaction commit, if a page was
-modified in it. Puts the block to the list of modified blocks, if it is not
-already in it. */
-UNIV_INLINE
-void
-buf_flush_note_modification(
-/*========================*/
- buf_block_t* block, /* in: block which is modified */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(block);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(buf_pool->mutex)));
-
- ut_ad(ut_dulint_cmp(mtr->start_lsn, ut_dulint_zero) != 0);
- ut_ad(mtr->modifications);
- ut_ad(ut_dulint_cmp(block->newest_modification, mtr->end_lsn) <= 0);
-
- block->newest_modification = mtr->end_lsn;
-
- if (ut_dulint_is_zero(block->oldest_modification)) {
-
- block->oldest_modification = mtr->start_lsn;
- ut_ad(!ut_dulint_is_zero(block->oldest_modification));
-
- buf_flush_insert_into_flush_list(block);
- } else {
- ut_ad(ut_dulint_cmp(block->oldest_modification,
- mtr->start_lsn) <= 0);
- }
-
- ++srv_buf_pool_write_requests;
-}
-
-/************************************************************************
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
- buf_block_t* block, /* in: block which is modified */
- dulint start_lsn, /* in: start lsn of the first mtr in a
- set of mtr's */
- dulint end_lsn) /* in: end lsn of the last mtr in the
- set of mtr's */
-{
- ut_ad(block);
- ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- mutex_enter(&(buf_pool->mutex));
-
- ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
-
- block->newest_modification = end_lsn;
-
- if (ut_dulint_is_zero(block->oldest_modification)) {
-
- block->oldest_modification = start_lsn;
-
- ut_ad(!ut_dulint_is_zero(block->oldest_modification));
-
- buf_flush_insert_sorted_into_flush_list(block);
- } else {
- ut_ad(ut_dulint_cmp(block->oldest_modification,
- start_lsn) <= 0);
- }
-
- mutex_exit(&(buf_pool->mutex));
-}
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
deleted file mode 100644
index 6d26fd4d3b2..00000000000
--- a/storage/innobase/include/buf0lru.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/******************************************************
-The database buffer pool LRU replacement algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0lru_h
-#define buf0lru_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "buf0types.h"
-
-/**********************************************************************
-Tries to remove LRU flushed blocks from the end of the LRU list and put them
-to the free list. This is beneficial for the efficiency of the insert buffer
-operation, as flushed pages from non-unique non-clustered indexes are here
-taken out of the buffer pool, and their inserts redirected to the insert
-buffer. Otherwise, the flushed blocks could get modified again before read
-operations need new buffer blocks, and the i/o work done in flushing would be
-wasted. */
-
-void
-buf_LRU_try_free_flushed_blocks(void);
-/*==================================*/
-/**********************************************************************
-Returns TRUE if less than 25 % of the buffer pool is available. This can be
-used in heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks. */
-
-ibool
-buf_LRU_buf_pool_running_out(void);
-/*==============================*/
- /* out: TRUE if less than 25 % of buffer pool
- left */
-
-/*#######################################################################
-These are low-level functions
-#########################################################################*/
-
-/* Minimum LRU list length for which the LRU_old pointer is defined */
-
-#define BUF_LRU_OLD_MIN_LEN 80
-
-#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
-
-/**********************************************************************
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
-what guarantees that it will not try to read in pages after this operation has
-completed? */
-
-void
-buf_LRU_invalidate_tablespace(
-/*==========================*/
- ulint id); /* in: space id */
-/**********************************************************************
-Gets the minimum LRU_position field for the blocks in an initial segment
-(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
-guaranteed to be precise, because the ulint_clock may wrap around. */
-
-ulint
-buf_LRU_get_recent_limit(void);
-/*==========================*/
- /* out: the limit; zero if could not determine it */
-/**********************************************************************
-Look for a replaceable block from the end of the LRU list and put it to
-the free list if found. */
-
-ibool
-buf_LRU_search_and_free_block(
-/*==========================*/
- /* out: TRUE if freed */
- ulint n_iterations); /* in: how many times this has been called
- repeatedly without result: a high value means
- that we should search farther; if value is
- k < 10, then we only search k/10 * number
- of pages in the buffer pool from the end
- of the LRU list */
-/**********************************************************************
-Returns a free block from the buf_pool. The block is taken off the
-free list. If it is empty, blocks are moved from the end of the
-LRU list to the free list. */
-
-buf_block_t*
-buf_LRU_get_free_block(void);
-/*=========================*/
- /* out: the free control block; also if AWE is
- used, it is guaranteed that the block has its
- page mapped to a frame when we return */
-/**********************************************************************
-Puts a block back to the free list. */
-
-void
-buf_LRU_block_free_non_file_page(
-/*=============================*/
- buf_block_t* block); /* in: block, must not contain a file page */
-/**********************************************************************
-Adds a block to the LRU list. */
-
-void
-buf_LRU_add_block(
-/*==============*/
- buf_block_t* block, /* in: control block */
- ibool old); /* in: TRUE if should be put to the old
- blocks in the LRU list, else put to the
- start; if the LRU list is very short, added to
- the start regardless of this parameter */
-/**********************************************************************
-Moves a block to the start of the LRU list. */
-
-void
-buf_LRU_make_block_young(
-/*=====================*/
- buf_block_t* block); /* in: control block */
-/**********************************************************************
-Moves a block to the end of the LRU list. */
-
-void
-buf_LRU_make_block_old(
-/*===================*/
- buf_block_t* block); /* in: control block */
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Validates the LRU list. */
-
-ibool
-buf_LRU_validate(void);
-/*==================*/
-/**************************************************************************
-Prints the LRU list. */
-
-void
-buf_LRU_print(void);
-/*===============*/
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_NONINL
-#include "buf0lru.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/buf0lru.ic b/storage/innobase/include/buf0lru.ic
deleted file mode 100644
index 7b8ee457b0b..00000000000
--- a/storage/innobase/include/buf0lru.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-/******************************************************
-The database buffer replacement algorithm
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
deleted file mode 100644
index e4620172860..00000000000
--- a/storage/innobase/include/buf0rea.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/******************************************************
-The database buffer read
-
-(c) 1995 Innobase Oy
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0rea_h
-#define buf0rea_h
-
-#include "univ.i"
-#include "buf0types.h"
-
-/************************************************************************
-High-level function which reads a page asynchronously from a file to the
-buffer buf_pool if it is not already there. Sets the io_fix flag and sets
-an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread. Does a random read-ahead if it seems
-sensible. */
-
-ulint
-buf_read_page(
-/*==========*/
- /* out: number of page read requests issued: this can
- be > 1 if read-ahead occurred */
- ulint space, /* in: space id */
- ulint offset);/* in: page number */
-/************************************************************************
-Applies linear read-ahead if in the buf_pool the page is a border page of
-a linear read-ahead area and all the pages in the area have been accessed.
-Does not read any page if the read-ahead mechanism is not activated. Note
-that the algorithm looks at the 'natural' adjacent successor and
-predecessor of the page, which on the leaf level of a B-tree are the next
-and previous page in the chain of leaves. To know these, the page specified
-in (space, offset) must already be present in the buf_pool. Thus, the
-natural way to use this function is to call it when a page in the buf_pool
-is accessed the first time, calling this function just after it has been
-bufferfixed.
-NOTE 1: as this function looks at the natural predecessor and successor
-fields on the page, what happens, if these are not initialized to any
-sensible value? No problem, before applying read-ahead we check that the
-area to read is within the span of the space, if not, read-ahead is not
-applied. An uninitialized value may result in a useless read operation, but
-only very improbably.
-NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
-function must be written such that it cannot end up waiting for these
-latches!
-NOTE 3: the calling thread must want access to the page given: this rule is
-set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io. */
-
-ulint
-buf_read_ahead_linear(
-/*==================*/
- /* out: number of page read requests issued */
- ulint space, /* in: space id */
- ulint offset);/* in: page number of a page; NOTE: the current thread
- must want access to this page (see NOTE 3 above) */
-/************************************************************************
-Issues read requests for pages which the ibuf module wants to read in, in
-order to contract the insert buffer tree. Technically, this function is like
-a read-ahead function. */
-
-void
-buf_read_ibuf_merge_pages(
-/*======================*/
- ibool sync, /* in: TRUE if the caller wants this function
- to wait for the highest address page to get
- read in, before this function returns */
- ulint* space_ids, /* in: array of space ids */
- ib_longlong* space_versions,/* in: the spaces must have this version
- number (timestamp), otherwise we discard the
- read; we use this to cancel reads if
- DISCARD + IMPORT may have changed the
- tablespace size */
- ulint* page_nos, /* in: array of page numbers to read, with the
- highest page number the last in the array */
- ulint n_stored); /* in: number of page numbers in the array */
-/************************************************************************
-Issues read requests for pages which recovery wants to read in. */
-
-void
-buf_read_recv_pages(
-/*================*/
- ibool sync, /* in: TRUE if the caller wants this function
- to wait for the highest address page to get
- read in, before this function returns */
- ulint space, /* in: space id */
- ulint* page_nos, /* in: array of page numbers to read, with the
- highest page number the last in the array */
- ulint n_stored); /* in: number of page numbers in the array */
-
-/* The size in pages of the area which the read-ahead algorithms read if
-invoked */
-
-#define BUF_READ_AHEAD_AREA \
- ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
-
-/* Modes used in read-ahead */
-#define BUF_READ_IBUF_PAGES_ONLY 131
-#define BUF_READ_ANY_PAGE 132
-
-#endif
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
deleted file mode 100644
index 44fdfa80e73..00000000000
--- a/storage/innobase/include/buf0types.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/******************************************************
-The database buffer pool global types for the directory
-
-(c) 1995 Innobase Oy
-
-Created 11/17/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0types_h
-#define buf0types_h
-
-typedef struct buf_block_struct buf_block_t;
-typedef struct buf_pool_struct buf_pool_t;
-
-/* The type used for a buffer frame */
-typedef byte buf_frame_t;
-
-
-#endif
-
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
deleted file mode 100644
index 40592c3c0ce..00000000000
--- a/storage/innobase/include/data0data.h
+++ /dev/null
@@ -1,424 +0,0 @@
-/************************************************************************
-SQL data field and tuple
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef data0data_h
-#define data0data_h
-
-#include "univ.i"
-
-#include "data0types.h"
-#include "data0type.h"
-#include "mem0mem.h"
-#include "dict0types.h"
-
-typedef struct big_rec_struct big_rec_t;
-
-/* Some non-inlined functions used in the MySQL interface: */
-void
-dfield_set_data_noninline(
- dfield_t* field, /* in: field */
- void* data, /* in: data */
- ulint len); /* in: length or UNIV_SQL_NULL */
-void*
-dfield_get_data_noninline(
- dfield_t* field); /* in: field */
-ulint
-dfield_get_len_noninline(
- dfield_t* field); /* in: field */
-ulint
-dtuple_get_n_fields_noninline(
- dtuple_t* tuple); /* in: tuple */
-dfield_t*
-dtuple_get_nth_field_noninline(
- dtuple_t* tuple, /* in: tuple */
- ulint n); /* in: index of field */
-
-/*************************************************************************
-Gets pointer to the type struct of SQL data field. */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
- /* out: pointer to the type struct */
- dfield_t* field); /* in: SQL data field */
-/*************************************************************************
-Sets the type struct of SQL data field. */
-UNIV_INLINE
-void
-dfield_set_type(
-/*============*/
- dfield_t* field, /* in: SQL data field */
- dtype_t* type); /* in: pointer to data type struct */
-/*************************************************************************
-Gets pointer to the data in a field. */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
- /* out: pointer to data */
- dfield_t* field); /* in: field */
-/*************************************************************************
-Gets length of field data. */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
- /* out: length of data; UNIV_SQL_NULL if
- SQL null data */
- dfield_t* field); /* in: field */
-/*************************************************************************
-Sets length in a field. */
-UNIV_INLINE
-void
-dfield_set_len(
-/*===========*/
- dfield_t* field, /* in: field */
- ulint len); /* in: length or UNIV_SQL_NULL */
-/*************************************************************************
-Sets pointer to the data and length in a field. */
-UNIV_INLINE
-void
-dfield_set_data(
-/*============*/
- dfield_t* field, /* in: field */
- const void* data, /* in: data */
- ulint len); /* in: length or UNIV_SQL_NULL */
-/**************************************************************************
-Writes an SQL null field full of zeros. */
-UNIV_INLINE
-void
-data_write_sql_null(
-/*================*/
- byte* data, /* in: pointer to a buffer of size len */
- ulint len); /* in: SQL null size in bytes */
-/*************************************************************************
-Copies the data and len fields. */
-UNIV_INLINE
-void
-dfield_copy_data(
-/*=============*/
- dfield_t* field1, /* in: field to copy to */
- dfield_t* field2);/* in: field to copy from */
-/*************************************************************************
-Copies a data field to another. */
-UNIV_INLINE
-void
-dfield_copy(
-/*========*/
- dfield_t* field1, /* in: field to copy to */
- dfield_t* field2);/* in: field to copy from */
-/*************************************************************************
-Tests if data length and content is equal for two dfields. */
-UNIV_INLINE
-ibool
-dfield_datas_are_binary_equal(
-/*==========================*/
- /* out: TRUE if equal */
- dfield_t* field1, /* in: field */
- dfield_t* field2);/* in: field */
-/*************************************************************************
-Tests if dfield data length and content is equal to the given. */
-
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
- /* out: TRUE if equal */
- dfield_t* field, /* in: field */
- ulint len, /* in: data length or UNIV_SQL_NULL */
- byte* data); /* in: data */
-/*************************************************************************
-Gets number of fields in a data tuple. */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
- /* out: number of fields */
- dtuple_t* tuple); /* in: tuple */
-/*************************************************************************
-Gets nth field of a tuple. */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
- /* out: nth field */
- dtuple_t* tuple, /* in: tuple */
- ulint n); /* in: index of field */
-/*************************************************************************
-Gets info bits in a data tuple. */
-UNIV_INLINE
-ulint
-dtuple_get_info_bits(
-/*=================*/
- /* out: info bits */
- dtuple_t* tuple); /* in: tuple */
-/*************************************************************************
-Sets info bits in a data tuple. */
-UNIV_INLINE
-void
-dtuple_set_info_bits(
-/*=================*/
- dtuple_t* tuple, /* in: tuple */
- ulint info_bits); /* in: info bits */
-/*************************************************************************
-Gets number of fields used in record comparisons. */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields_cmp(
-/*====================*/
- /* out: number of fields used in comparisons
- in rem0cmp.* */
- dtuple_t* tuple); /* in: tuple */
-/*************************************************************************
-Sets number of fields used in record comparisons. */
-UNIV_INLINE
-void
-dtuple_set_n_fields_cmp(
-/*====================*/
- dtuple_t* tuple, /* in: tuple */
- ulint n_fields_cmp); /* in: number of fields used in
- comparisons in rem0cmp.* */
-/**************************************************************
-Creates a data tuple to a memory heap. The default value for number
-of fields used in record comparisons for this tuple is n_fields. */
-UNIV_INLINE
-dtuple_t*
-dtuple_create(
-/*==========*/
- /* out, own: created tuple */
- mem_heap_t* heap, /* in: memory heap where the tuple
- is created */
- ulint n_fields); /* in: number of fields */
-
-/*************************************************************************
-Creates a dtuple for use in MySQL. */
-
-dtuple_t*
-dtuple_create_for_mysql(
-/*====================*/
- /* out, own: created dtuple */
- void** heap, /* out: created memory heap */
- ulint n_fields); /* in: number of fields */
-/*************************************************************************
-Frees a dtuple used in MySQL. */
-
-void
-dtuple_free_for_mysql(
-/*==================*/
- void* heap);
-/*************************************************************************
-Sets number of fields used in a tuple. Normally this is set in
-dtuple_create, but if you want later to set it smaller, you can use this. */
-
-void
-dtuple_set_n_fields(
-/*================*/
- dtuple_t* tuple, /* in: tuple */
- ulint n_fields); /* in: number of fields */
-/**************************************************************
-The following function returns the sum of data lengths of a tuple. The space
-occupied by the field structs or the tuple struct is not counted. */
-UNIV_INLINE
-ulint
-dtuple_get_data_size(
-/*=================*/
- /* out: sum of data lens */
- dtuple_t* tuple); /* in: typed data tuple */
-/****************************************************************
-Returns TRUE if lengths of two dtuples are equal and respective data fields
-in them are equal when compared with collation in char fields (not as binary
-strings). */
-
-ibool
-dtuple_datas_are_ordering_equal(
-/*============================*/
- /* out: TRUE if length and fields are equal
- when compared with cmp_data_data:
- NOTE: in character type fields some letters
- are identified with others! (collation) */
- dtuple_t* tuple1, /* in: tuple 1 */
- dtuple_t* tuple2);/* in: tuple 2 */
-/****************************************************************
-Folds a prefix given as the number of fields of a tuple. */
-UNIV_INLINE
-ulint
-dtuple_fold(
-/*========*/
- /* out: the folded value */
- dtuple_t* tuple, /* in: the tuple */
- ulint n_fields,/* in: number of complete fields to fold */
- ulint n_bytes,/* in: number of bytes to fold in an
- incomplete last field */
- dulint tree_id);/* in: index tree id */
-/***********************************************************************
-Sets types of fields binary in a tuple. */
-UNIV_INLINE
-void
-dtuple_set_types_binary(
-/*====================*/
- dtuple_t* tuple, /* in: data tuple */
- ulint n); /* in: number of fields to set */
-/**************************************************************************
-Checks if a dtuple contains an SQL null value. */
-UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
- /* out: TRUE if some field is SQL null */
- dtuple_t* tuple); /* in: dtuple */
-/**************************************************************
-Checks that a data field is typed. Asserts an error if not. */
-
-ibool
-dfield_check_typed(
-/*===============*/
- /* out: TRUE if ok */
- dfield_t* field); /* in: data field */
-/**************************************************************
-Checks that a data tuple is typed. Asserts an error if not. */
-
-ibool
-dtuple_check_typed(
-/*===============*/
- /* out: TRUE if ok */
- dtuple_t* tuple); /* in: tuple */
-/**************************************************************
-Checks that a data tuple is typed. */
-
-ibool
-dtuple_check_typed_no_assert(
-/*=========================*/
- /* out: TRUE if ok */
- dtuple_t* tuple); /* in: tuple */
-#ifdef UNIV_DEBUG
-/**************************************************************
-Validates the consistency of a tuple which must be complete, i.e.,
-all fields must have been set. */
-
-ibool
-dtuple_validate(
-/*============*/
- /* out: TRUE if ok */
- dtuple_t* tuple); /* in: tuple */
-#endif /* UNIV_DEBUG */
-/*****************************************************************
-Pretty prints a dfield value according to its data type. */
-
-void
-dfield_print(
-/*=========*/
- dfield_t* dfield);/* in: dfield */
-/*****************************************************************
-Pretty prints a dfield value according to its data type. Also the hex string
-is printed if a string contains non-printable characters. */
-
-void
-dfield_print_also_hex(
-/*==================*/
- dfield_t* dfield); /* in: dfield */
-/**************************************************************
-The following function prints the contents of a tuple. */
-
-void
-dtuple_print(
-/*=========*/
- FILE* f, /* in: output stream */
- dtuple_t* tuple); /* in: tuple */
-/******************************************************************
-Moves parts of long fields in entry to the big record vector so that
-the size of tuple drops below the maximum record size allowed in the
-database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index. */
-
-big_rec_t*
-dtuple_convert_big_rec(
-/*===================*/
- /* out, own: created big record vector,
- NULL if we are not able to shorten
- the entry enough, i.e., if there are
- too many short fields in entry */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint* ext_vec,/* in: array of externally stored fields,
- or NULL: if a field already is externally
- stored, then we cannot move it to the vector
- this function returns */
- ulint n_ext_vec);/* in: number of elements in ext_vec */
-/******************************************************************
-Puts back to entry the data stored in vector. Note that to ensure the
-fields in entry can accommodate the data, vector must have been created
-from entry with dtuple_convert_big_rec. */
-
-void
-dtuple_convert_back_big_rec(
-/*========================*/
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: entry whose data was put to vector */
- big_rec_t* vector);/* in, own: big rec vector; it is
- freed in this function */
-/******************************************************************
-Frees the memory in a big rec vector. */
-
-void
-dtuple_big_rec_free(
-/*================*/
- big_rec_t* vector); /* in, own: big rec vector; it is
- freed in this function */
-
-/*######################################################################*/
-
-/* Structure for an SQL data field */
-struct dfield_struct{
- void* data; /* pointer to data */
- ulint len; /* data length; UNIV_SQL_NULL if SQL null; */
- dtype_t type; /* type of data */
-};
-
-struct dtuple_struct {
- ulint info_bits; /* info bits of an index record:
- the default is 0; this field is used
- if an index record is built from
- a data tuple */
- ulint n_fields; /* number of fields in dtuple */
- ulint n_fields_cmp; /* number of fields which should
- be used in comparison services
- of rem0cmp.*; the index search
- is performed by comparing only these
- fields, others are ignored; the
- default value in dtuple creation is
- the same value as n_fields */
- dfield_t* fields; /* fields */
- UT_LIST_NODE_T(dtuple_t) tuple_list;
- /* data tuples can be linked into a
- list using this field */
- ulint magic_n;
-};
-#define DATA_TUPLE_MAGIC_N 65478679
-
-/* A slot for a field in a big rec vector */
-
-typedef struct big_rec_field_struct big_rec_field_t;
-struct big_rec_field_struct {
- ulint field_no; /* field number in record */
- ulint len; /* stored data len */
- byte* data; /* stored data */
-};
-
-/* Storage format for overflow data in a big record, that is, a record
-which needs external storage of data fields */
-
-struct big_rec_struct {
- mem_heap_t* heap; /* memory heap from which allocated */
- ulint n_fields; /* number of stored fields */
- big_rec_field_t* fields; /* stored fields */
-};
-
-#ifndef UNIV_NONINL
-#include "data0data.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
deleted file mode 100644
index 753fa9ba45f..00000000000
--- a/storage/innobase/include/data0data.ic
+++ /dev/null
@@ -1,436 +0,0 @@
-/************************************************************************
-SQL data field and tuple
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0mem.h"
-#include "ut0rnd.h"
-
-#ifdef UNIV_DEBUG
-extern byte data_error;
-#endif /* UNIV_DEBUG */
-
-/*************************************************************************
-Gets pointer to the type struct of SQL data field. */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
- /* out: pointer to the type struct */
- dfield_t* field) /* in: SQL data field */
-{
- ut_ad(field);
-
- return(&(field->type));
-}
-
-/*************************************************************************
-Sets the type struct of SQL data field. */
-UNIV_INLINE
-void
-dfield_set_type(
-/*============*/
- dfield_t* field, /* in: SQL data field */
- dtype_t* type) /* in: pointer to data type struct */
-{
- ut_ad(field && type);
-
- field->type = *type;
-}
-
-/*************************************************************************
-Gets pointer to the data in a field. */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
- /* out: pointer to data */
- dfield_t* field) /* in: field */
-{
- ut_ad(field);
- ut_ad((field->len == UNIV_SQL_NULL)
- || (field->data != &data_error));
-
- return(field->data);
-}
-
-/*************************************************************************
-Gets length of field data. */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
- /* out: length of data; UNIV_SQL_NULL if
- SQL null data */
- dfield_t* field) /* in: field */
-{
- ut_ad(field);
- ut_ad((field->len == UNIV_SQL_NULL)
- || (field->data != &data_error));
-
- return(field->len);
-}
-
-/*************************************************************************
-Sets length in a field. */
-UNIV_INLINE
-void
-dfield_set_len(
-/*===========*/
- dfield_t* field, /* in: field */
- ulint len) /* in: length or UNIV_SQL_NULL */
-{
- ut_ad(field);
-
- field->len = len;
-}
-
-/*************************************************************************
-Sets pointer to the data and length in a field. */
-UNIV_INLINE
-void
-dfield_set_data(
-/*============*/
- dfield_t* field, /* in: field */
- const void* data, /* in: data */
- ulint len) /* in: length or UNIV_SQL_NULL */
-{
- ut_ad(field);
-
- field->data = (void*) data;
- field->len = len;
-}
-
-/*************************************************************************
-Copies the data and len fields. */
-UNIV_INLINE
-void
-dfield_copy_data(
-/*=============*/
- dfield_t* field1, /* in: field to copy to */
- dfield_t* field2) /* in: field to copy from */
-{
- ut_ad(field1 && field2);
-
- field1->data = field2->data;
- field1->len = field2->len;
-}
-
-/*************************************************************************
-Copies a data field to another. */
-UNIV_INLINE
-void
-dfield_copy(
-/*========*/
- dfield_t* field1, /* in: field to copy to */
- dfield_t* field2) /* in: field to copy from */
-{
- *field1 = *field2;
-}
-
-/*************************************************************************
-Tests if data length and content is equal for two dfields. */
-UNIV_INLINE
-ibool
-dfield_datas_are_binary_equal(
-/*==========================*/
- /* out: TRUE if equal */
- dfield_t* field1, /* in: field */
- dfield_t* field2) /* in: field */
-{
- ulint len;
-
- len = field1->len;
-
- if ((len != field2->len)
- || ((len != UNIV_SQL_NULL)
- && (0 != ut_memcmp(field1->data, field2->data,
- len)))) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*************************************************************************
-Gets info bits in a data tuple. */
-UNIV_INLINE
-ulint
-dtuple_get_info_bits(
-/*=================*/
- /* out: info bits */
- dtuple_t* tuple) /* in: tuple */
-{
- ut_ad(tuple);
-
- return(tuple->info_bits);
-}
-
-/*************************************************************************
-Sets info bits in a data tuple. */
-UNIV_INLINE
-void
-dtuple_set_info_bits(
-/*=================*/
- dtuple_t* tuple, /* in: tuple */
- ulint info_bits) /* in: info bits */
-{
- ut_ad(tuple);
-
- tuple->info_bits = info_bits;
-}
-
-/*************************************************************************
-Gets number of fields used in record comparisons. */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields_cmp(
-/*====================*/
- /* out: number of fields used in comparisons
- in rem0cmp.* */
- dtuple_t* tuple) /* in: tuple */
-{
- ut_ad(tuple);
-
- return(tuple->n_fields_cmp);
-}
-
-/*************************************************************************
-Sets number of fields used in record comparisons. */
-UNIV_INLINE
-void
-dtuple_set_n_fields_cmp(
-/*====================*/
- dtuple_t* tuple, /* in: tuple */
- ulint n_fields_cmp) /* in: number of fields used in
- comparisons in rem0cmp.* */
-{
- ut_ad(tuple);
- ut_ad(n_fields_cmp <= tuple->n_fields);
-
- tuple->n_fields_cmp = n_fields_cmp;
-}
-
-/*************************************************************************
-Gets number of fields in a data tuple. */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
- /* out: number of fields */
- dtuple_t* tuple) /* in: tuple */
-{
- ut_ad(tuple);
-
- return(tuple->n_fields);
-}
-
-/*************************************************************************
-Gets nth field of a tuple. */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
- /* out: nth field */
- dtuple_t* tuple, /* in: tuple */
- ulint n) /* in: index of field */
-{
- ut_ad(tuple);
- ut_ad(n < tuple->n_fields);
-
- return(tuple->fields + n);
-}
-
-/**************************************************************
-Creates a data tuple to a memory heap. The default value for number
-of fields used in record comparisons for this tuple is n_fields. */
-UNIV_INLINE
-dtuple_t*
-dtuple_create(
-/*==========*/
- /* out, own: created tuple */
- mem_heap_t* heap, /* in: memory heap where the tuple
- is created */
- ulint n_fields) /* in: number of fields */
-{
- dtuple_t* tuple;
-
- ut_ad(heap);
-
- tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t)
- + n_fields * sizeof(dfield_t));
- tuple->info_bits = 0;
- tuple->n_fields = n_fields;
- tuple->n_fields_cmp = n_fields;
- tuple->fields = (dfield_t*)(((byte*)tuple) + sizeof(dtuple_t));
-
-#ifdef UNIV_DEBUG
- tuple->magic_n = DATA_TUPLE_MAGIC_N;
-
- { /* In the debug version, initialize fields to an error value */
- ulint i;
-
- for (i = 0; i < n_fields; i++) {
- (tuple->fields + i)->data = &data_error;
- dfield_get_type(tuple->fields + i)->mtype = DATA_ERROR;
- }
- }
-#endif
- return(tuple);
-}
-
-/**************************************************************
-The following function returns the sum of data lengths of a tuple. The space
-occupied by the field structs or the tuple struct is not counted. Neither
-is possible space in externally stored parts of the field. */
-UNIV_INLINE
-ulint
-dtuple_get_data_size(
-/*=================*/
- /* out: sum of data lengths */
- dtuple_t* tuple) /* in: typed data tuple */
-{
- dfield_t* field;
- ulint n_fields;
- ulint len;
- ulint i;
- ulint sum = 0;
-
- ut_ad(tuple);
- ut_ad(dtuple_check_typed(tuple));
- ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
- n_fields = tuple->n_fields;
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(tuple, i);
- len = dfield_get_len(field);
-
- if (len == UNIV_SQL_NULL) {
- len = dtype_get_sql_null_size(dfield_get_type(field));
- }
-
- sum += len;
- }
-
- return(sum);
-}
-
-/***********************************************************************
-Sets types of fields binary in a tuple. */
-UNIV_INLINE
-void
-dtuple_set_types_binary(
-/*====================*/
- dtuple_t* tuple, /* in: data tuple */
- ulint n) /* in: number of fields to set */
-{
- dtype_t* dfield_type;
- ulint i;
-
- for (i = 0; i < n; i++) {
- dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
- dtype_set(dfield_type, DATA_BINARY, 0, 0);
- }
-}
-
-/****************************************************************
-Folds a prefix given as the number of fields of a tuple. */
-UNIV_INLINE
-ulint
-dtuple_fold(
-/*========*/
- /* out: the folded value */
- dtuple_t* tuple, /* in: the tuple */
- ulint n_fields,/* in: number of complete fields to fold */
- ulint n_bytes,/* in: number of bytes to fold in an
- incomplete last field */
- dulint tree_id)/* in: index tree id */
-{
- dfield_t* field;
- ulint i;
- byte* data;
- ulint len;
- ulint fold;
-
- ut_ad(tuple);
- ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
- ut_ad(dtuple_check_typed(tuple));
-
- fold = ut_fold_dulint(tree_id);
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(tuple, i);
-
- data = (byte*) dfield_get_data(field);
- len = dfield_get_len(field);
-
- if (len != UNIV_SQL_NULL) {
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- if (n_bytes > 0) {
- field = dtuple_get_nth_field(tuple, i);
-
- data = (byte*) dfield_get_data(field);
- len = dfield_get_len(field);
-
- if (len != UNIV_SQL_NULL) {
- if (len > n_bytes) {
- len = n_bytes;
- }
-
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- return(fold);
-}
-
-/**************************************************************************
-Writes an SQL null field full of zeros. */
-UNIV_INLINE
-void
-data_write_sql_null(
-/*================*/
- byte* data, /* in: pointer to a buffer of size len */
- ulint len) /* in: SQL null size in bytes */
-{
- ulint j;
-
- for (j = 0; j < len; j++) {
- data[j] = '\0';
- }
-}
-
-/**************************************************************************
-Checks if a dtuple contains an SQL null value. */
-UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
- /* out: TRUE if some field is SQL null */
- dtuple_t* tuple) /* in: dtuple */
-{
- ulint n;
- ulint i;
-
- n = dtuple_get_n_fields(tuple);
-
- for (i = 0; i < n; i++) {
- if (dfield_get_len(dtuple_get_nth_field(tuple, i))
- == UNIV_SQL_NULL) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
deleted file mode 100644
index e5e9c5076be..00000000000
--- a/storage/innobase/include/data0type.h
+++ /dev/null
@@ -1,450 +0,0 @@
-/******************************************************
-Data types
-
-(c) 1996 Innobase Oy
-
-Created 1/16/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef data0type_h
-#define data0type_h
-
-#include "univ.i"
-
-extern ulint data_mysql_default_charset_coll;
-#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
-#define DATA_MYSQL_BINARY_CHARSET_COLL 63
-
-/* SQL data type struct */
-typedef struct dtype_struct dtype_t;
-
-/*-------------------------------------------*/
-/* The 'MAIN TYPE' of a column */
-#define DATA_VARCHAR 1 /* character varying of the
- latin1_swedish_ci charset-collation; note
- that the MySQL format for this, DATA_BINARY,
- DATA_VARMYSQL, is also affected by whether the
- 'precise type' contains
- DATA_MYSQL_TRUE_VARCHAR */
-#define DATA_CHAR 2 /* fixed length character of the
- latin1_swedish_ci charset-collation */
-#define DATA_FIXBINARY 3 /* binary string of fixed length */
-#define DATA_BINARY 4 /* binary string */
-#define DATA_BLOB 5 /* binary large object, or a TEXT type;
- if (prtype & DATA_BINARY_TYPE) == 0, then this is
- actually a TEXT column (or a BLOB created
- with < 4.0.14; since column prefix indexes
- came only in 4.0.14, the missing flag in BLOBs
- created before that does not cause any harm) */
-#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
-#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
-#define DATA_SYS 8 /* system column */
-
-/* Data types >= DATA_FLOAT must be compared using the whole field, not as
-binary strings */
-
-#define DATA_FLOAT 9
-#define DATA_DOUBLE 10
-#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
-#define DATA_VARMYSQL 12 /* any charset varying length char */
-#define DATA_MYSQL 13 /* any charset fixed length char */
- /* NOTE that 4.1.1 used DATA_MYSQL and
- DATA_VARMYSQL for all character sets, and the
- charset-collation for tables created with it
- can also be latin1_swedish_ci */
-#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size()
- requires the values are <= 63 */
-/*-------------------------------------------*/
-/* The 'PRECISE TYPE' of a column */
-/*
-Tables created by a MySQL user have the following convention:
-
-- In the least significant byte in the precise type we store the MySQL type
-code (not applicable for system columns).
-
-- In the second least significant byte we OR flags DATA_NOT_NULL,
-DATA_UNSIGNED, DATA_BINARY_TYPE.
-
-- In the third least significant byte of the precise type of string types we
-store the MySQL charset-collation code. In DATA_BLOB columns created with
-< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
-are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
-problem, though.
-
-Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
-precise type, since the charset was always the default charset of the MySQL
-installation. If the stored charset code is 0 in the system table SYS_COLUMNS
-of InnoDB, that means that the default charset of this MySQL installation
-should be used.
-
-When loading a table definition from the system tables to the InnoDB data
-dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
-if the stored charset-collation is 0, and if that is the case and the type is
-a non-binary string, replace that 0 by the default charset-collation code of
-this MySQL installation. In short, in old tables, the charset-collation code
-in the system tables on disk can be 0, but in in-memory data structures
-(dtype_t), the charset-collation code is always != 0 for non-binary string
-types.
-
-In new tables, in binary string types, the charset-collation code is the
-MySQL code for the 'binary charset', that is, != 0.
-
-For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
-DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
-InnoDB performs all comparisons internally, without resorting to the MySQL
-comparison functions. This is to save CPU time.
-
-InnoDB's own internal system tables have different precise types for their
-columns, and for them the precise type is usually not used at all.
-*/
-
-#define DATA_ENGLISH 4 /* English language character string: this
- is a relic from pre-MySQL time and only used
- for InnoDB's own system tables */
-#define DATA_ERROR 111 /* another relic from pre-MySQL time */
-
-#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
- type from the precise type */
-#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
- format true VARCHAR */
-
-/* Precise data types for system columns and the length of those columns;
-NOTE: the values must run from 0 up in the order given! All codes must
-be less than 256 */
-#define DATA_ROW_ID 0 /* row id: a dulint */
-#define DATA_ROW_ID_LEN 6 /* stored length for row id */
-
-#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
-#define DATA_TRX_ID_LEN 6
-
-#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
-#define DATA_ROLL_PTR_LEN 7
-
-#define DATA_N_SYS_COLS 3 /* number of system columns defined above */
-
-/* Flags ORed to the precise data type */
-#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
- the column is declared as NOT NULL */
-#define DATA_UNSIGNED 512 /* this is ORed to the precise type when
- we have an unsigned integer type */
-#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character
- string, this is ORed to the precise type:
- this only holds for tables created with
- >= MySQL-4.0.14 */
-/* #define DATA_NONLATIN1 2048 This is a relic from < 4.1.2 and < 5.0.1.
- In earlier versions this was set for some
- BLOB columns.
-*/
-#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data
- type when the column is true VARCHAR where
- MySQL uses 2 bytes to store the data len;
- for shorter VARCHARs MySQL uses only 1 byte */
-/*-------------------------------------------*/
-
-/* This many bytes we need to store the type information affecting the
-alphabetical order for a single field and decide the storage size of an
-SQL null*/
-#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
-/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
-store the charset-collation number; one byte is left unused, though */
-#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
-
-/*************************************************************************
-Gets the MySQL type code from a dtype. */
-UNIV_INLINE
-ulint
-dtype_get_mysql_type(
-/*=================*/
- /* out: MySQL type code; this is NOT an InnoDB
- type code! */
- dtype_t* type); /* in: type struct */
-/*************************************************************************
-Determine how many bytes the first n characters of the given string occupy.
-If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy. */
-
-ulint
-dtype_get_at_most_n_mbchars(
-/*========================*/
- /* out: length of the prefix,
- in bytes */
- ulint prtype, /* in: precise type */
- ulint mbminlen, /* in: minimum length of a
- multi-byte character */
- ulint mbmaxlen, /* in: maximum length of a
- multi-byte character */
- ulint prefix_len, /* in: length of the requested
- prefix, in characters, multiplied by
- dtype_get_mbmaxlen(dtype) */
- ulint data_len, /* in: length of str (in bytes) */
- const char* str); /* in: the string whose prefix
- length is being determined */
-/*************************************************************************
-Checks if a data main type is a string type. Also a BLOB is considered a
-string type. */
-
-ibool
-dtype_is_string_type(
-/*=================*/
- /* out: TRUE if string type */
- ulint mtype); /* in: InnoDB main data type code: DATA_CHAR, ... */
-/*************************************************************************
-Checks if a type is a binary string type. Note that for tables created with
-< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE. */
-
-ibool
-dtype_is_binary_string_type(
-/*========================*/
- /* out: TRUE if binary string type */
- ulint mtype, /* in: main data type */
- ulint prtype);/* in: precise type */
-/*************************************************************************
-Checks if a type is a non-binary string type. That is, dtype_is_string_type is
-TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
-with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE. */
-
-ibool
-dtype_is_non_binary_string_type(
-/*============================*/
- /* out: TRUE if non-binary string type */
- ulint mtype, /* in: main data type */
- ulint prtype);/* in: precise type */
-/*************************************************************************
-Sets a data type structure. */
-UNIV_INLINE
-void
-dtype_set(
-/*======*/
- dtype_t* type, /* in: type struct to init */
- ulint mtype, /* in: main data type */
- ulint prtype, /* in: precise type */
- ulint len); /* in: precision of type */
-/*************************************************************************
-Copies a data type structure. */
-UNIV_INLINE
-void
-dtype_copy(
-/*=======*/
- dtype_t* type1, /* in: type struct to copy to */
- const dtype_t* type2); /* in: type struct to copy from */
-/*************************************************************************
-Gets the SQL main data type. */
-UNIV_INLINE
-ulint
-dtype_get_mtype(
-/*============*/
- dtype_t* type);
-/*************************************************************************
-Gets the precise data type. */
-UNIV_INLINE
-ulint
-dtype_get_prtype(
-/*=============*/
- dtype_t* type);
-/*************************************************************************
-Compute the mbminlen and mbmaxlen members of a data type structure. */
-UNIV_INLINE
-void
-dtype_get_mblen(
-/*============*/
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type (and collation) */
- ulint* mbminlen, /* out: minimum length of a
- multi-byte character */
- ulint* mbmaxlen); /* out: maximum length of a
- multi-byte character */
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
-
-ulint
-dtype_get_charset_coll_noninline(
-/*=============================*/
- ulint prtype);/* in: precise data type */
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
-UNIV_INLINE
-ulint
-dtype_get_charset_coll(
-/*===================*/
- ulint prtype);/* in: precise data type */
-/*************************************************************************
-Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code. */
-
-ulint
-dtype_form_prtype(
-/*==============*/
- ulint old_prtype, /* in: the MySQL type code and the flags
- DATA_BINARY_TYPE etc. */
- ulint charset_coll); /* in: MySQL charset-collation code */
-/*************************************************************************
-Gets the type length. */
-UNIV_INLINE
-ulint
-dtype_get_len(
-/*==========*/
- dtype_t* type);
-/*************************************************************************
-Gets the minimum length of a character, in bytes. */
-UNIV_INLINE
-ulint
-dtype_get_mbminlen(
-/*===============*/
- /* out: minimum length of a char, in bytes,
- or 0 if this is not a character type */
- const dtype_t* type); /* in: type */
-/*************************************************************************
-Gets the maximum length of a character, in bytes. */
-UNIV_INLINE
-ulint
-dtype_get_mbmaxlen(
-/*===============*/
- /* out: maximum length of a char, in bytes,
- or 0 if this is not a character type */
- const dtype_t* type); /* in: type */
-/*************************************************************************
-Gets the padding character code for the type. */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
- /* out: padding character code, or
- ULINT_UNDEFINED if no padding specified */
- ulint mtype, /* in: main type */
- ulint prtype); /* in: precise type */
-/***************************************************************************
-Returns the size of a fixed size data type, 0 if not a fixed size type. */
-UNIV_INLINE
-ulint
-dtype_get_fixed_size_low(
-/*=====================*/
- /* out: fixed size, or 0 */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- ulint len, /* in: length */
- ulint mbminlen, /* in: minimum length of a multibyte char */
- ulint mbmaxlen); /* in: maximum length of a multibyte char */
-/***************************************************************************
-Returns the minimum size of a data type. */
-UNIV_INLINE
-ulint
-dtype_get_min_size_low(
-/*===================*/
- /* out: minimum size */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- ulint len, /* in: length */
- ulint mbminlen, /* in: minimum length of a multibyte char */
- ulint mbmaxlen); /* in: maximum length of a multibyte char */
-/***************************************************************************
-Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information. */
-UNIV_INLINE
-ulint
-dtype_get_max_size_low(
-/*===================*/
- /* out: maximum size */
- ulint mtype, /* in: main type */
- ulint len); /* in: length */
-/***************************************************************************
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0. */
-UNIV_INLINE
-ulint
-dtype_get_sql_null_size(
-/*====================*/
- /* out: SQL null storage size
- in ROW_FORMAT=REDUNDANT */
- const dtype_t* type); /* in: type */
-/**************************************************************************
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value (< 4.1.x storage format). */
-UNIV_INLINE
-void
-dtype_read_for_order_and_null_size(
-/*===============================*/
- dtype_t* type, /* in: type struct */
- byte* buf); /* in: buffer for the stored order info */
-/**************************************************************************
-Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. This is the >= 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_store_for_order_and_null_size(
-/*====================================*/
- byte* buf, /* in: buffer for
- DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- bytes where we store the info */
- dtype_t* type, /* in: type struct */
- ulint prefix_len);/* in: prefix length to
- replace type->len, or 0 */
-/**************************************************************************
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
-storage format. */
-UNIV_INLINE
-void
-dtype_new_read_for_order_and_null_size(
-/*===================================*/
- dtype_t* type, /* in: type struct */
- byte* buf); /* in: buffer for stored type order info */
-
-/*************************************************************************
-Validates a data type structure. */
-
-ibool
-dtype_validate(
-/*===========*/
- /* out: TRUE if ok */
- dtype_t* type); /* in: type struct to validate */
-/*************************************************************************
-Prints a data type structure. */
-
-void
-dtype_print(
-/*========*/
- dtype_t* type); /* in: type */
-
-/* Structure for an SQL data type.
-If you add fields to this structure, be sure to initialize them everywhere.
-This structure is initialized in the following functions:
-dtype_set()
-dtype_read_for_order_and_null_size()
-dtype_new_read_for_order_and_null_size()
-sym_tab_add_null_lit() */
-
-struct dtype_struct{
- unsigned mtype:8; /* main data type */
- unsigned prtype:24; /* precise type; MySQL data
- type, charset code, flags to
- indicate nullability,
- signedness, whether this is a
- binary string, whether this is
- a true VARCHAR where MySQL
- uses 2 bytes to store the length */
-
- /* the remaining fields do not affect alphabetical ordering: */
-
- unsigned len:16; /* length; for MySQL data this
- is field->pack_length(),
- except that for a >= 5.0.3
- type true VARCHAR this is the
- maximum byte length of the
- string data (in addition to
- the string, MySQL uses 1 or 2
- bytes to store the string length) */
-
- unsigned mbminlen:2; /* minimum length of a
- character, in bytes */
- unsigned mbmaxlen:3; /* maximum length of a
- character, in bytes */
-};
-
-#ifndef UNIV_NONINL
-#include "data0type.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
deleted file mode 100644
index ad0f95755d2..00000000000
--- a/storage/innobase/include/data0type.ic
+++ /dev/null
@@ -1,562 +0,0 @@
-/******************************************************
-Data types
-
-(c) 1996 Innobase Oy
-
-Created 1/16/1996 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Get the variable length bounds of the given character set.
-
-NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
-this function, you MUST change also the prototype here! */
-extern
-void
-innobase_get_cset_width(
-/*====================*/
- ulint cset, /* in: MySQL charset-collation code */
- ulint* mbminlen, /* out: minimum length of a char (in bytes) */
- ulint* mbmaxlen); /* out: maximum length of a char (in bytes) */
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
-UNIV_INLINE
-ulint
-dtype_get_charset_coll(
-/*===================*/
- ulint prtype) /* in: precise data type */
-{
- return((prtype >> 16) & 0xFFUL);
-}
-
-/*************************************************************************
-Gets the MySQL type code from a dtype. */
-UNIV_INLINE
-ulint
-dtype_get_mysql_type(
-/*=================*/
- /* out: MySQL type code; this is NOT an InnoDB
- type code! */
- dtype_t* type) /* in: type struct */
-{
- return(type->prtype & 0xFFUL);
-}
-
-/*************************************************************************
-Compute the mbminlen and mbmaxlen members of a data type structure. */
-UNIV_INLINE
-void
-dtype_get_mblen(
-/*============*/
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type (and collation) */
- ulint* mbminlen, /* out: minimum length of a
- multi-byte character */
- ulint* mbmaxlen) /* out: maximum length of a
- multi-byte character */
-{
- if (dtype_is_string_type(mtype)) {
-#ifndef UNIV_HOTBACKUP
- innobase_get_cset_width(dtype_get_charset_coll(prtype),
- mbminlen, mbmaxlen);
- ut_ad(*mbminlen <= *mbmaxlen);
- ut_ad(*mbminlen <= 2); /* mbminlen in dtype_t is 0..3 */
- ut_ad(*mbmaxlen < 1 << 3); /* mbmaxlen in dtype_t is 0..7 */
-#else /* !UNIV_HOTBACKUP */
- ut_a(mtype <= DATA_BINARY);
- *mbminlen = *mbmaxlen = 1;
-#endif /* !UNIV_HOTBACKUP */
- } else {
- *mbminlen = *mbmaxlen = 0;
- }
-}
-
-/*************************************************************************
-Compute the mbminlen and mbmaxlen members of a data type structure. */
-UNIV_INLINE
-void
-dtype_set_mblen(
-/*============*/
- dtype_t* type) /* in/out: type */
-{
- ulint mbminlen;
- ulint mbmaxlen;
-
- dtype_get_mblen(type->mtype, type->prtype, &mbminlen, &mbmaxlen);
- type->mbminlen = mbminlen;
- type->mbmaxlen = mbmaxlen;
-
- ut_ad(dtype_validate(type));
-}
-
-/*************************************************************************
-Sets a data type structure. */
-UNIV_INLINE
-void
-dtype_set(
-/*======*/
- dtype_t* type, /* in: type struct to init */
- ulint mtype, /* in: main data type */
- ulint prtype, /* in: precise type */
- ulint len) /* in: precision of type */
-{
- ut_ad(type);
- ut_ad(mtype <= DATA_MTYPE_MAX);
-
- type->mtype = mtype;
- type->prtype = prtype;
- type->len = len;
-
- dtype_set_mblen(type);
-}
-
-/*************************************************************************
-Copies a data type structure. */
-UNIV_INLINE
-void
-dtype_copy(
-/*=======*/
- dtype_t* type1, /* in: type struct to copy to */
- const dtype_t* type2) /* in: type struct to copy from */
-{
- *type1 = *type2;
-
- ut_ad(dtype_validate(type1));
-}
-
-/*************************************************************************
-Gets the SQL main data type. */
-UNIV_INLINE
-ulint
-dtype_get_mtype(
-/*============*/
- dtype_t* type)
-{
- ut_ad(type);
-
- return(type->mtype);
-}
-
-/*************************************************************************
-Gets the precise data type. */
-UNIV_INLINE
-ulint
-dtype_get_prtype(
-/*=============*/
- dtype_t* type)
-{
- ut_ad(type);
-
- return(type->prtype);
-}
-
-/*************************************************************************
-Gets the type length. */
-UNIV_INLINE
-ulint
-dtype_get_len(
-/*==========*/
- dtype_t* type)
-{
- ut_ad(type);
-
- return(type->len);
-}
-
-/*************************************************************************
-Gets the minimum length of a character, in bytes. */
-UNIV_INLINE
-ulint
-dtype_get_mbminlen(
-/*===============*/
- /* out: minimum length of a char, in bytes,
- or 0 if this is not a character type */
- const dtype_t* type) /* in: type */
-{
- ut_ad(type);
- return(type->mbminlen);
-}
-/*************************************************************************
-Gets the maximum length of a character, in bytes. */
-UNIV_INLINE
-ulint
-dtype_get_mbmaxlen(
-/*===============*/
- /* out: maximum length of a char, in bytes,
- or 0 if this is not a character type */
- const dtype_t* type) /* in: type */
-{
- ut_ad(type);
- return(type->mbmaxlen);
-}
-
-/*************************************************************************
-Gets the padding character code for a type. */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
- /* out: padding character code, or
- ULINT_UNDEFINED if no padding specified */
- ulint mtype, /* in: main type */
- ulint prtype) /* in: precise type */
-{
- switch (mtype) {
- case DATA_FIXBINARY:
- case DATA_BINARY:
- if (UNIV_UNLIKELY(dtype_get_charset_coll(prtype)
- == DATA_MYSQL_BINARY_CHARSET_COLL)) {
- /* Starting from 5.0.18, do not pad
- VARBINARY or BINARY columns. */
- return(ULINT_UNDEFINED);
- }
- /* Fall through */
- case DATA_CHAR:
- case DATA_VARCHAR:
- case DATA_MYSQL:
- case DATA_VARMYSQL:
- /* Space is the padding character for all char and binary
- strings, and starting from 5.0.3, also for TEXT strings. */
-
- return(0x20);
- case DATA_BLOB:
- if (!(prtype & DATA_BINARY_TYPE)) {
- return(0x20);
- }
- /* Fall through */
- default:
- /* No padding specified */
- return(ULINT_UNDEFINED);
- }
-}
-
-/**************************************************************************
-Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. This is the >= 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_store_for_order_and_null_size(
-/*====================================*/
- byte* buf, /* in: buffer for
- DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- bytes where we store the info */
- dtype_t* type, /* in: type struct */
- ulint prefix_len)/* in: prefix length to
- replace type->len, or 0 */
-{
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
- ulint len;
-
- buf[0] = (byte)(type->mtype & 0xFFUL);
-
- if (type->prtype & DATA_BINARY_TYPE) {
- buf[0] = buf[0] | 128;
- }
-
- /* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) {
- buf[0] = buf[0] | 64;
- }
- */
-
- buf[1] = (byte)(type->prtype & 0xFFUL);
-
- len = prefix_len ? prefix_len : type->len;
-
- mach_write_to_2(buf + 2, len & 0xFFFFUL);
-
- ut_ad(dtype_get_charset_coll(type->prtype) < 256);
- mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
-
- if (type->prtype & DATA_NOT_NULL) {
- buf[4] |= 128;
- }
-}
-
-/**************************************************************************
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the < 4.1.x
-storage format. */
-UNIV_INLINE
-void
-dtype_read_for_order_and_null_size(
-/*===============================*/
- dtype_t* type, /* in: type struct */
- byte* buf) /* in: buffer for stored type order info */
-{
-#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
-# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
- type->mtype = buf[0] & 63;
- type->prtype = buf[1];
-
- if (buf[0] & 128) {
- type->prtype = type->prtype | DATA_BINARY_TYPE;
- }
-
- type->len = mach_read_from_2(buf + 2);
-
- type->prtype = dtype_form_prtype(type->prtype,
- data_mysql_default_charset_coll);
- dtype_set_mblen(type);
-}
-
-/**************************************************************************
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
-storage format. */
-UNIV_INLINE
-void
-dtype_new_read_for_order_and_null_size(
-/*===================================*/
- dtype_t* type, /* in: type struct */
- byte* buf) /* in: buffer for stored type order info */
-{
- ulint charset_coll;
-
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
- type->mtype = buf[0] & 63;
- type->prtype = buf[1];
-
- if (buf[0] & 128) {
- type->prtype |= DATA_BINARY_TYPE;
- }
-
- if (buf[4] & 128) {
- type->prtype |= DATA_NOT_NULL;
- }
-
- type->len = mach_read_from_2(buf + 2);
-
- mach_read_from_2(buf + 4);
-
- charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
-
- if (dtype_is_string_type(type->mtype)) {
- ut_a(charset_coll < 256);
-
- if (charset_coll == 0) {
- /* This insert buffer record was inserted with MySQL
- version < 4.1.2, and the charset-collation code was not
- explicitly stored to dtype->prtype at that time. It
- must be the default charset-collation of this MySQL
- installation. */
-
- charset_coll = data_mysql_default_charset_coll;
- }
-
- type->prtype = dtype_form_prtype(type->prtype, charset_coll);
- }
- dtype_set_mblen(type);
-}
-
-/***************************************************************************
-Returns the size of a fixed size data type, 0 if not a fixed size type. */
-UNIV_INLINE
-ulint
-dtype_get_fixed_size_low(
-/*=====================*/
- /* out: fixed size, or 0 */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- ulint len, /* in: length */
- ulint mbminlen, /* in: minimum length of a multibyte char */
- ulint mbmaxlen) /* in: maximum length of a multibyte char */
-{
- switch (mtype) {
- case DATA_SYS:
-#ifdef UNIV_DEBUG
- switch (prtype & DATA_MYSQL_TYPE_MASK) {
- case DATA_ROW_ID:
- ut_ad(len == DATA_ROW_ID_LEN);
- break;
- case DATA_TRX_ID:
- ut_ad(len == DATA_TRX_ID_LEN);
- break;
- case DATA_ROLL_PTR:
- ut_ad(len == DATA_ROLL_PTR_LEN);
- break;
- default:
- ut_ad(0);
- return(0);
- }
-#endif /* UNIV_DEBUG */
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- return(len);
- case DATA_MYSQL:
- if (prtype & DATA_BINARY_TYPE) {
- return(len);
- } else {
-#ifdef UNIV_HOTBACKUP
- if (mbminlen == mbmaxlen) {
- return(len);
- }
-#else /* UNIV_HOTBACKUP */
- /* We play it safe here and ask MySQL for
- mbminlen and mbmaxlen. Although the stored
- mbminlen and mbmaxlen of a type are
- initialized if and only if its prtype is
- initialized (in one of the 3 functions in this file),
- it could be that none of these functions
- has been called. */
-
- ulint i_mbminlen, i_mbmaxlen;
-
- innobase_get_cset_width(
- dtype_get_charset_coll(prtype),
- &i_mbminlen, &i_mbmaxlen);
-
- if (UNIV_UNLIKELY(mbminlen != i_mbminlen)
- || UNIV_UNLIKELY(mbmaxlen != i_mbmaxlen)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: "
- "mbminlen=%lu, "
- "mbmaxlen=%lu, "
- "type->mbminlen=%lu, "
- "type->mbmaxlen=%lu\n",
- (ulong) i_mbminlen,
- (ulong) i_mbmaxlen,
- (ulong) mbminlen,
- (ulong) mbmaxlen);
- }
- if (mbminlen == mbmaxlen) {
- return(len);
- }
-#endif /* !UNIV_HOTBACKUP */
- }
- /* fall through for variable-length charsets */
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARMYSQL:
- case DATA_BLOB:
- return(0);
- default:
- ut_error;
- }
-
- return(0);
-}
-
-/***************************************************************************
-Returns the minimum size of a data type. */
-UNIV_INLINE
-ulint
-dtype_get_min_size_low(
-/*===================*/
- /* out: minimum size */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- ulint len, /* in: length */
- ulint mbminlen, /* in: minimum length of a multibyte char */
- ulint mbmaxlen) /* in: maximum length of a multibyte char */
-{
- switch (mtype) {
- case DATA_SYS:
-#ifdef UNIV_DEBUG
- switch (prtype & DATA_MYSQL_TYPE_MASK) {
- case DATA_ROW_ID:
- ut_ad(len == DATA_ROW_ID_LEN);
- break;
- case DATA_TRX_ID:
- ut_ad(len == DATA_TRX_ID_LEN);
- break;
- case DATA_ROLL_PTR:
- ut_ad(len == DATA_ROLL_PTR_LEN);
- break;
- default:
- ut_ad(0);
- return(0);
- }
-#endif /* UNIV_DEBUG */
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- return(len);
- case DATA_MYSQL:
- if ((prtype & DATA_BINARY_TYPE) || mbminlen == mbmaxlen) {
- return(len);
- }
- /* this is a variable-length character set */
- ut_a(mbminlen > 0);
- ut_a(mbmaxlen > mbminlen);
- ut_a(len % mbmaxlen == 0);
- return(len * mbminlen / mbmaxlen);
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARMYSQL:
- case DATA_BLOB:
- return(0);
- default:
- ut_error;
- }
-
- return(0);
-}
-
-/***************************************************************************
-Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information. */
-UNIV_INLINE
-ulint
-dtype_get_max_size_low(
-/*===================*/
- /* out: maximum size */
- ulint mtype, /* in: main type */
- ulint len) /* in: length */
-{
- switch (mtype) {
- case DATA_SYS:
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- case DATA_MYSQL:
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARMYSQL:
- return(len);
- case DATA_BLOB:
- break;
- default:
- ut_error;
- }
-
- return(ULINT_MAX);
-}
-
-/***************************************************************************
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0. */
-UNIV_INLINE
-ulint
-dtype_get_sql_null_size(
-/*====================*/
- /* out: SQL null storage size
- in ROW_FORMAT=REDUNDANT */
- const dtype_t* type) /* in: type */
-{
- return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
- type->mbminlen, type->mbmaxlen));
-}
diff --git a/storage/innobase/include/data0types.h b/storage/innobase/include/data0types.h
deleted file mode 100644
index ab314f8f471..00000000000
--- a/storage/innobase/include/data0types.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/************************************************************************
-Some type definitions
-
-(c) 1994-2000 Innobase Oy
-
-Created 9/21/2000 Heikki Tuuri
-*************************************************************************/
-
-#ifndef data0types_h
-#define data0types_h
-
-/* SQL data field struct */
-typedef struct dfield_struct dfield_t;
-
-/* SQL data tuple struct */
-typedef struct dtuple_struct dtuple_t;
-
-#endif
-
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
deleted file mode 100644
index ed7ce151718..00000000000
--- a/storage/innobase/include/db0err.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/******************************************************
-Global error codes for the database
-
-(c) 1996 Innobase Oy
-
-Created 5/24/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef db0err_h
-#define db0err_h
-
-
-#define DB_SUCCESS 10
-
-/* The following are error codes */
-#define DB_ERROR 11
-#define DB_OUT_OF_MEMORY 12
-#define DB_OUT_OF_FILE_SPACE 13
-#define DB_LOCK_WAIT 14
-#define DB_DEADLOCK 15
-#define DB_ROLLBACK 16
-#define DB_DUPLICATE_KEY 17
-#define DB_QUE_THR_SUSPENDED 18
-#define DB_MISSING_HISTORY 19 /* required history data has been
- deleted due to lack of space in
- rollback segment */
-#define DB_CLUSTER_NOT_FOUND 30
-#define DB_TABLE_NOT_FOUND 31
-#define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped
- and restarted with more file space */
-#define DB_TABLE_IS_BEING_USED 33
-#define DB_TOO_BIG_RECORD 34 /* a record in an index would become
- bigger than 1/2 free space in a page
- frame */
-#define DB_LOCK_WAIT_TIMEOUT 35 /* lock wait lasted too long */
-#define DB_NO_REFERENCED_ROW 36 /* referenced key value not found
- for a foreign key in an insert or
- update of a row */
-#define DB_ROW_IS_REFERENCED 37 /* cannot delete or update a row
- because it contains a key value
- which is referenced */
-#define DB_CANNOT_ADD_CONSTRAINT 38 /* adding a foreign key constraint
- to a table failed */
-#define DB_CORRUPTION 39 /* data structure corruption noticed */
-#define DB_COL_APPEARS_TWICE_IN_INDEX 40/* InnoDB cannot handle an index
- where same column appears twice */
-#define DB_CANNOT_DROP_CONSTRAINT 41 /* dropping a foreign key constraint
- from a table failed */
-#define DB_NO_SAVEPOINT 42 /* no savepoint exists with the given
- name */
-#define DB_TABLESPACE_ALREADY_EXISTS 43 /* we cannot create a new single-table
- tablespace because a file of the same
- name already exists */
-#define DB_TABLESPACE_DELETED 44 /* tablespace does not exist or is
- being dropped right now */
-#define DB_LOCK_TABLE_FULL 45 /* lock structs have exhausted the
- buffer pool (for big transactions,
- InnoDB stores the lock structs in the
- buffer pool) */
-#define DB_FOREIGN_DUPLICATE_KEY 46 /* foreign key constraints
- activated by the operation would
- lead to a duplicate key in some
- table */
-#define DB_TOO_MANY_CONCURRENT_TRXS 47 /* when InnoDB runs out of the
- preconfigured undo slots, this can
- only happen when there are too many
- concurrent transactions */
-#define DB_UNSUPPORTED 48 /* when InnoDB sees any artefact or
- a feature that it can't recognize or
- work with e.g., FT indexes created by
- a later version of the engine. */
-/* The following are partial failure codes */
-#define DB_FAIL 1000
-#define DB_OVERFLOW 1001
-#define DB_UNDERFLOW 1002
-#define DB_STRONG_FAIL 1003
-#define DB_RECORD_NOT_FOUND 1500
-#define DB_END_OF_INDEX 1501
-
-#endif
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
deleted file mode 100644
index cac79410b24..00000000000
--- a/storage/innobase/include/dict0boot.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/******************************************************
-Data dictionary creation and booting
-
-(c) 1996 Innobase Oy
-
-Created 4/18/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0boot_h
-#define dict0boot_h
-
-#include "univ.i"
-
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "ut0byte.h"
-#include "buf0buf.h"
-#include "fsp0fsp.h"
-#include "dict0dict.h"
-
-typedef byte dict_hdr_t;
-
-/**************************************************************************
-Gets a pointer to the dictionary header and x-latches its page. */
-
-dict_hdr_t*
-dict_hdr_get(
-/*=========*/
- /* out: pointer to the dictionary header,
- page x-latched */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Returns a new row, table, index, or tree id. */
-
-dulint
-dict_hdr_get_new_id(
-/*================*/
- /* out: the new id */
- ulint type); /* in: DICT_HDR_ROW_ID, ... */
-/**************************************************************************
-Returns a new row id. */
-UNIV_INLINE
-dulint
-dict_sys_get_new_row_id(void);
-/*=========================*/
- /* out: the new id */
-/**************************************************************************
-Reads a row id from a record or other 6-byte stored form. */
-UNIV_INLINE
-dulint
-dict_sys_read_row_id(
-/*=================*/
- /* out: row id */
- byte* field); /* in: record field */
-/**************************************************************************
-Writes a row id to a record or other 6-byte stored form. */
-UNIV_INLINE
-void
-dict_sys_write_row_id(
-/*==================*/
- byte* field, /* in: record field */
- dulint row_id);/* in: row id */
-/*********************************************************************
-Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created. */
-
-void
-dict_boot(void);
-/*===========*/
-/*********************************************************************
-Creates and initializes the data dictionary at the database creation. */
-
-void
-dict_create(void);
-/*=============*/
-
-
-/* Space id and page no where the dictionary header resides */
-#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
-#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
-
-/* The ids for the basic system tables and their indexes */
-#define DICT_TABLES_ID ut_dulint_create(0, 1)
-#define DICT_COLUMNS_ID ut_dulint_create(0, 2)
-#define DICT_INDEXES_ID ut_dulint_create(0, 3)
-#define DICT_FIELDS_ID ut_dulint_create(0, 4)
-/* The following is a secondary index on SYS_TABLES */
-#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5)
-
-#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start
- from this number, except for basic
- system tables and their above defined
- indexes; ibuf tables and indexes are
- assigned as the id the number
- DICT_IBUF_ID_MIN plus the space id */
-#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFFUL, 0)
-
-/* The offset of the dictionary header on the page */
-#define DICT_HDR FSEG_PAGE_DATA
-
-/*-------------------------------------------------------------*/
-/* Dictionary header offsets */
-#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
-#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
-#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
-#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. */
-#define DICT_HDR_TABLES 32 /* Root of the table index tree */
-#define DICT_HDR_TABLE_IDS 36 /* Root of the table ids index tree */
-#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */
-#define DICT_HDR_INDEXES 44 /* Root of the index index tree */
-#define DICT_HDR_FIELDS 48 /* Root of the index field
- index tree */
-
-#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
- segment into which the dictionary
- header is created */
-/*-------------------------------------------------------------*/
-
-/* The field number of the page number field in the sys_indexes table
-clustered index */
-#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
-#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
-#define DICT_SYS_INDEXES_TYPE_FIELD 6
-
-/* When a row id which is zero modulo this number (which must be a power of
-two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
-updated */
-#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
-
-#ifndef UNIV_NONINL
-#include "dict0boot.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
deleted file mode 100644
index fe2a9e36653..00000000000
--- a/storage/innobase/include/dict0boot.ic
+++ /dev/null
@@ -1,76 +0,0 @@
-/******************************************************
-Data dictionary creation and booting
-
-(c) 1996 Innobase Oy
-
-Created 4/18/1996 Heikki Tuuri
-*******************************************************/
-
-/**************************************************************************
-Writes the current value of the row id counter to the dictionary header file
-page. */
-
-void
-dict_hdr_flush_row_id(void);
-/*=======================*/
-
-
-/**************************************************************************
-Returns a new row id. */
-UNIV_INLINE
-dulint
-dict_sys_get_new_row_id(void)
-/*=========================*/
- /* out: the new id */
-{
- dulint id;
-
- mutex_enter(&(dict_sys->mutex));
-
- id = dict_sys->row_id;
-
- if (0 == (ut_dulint_get_low(id) % DICT_HDR_ROW_ID_WRITE_MARGIN)) {
-
- dict_hdr_flush_row_id();
- }
-
- UT_DULINT_INC(dict_sys->row_id);
-
- mutex_exit(&(dict_sys->mutex));
-
- return(id);
-}
-
-/**************************************************************************
-Reads a row id from a record or other 6-byte stored form. */
-UNIV_INLINE
-dulint
-dict_sys_read_row_id(
-/*=================*/
- /* out: row id */
- byte* field) /* in: record field */
-{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
- return(mach_read_from_6(field));
-}
-
-/**************************************************************************
-Writes a row id to a record or other 6-byte stored form. */
-UNIV_INLINE
-void
-dict_sys_write_row_id(
-/*==================*/
- byte* field, /* in: record field */
- dulint row_id) /* in: row id */
-{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
- mach_write_to_6(field, row_id);
-}
-
-
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
deleted file mode 100644
index f0f30481abe..00000000000
--- a/storage/innobase/include/dict0crea.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/******************************************************
-Database object creation
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0crea_h
-#define dict0crea_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "dict0dict.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/*************************************************************************
-Creates a table create graph. */
-
-tab_node_t*
-tab_create_graph_create(
-/*====================*/
- /* out, own: table create node */
- dict_table_t* table, /* in: table to create, built as a memory data
- structure */
- mem_heap_t* heap); /* in: heap where created */
-/*************************************************************************
-Creates an index create graph. */
-
-ind_node_t*
-ind_create_graph_create(
-/*====================*/
- /* out, own: index create node */
- dict_index_t* index, /* in: index to create, built as a memory data
- structure */
- mem_heap_t* heap); /* in: heap where created */
-/***************************************************************
-Creates a table. This is a high-level function used in SQL execution graphs. */
-
-que_thr_t*
-dict_create_table_step(
-/*===================*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/***************************************************************
-Creates an index. This is a high-level function used in SQL execution
-graphs. */
-
-que_thr_t*
-dict_create_index_step(
-/*===================*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/***********************************************************************
-Truncates the index tree associated with a row in SYS_INDEXES table. */
-
-ulint
-dict_truncate_index_tree(
-/*=====================*/
- /* out: new root page number, or
- FIL_NULL on failure */
- dict_table_t* table, /* in: the table the index belongs to */
- btr_pcur_t* pcur, /* in/out: persistent cursor pointing to
- record in the clustered index of
- SYS_INDEXES table. The cursor may be
- repositioned in this call. */
- mtr_t* mtr); /* in: mtr having the latch
- on the record page. The mtr may be
- committed and restarted in this call. */
-/***********************************************************************
-Drops the index tree associated with a row in SYS_INDEXES table. */
-
-void
-dict_drop_index_tree(
-/*=================*/
- rec_t* rec, /* in: record in the clustered index of SYS_INDEXES
- table */
- mtr_t* mtr); /* in: mtr having the latch on the record page */
-/********************************************************************
-Creates the foreign key constraints system tables inside InnoDB
-at database creation or database start if they are not found or are
-not of the right form. */
-
-ulint
-dict_create_or_check_foreign_constraint_tables(void);
-/*================================================*/
- /* out: DB_SUCCESS or error code */
-/************************************************************************
-Adds foreign key definitions to data dictionary tables in the database. We
-look at table->foreign_list, and also generate names to constraints that were
-not named by the user. A generated constraint has a name of the format
-databasename/tablename_ibfk_<number>, where the numbers start from 1, and are
-given locally for this table, that is, the number is not global, as in the
-old format constraints < 4.0.18 it used to be. */
-
-ulint
-dict_create_add_foreigns_to_dictionary(
-/*===================================*/
- /* out: error code or DB_SUCCESS */
- ulint start_id,/* in: if we are actually doing ALTER TABLE
- ADD CONSTRAINT, we want to generate constraint
- numbers which are bigger than in the table so
- far; we number the constraints from
- start_id + 1 up; start_id should be set to 0 if
- we are creating a new table, or if the table
- so far has no constraints for which the name
- was generated here */
- dict_table_t* table, /* in: table */
- trx_t* trx); /* in: transaction */
-
-
-/* Table create node structure */
-
-struct tab_node_struct{
- que_common_t common; /* node type: QUE_NODE_TABLE_CREATE */
- dict_table_t* table; /* table to create, built as a memory data
- structure with dict_mem_... functions */
- ins_node_t* tab_def; /* child node which does the insert of
- the table definition; the row to be inserted
- is built by the parent node */
- ins_node_t* col_def; /* child node which does the inserts of
- the column definitions; the row to be inserted
- is built by the parent node */
- commit_node_t* commit_node;
- /* child node which performs a commit after
- a successful table creation */
- /*----------------------*/
- /* Local storage for this graph node */
- ulint state; /* node execution state */
- ulint col_no; /* next column definition to insert */
- mem_heap_t* heap; /* memory heap used as auxiliary storage */
-};
-
-/* Table create node states */
-#define TABLE_BUILD_TABLE_DEF 1
-#define TABLE_BUILD_COL_DEF 2
-#define TABLE_COMMIT_WORK 3
-#define TABLE_ADD_TO_CACHE 4
-#define TABLE_COMPLETED 5
-
-/* Index create node struct */
-
-struct ind_node_struct{
- que_common_t common; /* node type: QUE_NODE_INDEX_CREATE */
- dict_index_t* index; /* index to create, built as a memory data
- structure with dict_mem_... functions */
- ins_node_t* ind_def; /* child node which does the insert of
- the index definition; the row to be inserted
- is built by the parent node */
- ins_node_t* field_def; /* child node which does the inserts of
- the field definitions; the row to be inserted
- is built by the parent node */
- commit_node_t* commit_node;
- /* child node which performs a commit after
- a successful index creation */
- /*----------------------*/
- /* Local storage for this graph node */
- ulint state; /* node execution state */
- ulint page_no;/* root page number of the index */
- dict_table_t* table; /* table which owns the index */
- dtuple_t* ind_row;/* index definition row built */
- ulint field_no;/* next field definition to insert */
- mem_heap_t* heap; /* memory heap used as auxiliary storage */
-};
-
-/* Index create node states */
-#define INDEX_BUILD_INDEX_DEF 1
-#define INDEX_BUILD_FIELD_DEF 2
-#define INDEX_CREATE_INDEX_TREE 3
-#define INDEX_COMMIT_WORK 4
-#define INDEX_ADD_TO_CACHE 5
-
-#ifndef UNIV_NONINL
-#include "dict0crea.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic
deleted file mode 100644
index b4da2d7e03f..00000000000
--- a/storage/innobase/include/dict0crea.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-/******************************************************
-Database object creation
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
deleted file mode 100644
index 7d5ff09c7a6..00000000000
--- a/storage/innobase/include/dict0dict.h
+++ /dev/null
@@ -1,1002 +0,0 @@
-/******************************************************
-Data dictionary system
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0dict_h
-#define dict0dict_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "dict0mem.h"
-#include "data0type.h"
-#include "data0data.h"
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "mem0mem.h"
-#include "rem0types.h"
-#include "btr0types.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "hash0hash.h"
-#include "ut0rnd.h"
-#include "ut0byte.h"
-#include "trx0types.h"
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-
-void
-dict_casedn_str(
-/*============*/
- char* a); /* in/out: string to put in lower case */
-#endif /* !UNIV_HOTBACKUP */
-/************************************************************************
-Get the database name length in a table name. */
-
-ulint
-dict_get_db_name_len(
-/*=================*/
- /* out: database name length */
- const char* name); /* in: table name in the form
- dbname '/' tablename */
-/************************************************************************
-Return the end of table name where we have removed dbname and '/'. */
-
-const char*
-dict_remove_db_name(
-/*================*/
- /* out: table name */
- const char* name); /* in: table name in the form
- dbname '/' tablename */
-/************************************************************************
-Decrements the count of open MySQL handles to a table. */
-
-void
-dict_table_decrement_handle_count(
-/*==============================*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
-Inits the data dictionary module. */
-
-void
-dict_init(void);
-/*===========*/
-/************************************************************************
-Gets the space id of every table of the data dictionary and makes a linear
-list and a hash table of them to the data dictionary cache. This function
-can be called at database startup if we did not need to do a crash recovery.
-In crash recovery we must scan the space id's from the .ibd files in MySQL
-database directories. */
-
-void
-dict_load_space_id_list(void);
-/*=========================*/
-/*************************************************************************
-Gets the column data type. */
-UNIV_INLINE
-void
-dict_col_copy_type(
-/*===============*/
- const dict_col_t* col, /* in: column */
- dtype_t* type); /* out: data type */
-/*************************************************************************
-Gets the column data type. */
-
-void
-dict_col_copy_type_noninline(
-/*=========================*/
- const dict_col_t* col, /* in: column */
- dtype_t* type); /* out: data type */
-#ifdef UNIV_DEBUG
-/*************************************************************************
-Assert that a column and a data type match. */
-UNIV_INLINE
-ibool
-dict_col_type_assert_equal(
-/*=======================*/
- /* out: TRUE */
- const dict_col_t* col, /* in: column */
- const dtype_t* type); /* in: data type */
-#endif /* UNIV_DEBUG */
-/***************************************************************************
-Returns the minimum size of the column. */
-UNIV_INLINE
-ulint
-dict_col_get_min_size(
-/*==================*/
- /* out: minimum size */
- const dict_col_t* col); /* in: column */
-/***************************************************************************
-Returns the maximum size of the column. */
-UNIV_INLINE
-ulint
-dict_col_get_max_size(
-/*==================*/
- /* out: maximum size */
- const dict_col_t* col); /* in: column */
-/***************************************************************************
-Returns the size of a fixed size column, 0 if not a fixed size column. */
-UNIV_INLINE
-ulint
-dict_col_get_fixed_size(
-/*====================*/
- /* out: fixed size, or 0 */
- const dict_col_t* col); /* in: column */
-/***************************************************************************
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0. */
-UNIV_INLINE
-ulint
-dict_col_get_sql_null_size(
-/*=======================*/
- /* out: SQL null storage size
- in ROW_FORMAT=REDUNDANT */
- const dict_col_t* col); /* in: column */
-
-/*************************************************************************
-Gets the column number. */
-UNIV_INLINE
-ulint
-dict_col_get_no(
-/*============*/
- const dict_col_t* col);
-/*************************************************************************
-Gets the column position in the clustered index. */
-UNIV_INLINE
-ulint
-dict_col_get_clust_pos(
-/*===================*/
- const dict_col_t* col, /* in: table column */
- const dict_index_t* clust_index); /* in: clustered index */
-/*************************************************************************
-Gets the column position in the clustered index. */
-
-ulint
-dict_col_get_clust_pos_noninline(
-/*=============================*/
- const dict_col_t* col, /* in: table column */
- const dict_index_t* clust_index); /* in: clustered index */
-/********************************************************************
-If the given column name is reserved for InnoDB system columns, return
-TRUE. */
-
-ibool
-dict_col_name_is_reserved(
-/*======================*/
- /* out: TRUE if name is reserved */
- const char* name); /* in: column name */
-/************************************************************************
-Acquire the autoinc lock.*/
-
-void
-dict_table_autoinc_lock(
-/*====================*/
- dict_table_t* table); /* in: table */
-/************************************************************************
-Unconditionally set the autoinc counter. */
-
-void
-dict_table_autoinc_initialize(
-/*==========================*/
- dict_table_t* table, /* in: table */
- ib_ulonglong value); /* in: next value to assign to a row */
-/************************************************************************
-Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized. */
-
-ib_ulonglong
-dict_table_autoinc_read(
-/*====================*/
- /* out: value for a new row, or 0 */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Updates the autoinc counter if the value supplied is greater than the
-current value. */
-
-void
-dict_table_autoinc_update_if_greater(
-/*=================================*/
-
- dict_table_t* table, /* in: table */
- ib_ulonglong value); /* in: value which was assigned to a row */
-/************************************************************************
-Release the autoinc lock.*/
-
-void
-dict_table_autoinc_unlock(
-/*======================*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
-Adds system columns to a table object. */
-
-void
-dict_table_add_system_columns(
-/*==========================*/
- dict_table_t* table, /* in/out: table */
- mem_heap_t* heap); /* in: temporary heap */
-/**************************************************************************
-Adds a table object to the dictionary cache. */
-
-void
-dict_table_add_to_cache(
-/*====================*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap); /* in: temporary heap */
-/**************************************************************************
-Removes a table object from the dictionary cache. */
-
-void
-dict_table_remove_from_cache(
-/*=========================*/
- dict_table_t* table); /* in, own: table */
-/**************************************************************************
-Renames a table object. */
-
-ibool
-dict_table_rename_in_cache(
-/*=======================*/
- /* out: TRUE if success */
- dict_table_t* table, /* in: table */
- const char* new_name, /* in: new name */
- ibool rename_also_foreigns);/* in: in ALTER TABLE we want
- to preserve the original table name
- in constraints which reference it */
-/**************************************************************************
-Change the id of a table object in the dictionary cache. This is used in
-DISCARD TABLESPACE. */
-
-void
-dict_table_change_id_in_cache(
-/*==========================*/
- dict_table_t* table, /* in: table object already in cache */
- dulint new_id);/* in: new id to set */
-/**************************************************************************
-Adds a foreign key constraint object to the dictionary cache. May free
-the object if the cache already contains an object with the same identifier.
-At least one of foreign table or referenced table must already be in
-the dictionary cache! */
-
-ulint
-dict_foreign_add_to_cache(
-/*======================*/
- /* out: DB_SUCCESS or error code */
- dict_foreign_t* foreign, /* in, own: foreign key constraint */
- ibool check_charsets);/* in: TRUE=check charset
- compatibility */
-/*************************************************************************
-Checks if a table is referenced by foreign keys. */
-
-ibool
-dict_table_referenced_by_foreign_key(
-/*=================================*/
- /* out: TRUE if table is referenced by a
- foreign key */
- dict_table_t* table); /* in: InnoDB table */
-/**************************************************************************
-Determines whether a string starts with the specified keyword. */
-
-ibool
-dict_str_starts_with_keyword(
-/*=========================*/
- /* out: TRUE if str starts
- with keyword */
- void* mysql_thd, /* in: MySQL thread handle */
- const char* str, /* in: string to scan for keyword */
- const char* keyword); /* in: keyword to look for */
-/*************************************************************************
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-both participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint. */
-
-ulint
-dict_create_foreign_constraints(
-/*============================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction */
- const char* sql_string, /* in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES
- table2(c, d), table2 can be written
- also with the database
- name before it: test.table2; the
- default database is the database of
- parameter name */
- const char* name, /* in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks); /* in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
-/**************************************************************************
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. */
-
-ulint
-dict_foreign_parse_drop_constraints(
-/*================================*/
- /* out: DB_SUCCESS or
- DB_CANNOT_DROP_CONSTRAINT if
- syntax error or the constraint
- id does not match */
- mem_heap_t* heap, /* in: heap from which we can
- allocate memory */
- trx_t* trx, /* in: transaction */
- dict_table_t* table, /* in: table */
- ulint* n, /* out: number of constraints
- to drop */
- const char*** constraints_to_drop); /* out: id's of the
- constraints to drop */
-/**************************************************************************
-Returns a table object and optionally increment its MySQL open handle count.
-NOTE! This is a high-level function to be used mainly from outside the
-'dict' directory. Inside this directory dict_table_get_low is usually the
-appropriate function. */
-
-dict_table_t*
-dict_table_get(
-/*===========*/
- /* out: table, NULL if
- does not exist */
- const char* table_name, /* in: table name */
- ibool inc_mysql_count);
- /* in: whether to increment the open
- handle count on the table */
-/**************************************************************************
-Returns a table object based on table id. */
-
-dict_table_t*
-dict_table_get_on_id(
-/*=================*/
- /* out: table, NULL if does not exist */
- dulint table_id, /* in: table id */
- trx_t* trx); /* in: transaction handle */
-/**************************************************************************
-Returns a table object based on table id. */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_on_id_low(
-/*=====================*/
- /* out: table, NULL if does not exist */
- dulint table_id); /* in: table id */
-/**************************************************************************
-Checks if a table is in the dictionary cache. */
-UNIV_INLINE
-dict_table_t*
-dict_table_check_if_in_cache_low(
-/*=============================*/
- /* out: table, NULL if not found */
- const char* table_name); /* in: table name */
-/**************************************************************************
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function. */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_low(
-/*===============*/
- /* out: table, NULL if not found */
- const char* table_name); /* in: table name */
-/**************************************************************************
-A noninlined version of dict_table_get_low. */
-
-dict_table_t*
-dict_table_get_low_noninlined(
-/*==========================*/
- /* out: table, NULL if not found */
- const char* table_name); /* in: table name */
-/**************************************************************************
-Returns an index object. */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_index(
-/*=================*/
- /* out: index, NULL if does not exist */
- dict_table_t* table, /* in: table */
- const char* name); /* in: index name */
-/**************************************************************************
-Returns an index object. */
-
-dict_index_t*
-dict_table_get_index_noninline(
-/*===========================*/
- /* out: index, NULL if does not exist */
- dict_table_t* table, /* in: table */
- const char* name); /* in: index name */
-/**************************************************************************
-Returns a column's name. */
-
-const char*
-dict_table_get_col_name(
-/*====================*/
- /* out: column name. NOTE: not
- guaranteed to stay valid if table is
- modified in any way (columns added,
- etc.). */
- const dict_table_t* table, /* in: table */
- ulint col_nr);/* in: column number */
-
-/**************************************************************************
-Prints a table definition. */
-
-void
-dict_table_print(
-/*=============*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
-Prints a table data. */
-
-void
-dict_table_print_low(
-/*=================*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
-Prints a table data when we know the table name. */
-
-void
-dict_table_print_by_name(
-/*=====================*/
- const char* name);
-/**************************************************************************
-Outputs info on foreign keys of a table. */
-
-void
-dict_print_info_on_foreign_keys(
-/*============================*/
- ibool create_table_format, /* in: if TRUE then print in
- a format suitable to be inserted into
- a CREATE TABLE, otherwise in the format
- of SHOW TABLE STATUS */
- FILE* file, /* in: file where to print */
- trx_t* trx, /* in: transaction */
- dict_table_t* table); /* in: table */
-/**************************************************************************
-Outputs info on a foreign key of a table in a format suitable for
-CREATE TABLE. */
-void
-dict_print_info_on_foreign_key_in_create_format(
-/*============================================*/
- FILE* file, /* in: file where to print */
- trx_t* trx, /* in: transaction */
- dict_foreign_t* foreign, /* in: foreign key constraint */
- ibool add_newline); /* in: whether to add a newline */
-/************************************************************************
-Displays the names of the index and the table. */
-void
-dict_index_name_print(
-/*==================*/
- FILE* file, /* in: output stream */
- trx_t* trx, /* in: transaction */
- const dict_index_t* index); /* in: index to print */
-/************************************************************************
-Gets the first index on the table (the clustered index). */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_first_index(
-/*=======================*/
- /* out: index, NULL if none exists */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Gets the first index on the table (the clustered index). */
-
-dict_index_t*
-dict_table_get_first_index_noninline(
-/*=================================*/
- /* out: index, NULL if none exists */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Gets the next index on the table. */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_next_index(
-/*======================*/
- /* out: index, NULL if none left */
- dict_index_t* index); /* in: index */
-/************************************************************************
-Gets the next index on the table. */
-
-dict_index_t*
-dict_table_get_next_index_noninline(
-/*================================*/
- /* out: index, NULL if none left */
- dict_index_t* index); /* in: index */
-/************************************************************************
-Gets the number of user-defined columns in a table in the dictionary
-cache. */
-UNIV_INLINE
-ulint
-dict_table_get_n_user_cols(
-/*=======================*/
- /* out: number of user-defined (e.g., not
- ROW_ID) columns of a table */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Gets the number of system columns in a table in the dictionary cache. */
-UNIV_INLINE
-ulint
-dict_table_get_n_sys_cols(
-/*======================*/
- /* out: number of system (e.g.,
- ROW_ID) columns of a table */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Gets the number of all columns (also system) in a table in the dictionary
-cache. */
-UNIV_INLINE
-ulint
-dict_table_get_n_cols(
-/*==================*/
- /* out: number of columns of a table */
- dict_table_t* table); /* in: table */
-/************************************************************************
-Gets the nth column of a table. */
-UNIV_INLINE
-const dict_col_t*
-dict_table_get_nth_col(
-/*===================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint pos); /* in: position of column */
-/************************************************************************
-Gets the nth column of a table. */
-
-const dict_col_t*
-dict_table_get_nth_col_noninline(
-/*=============================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint pos); /* in: position of column */
-/************************************************************************
-Gets the given system column of a table. */
-UNIV_INLINE
-const dict_col_t*
-dict_table_get_sys_col(
-/*===================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint sys); /* in: DATA_ROW_ID, ... */
-/************************************************************************
-Gets the given system column number of a table. */
-UNIV_INLINE
-ulint
-dict_table_get_sys_col_no(
-/*======================*/
- /* out: column number */
- dict_table_t* table, /* in: table */
- ulint sys); /* in: DATA_ROW_ID, ... */
-/************************************************************************
-Check whether the table uses the compact page format. */
-UNIV_INLINE
-ibool
-dict_table_is_comp(
-/*===============*/
- /* out: TRUE if table uses the
- compact page format */
- const dict_table_t* table); /* in: table */
-/************************************************************************
-Check whether the table uses the compact page format. */
-
-ibool
-dict_table_is_comp_noninline(
-/*=========================*/
- /* out: TRUE if table uses the
- compact page format */
- const dict_table_t* table); /* in: table */
-/************************************************************************
-Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns. */
-
-ibool
-dict_table_col_in_clustered_key(
-/*============================*/
- /* out: TRUE if the column, or its prefix, is
- in the clustered key */
- dict_table_t* table, /* in: table */
- ulint n); /* in: column number */
-/***********************************************************************
-Copies types of columns contained in table to tuple. */
-
-void
-dict_table_copy_types(
-/*==================*/
- dtuple_t* tuple, /* in: data tuple */
- dict_table_t* table); /* in: index */
-/**************************************************************************
-Looks for an index with the given id. NOTE that we do not reserve
-the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page! */
-
-dict_index_t*
-dict_index_find_on_id_low(
-/*======================*/
- /* out: index or NULL if not found from cache */
- dulint id); /* in: index id */
-/**************************************************************************
-Adds an index to the dictionary cache. */
-
-void
-dict_index_add_to_cache(
-/*====================*/
- dict_table_t* table, /* in: table on which the index is */
- dict_index_t* index, /* in, own: index; NOTE! The index memory
- object is freed in this function! */
- ulint page_no);/* in: root page number of the index */
-/************************************************************************
-Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system. */
-UNIV_INLINE
-ulint
-dict_index_get_n_fields(
-/*====================*/
- /* out: number of fields */
- dict_index_t* index); /* in: an internal representation of index
- (in the dictionary cache) */
-/************************************************************************
-Gets the number of fields in the internal representation of an index
-that uniquely determine the position of an index entry in the index, if
-we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree. */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique(
-/*====================*/
- /* out: number of fields */
- dict_index_t* index); /* in: an internal representation of index
- (in the dictionary cache) */
-/************************************************************************
-Gets the number of fields in the internal representation of an index
-which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account. */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique_in_tree(
-/*============================*/
- /* out: number of fields */
- dict_index_t* index); /* in: an internal representation of index
- (in the dictionary cache) */
-/************************************************************************
-Gets the number of user-defined ordering fields in the index. In the internal
-representation we add the row id to the ordering fields to make all indexes
-unique, but this function returns the number of fields the user defined
-in the index as ordering fields. */
-UNIV_INLINE
-ulint
-dict_index_get_n_ordering_defined_by_user(
-/*======================================*/
- /* out: number of fields */
- dict_index_t* index); /* in: an internal representation of index
- (in the dictionary cache) */
-/************************************************************************
-Gets the nth field of an index. */
-UNIV_INLINE
-dict_field_t*
-dict_index_get_nth_field(
-/*=====================*/
- /* out: pointer to field object */
- dict_index_t* index, /* in: index */
- ulint pos); /* in: position of field */
-/************************************************************************
-Gets pointer to the nth column in an index. */
-UNIV_INLINE
-const dict_col_t*
-dict_index_get_nth_col(
-/*===================*/
- /* out: column */
- const dict_index_t* index, /* in: index */
- ulint pos); /* in: position of the field */
-/************************************************************************
-Gets the column number of the nth field in an index. */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_no(
-/*======================*/
- /* out: column number */
- const dict_index_t* index, /* in: index */
- ulint pos); /* in: position of the field */
-/************************************************************************
-Looks for column n in an index. */
-
-ulint
-dict_index_get_nth_col_pos(
-/*=======================*/
- /* out: position in internal representation
- of the index; if not contained, returns
- ULINT_UNDEFINED */
- dict_index_t* index, /* in: index */
- ulint n); /* in: column number */
-/************************************************************************
-Returns TRUE if the index contains a column or a prefix of that column. */
-
-ibool
-dict_index_contains_col_or_prefix(
-/*==============================*/
- /* out: TRUE if contains the column or its
- prefix */
- dict_index_t* index, /* in: index */
- ulint n); /* in: column number */
-/************************************************************************
-Looks for a matching field in an index. The column has to be the same. The
-column in index must be complete, or must contain a prefix longer than the
-column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index. */
-
-ulint
-dict_index_get_nth_field_pos(
-/*=========================*/
- /* out: position in internal representation
- of the index; if not contained, returns
- ULINT_UNDEFINED */
- dict_index_t* index, /* in: index from which to search */
- dict_index_t* index2, /* in: index */
- ulint n); /* in: field number in index2 */
-/************************************************************************
-Looks for column n position in the clustered index. */
-
-ulint
-dict_table_get_nth_col_pos(
-/*=======================*/
- /* out: position in internal representation
- of the clustered index */
- dict_table_t* table, /* in: table */
- ulint n); /* in: column number */
-/************************************************************************
-Returns the position of a system column in an index. */
-UNIV_INLINE
-ulint
-dict_index_get_sys_col_pos(
-/*=======================*/
- /* out: position, ULINT_UNDEFINED if not
- contained */
- dict_index_t* index, /* in: index */
- ulint type); /* in: DATA_ROW_ID, ... */
-/***********************************************************************
-Adds a column to index. */
-
-void
-dict_index_add_col(
-/*===============*/
- dict_index_t* index, /* in: index */
- dict_table_t* table, /* in: table */
- dict_col_t* col, /* in: column */
- ulint prefix_len); /* in: column prefix length */
-/***********************************************************************
-Copies types of fields contained in index to tuple. */
-
-void
-dict_index_copy_types(
-/*==================*/
- dtuple_t* tuple, /* in: data tuple */
- dict_index_t* index, /* in: index */
- ulint n_fields); /* in: number of field types to copy */
-/*************************************************************************
-Gets the field column. */
-UNIV_INLINE
-const dict_col_t*
-dict_field_get_col(
-/*===============*/
- const dict_field_t* field);
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Returns an index object if it is found in the dictionary cache. */
-
-dict_index_t*
-dict_index_get_if_in_cache(
-/*=======================*/
- /* out: index, NULL if not found */
- dulint index_id); /* in: index id */
-/**************************************************************************
-Checks that a tuple has n_fields_cmp value in a sensible range, so that
-no comparison can occur with the page number field in a node pointer. */
-
-ibool
-dict_index_check_search_tuple(
-/*==========================*/
- /* out: TRUE if ok */
- dict_index_t* index, /* in: index */
- dtuple_t* tuple); /* in: tuple used in a search */
-#endif /* UNIV_DEBUG */
-/**************************************************************************
-Builds a node pointer out of a physical record and a page number. */
-
-dtuple_t*
-dict_index_build_node_ptr(
-/*======================*/
- /* out, own: node pointer */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record for which to build node
- pointer */
- ulint page_no,/* in: page number to put in node pointer */
- mem_heap_t* heap, /* in: memory heap where pointer created */
- ulint level); /* in: level of rec in tree: 0 means leaf
- level */
-/**************************************************************************
-Copies an initial segment of a physical record, long enough to specify an
-index entry uniquely. */
-
-rec_t*
-dict_index_copy_rec_order_prefix(
-/*=============================*/
- /* out: pointer to the prefix record */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record for which to copy prefix */
- ulint* n_fields,/* out: number of fields copied */
- byte** buf, /* in/out: memory buffer for the copied prefix,
- or NULL */
- ulint* buf_size);/* in/out: buffer size */
-/**************************************************************************
-Builds a typed data tuple out of a physical record. */
-
-dtuple_t*
-dict_index_build_data_tuple(
-/*========================*/
- /* out, own: data tuple */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record for which to build data tuple */
- ulint n_fields,/* in: number of data fields */
- mem_heap_t* heap); /* in: memory heap where tuple created */
-/*************************************************************************
-Gets the space id of the root of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- /* out: space id */
- dict_index_t* index); /* in: index */
-/*************************************************************************
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /* in: index */
- ulint space); /* in: space id */
-/*************************************************************************
-Gets the page number of the root of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_page(
-/*================*/
- /* out: page number */
- dict_index_t* tree); /* in: index */
-/*************************************************************************
-Sets the page number of the root of index tree. */
-UNIV_INLINE
-void
-dict_index_set_page(
-/*================*/
- dict_index_t* index, /* in: index */
- ulint page); /* in: page number */
-/*************************************************************************
-Gets the type of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_type(
-/*================*/
- /* out: type */
- dict_index_t* index); /* in: index */
-/*************************************************************************
-Gets the read-write lock of the index tree. */
-UNIV_INLINE
-rw_lock_t*
-dict_index_get_lock(
-/*================*/
- /* out: read-write lock */
- dict_index_t* index); /* in: index */
-/************************************************************************
-Returns free space reserved for future updates of records. This is
-relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index. */
-UNIV_INLINE
-ulint
-dict_index_get_space_reserve(void);
-/*==============================*/
- /* out: number of free bytes on page,
- reserved for updates */
-/*************************************************************************
-Calculates the minimum record length in an index. */
-
-ulint
-dict_index_calc_min_rec_len(
-/*========================*/
- dict_index_t* index); /* in: index */
-/*************************************************************************
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
-
-void
-dict_update_statistics_low(
-/*=======================*/
- dict_table_t* table, /* in: table */
- ibool has_dict_mutex);/* in: TRUE if the caller has the
- dictionary mutex */
-/*************************************************************************
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
-
-void
-dict_update_statistics(
-/*===================*/
- dict_table_t* table); /* in: table */
-/************************************************************************
-Reserves the dictionary system mutex for MySQL. */
-
-void
-dict_mutex_enter_for_mysql(void);
-/*============================*/
-/************************************************************************
-Releases the dictionary system mutex for MySQL. */
-
-void
-dict_mutex_exit_for_mysql(void);
-/*===========================*/
-/************************************************************************
-Checks if the database name in two table names is the same. */
-
-ibool
-dict_tables_have_same_db(
-/*=====================*/
- /* out: TRUE if same db name */
- const char* name1, /* in: table name in the form
- dbname '/' tablename */
- const char* name2); /* in: table name in the form
- dbname '/' tablename */
-/*************************************************************************
-Scans from pointer onwards. Stops if is at the start of a copy of
-'string' where characters are compared without case sensitivity. Stops
-also at '\0'. */
-
-const char*
-dict_scan_to(
-/*=========*/
- /* out: scanned up to this */
- const char* ptr, /* in: scan from */
- const char* string);/* in: look for this */
-/* Buffers for storing detailed information about the latest foreign key
-and unique key errors */
-extern FILE* dict_foreign_err_file;
-extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
-
-extern dict_sys_t* dict_sys; /* the dictionary system */
-extern rw_lock_t dict_operation_lock;
-
-/* Dictionary system struct */
-struct dict_sys_struct{
- mutex_t mutex; /* mutex protecting the data
- dictionary; protects also the
- disk-based dictionary system tables;
- this mutex serializes CREATE TABLE
- and DROP TABLE, as well as reading
- the dictionary data for a table from
- system tables */
- dulint row_id; /* the next row id to assign;
- NOTE that at a checkpoint this
- must be written to the dict system
- header and flushed to a file; in
- recovery this must be derived from
- the log records */
- hash_table_t* table_hash; /* hash table of the tables, based
- on name */
- hash_table_t* table_id_hash; /* hash table of the tables, based
- on id */
- UT_LIST_BASE_NODE_T(dict_table_t)
- table_LRU; /* LRU list of tables */
- ulint size; /* varying space in bytes occupied
- by the data dictionary table and
- index objects */
- dict_table_t* sys_tables; /* SYS_TABLES table */
- dict_table_t* sys_columns; /* SYS_COLUMNS table */
- dict_table_t* sys_indexes; /* SYS_INDEXES table */
- dict_table_t* sys_fields; /* SYS_FIELDS table */
-};
-
-#ifndef UNIV_NONINL
-#include "dict0dict.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
deleted file mode 100644
index 7d38cbcd1fa..00000000000
--- a/storage/innobase/include/dict0dict.ic
+++ /dev/null
@@ -1,664 +0,0 @@
-/**********************************************************************
-Data dictionary system
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "dict0load.h"
-#include "trx0undo.h"
-#include "trx0sys.h"
-#include "rem0types.h"
-#include "data0type.h"
-
-/*************************************************************************
-Gets the column data type. */
-UNIV_INLINE
-void
-dict_col_copy_type(
-/*===============*/
- const dict_col_t* col, /* in: column */
- dtype_t* type) /* out: data type */
-{
- ut_ad(col && type);
-
- type->mtype = col->mtype;
- type->prtype = col->prtype;
- type->len = col->len;
- type->mbminlen = col->mbminlen;
- type->mbmaxlen = col->mbmaxlen;
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************************
-Assert that a column and a data type match. */
-UNIV_INLINE
-ibool
-dict_col_type_assert_equal(
-/*=======================*/
- /* out: TRUE */
- const dict_col_t* col, /* in: column */
- const dtype_t* type) /* in: data type */
-{
- ut_ad(col);
- ut_ad(type);
-
- ut_ad(col->mtype == type->mtype);
- ut_ad(col->prtype == type->prtype);
- ut_ad(col->len == type->len);
- ut_ad(col->mbminlen == type->mbminlen);
- ut_ad(col->mbmaxlen == type->mbmaxlen);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/***************************************************************************
-Returns the minimum size of the column. */
-UNIV_INLINE
-ulint
-dict_col_get_min_size(
-/*==================*/
- /* out: minimum size */
- const dict_col_t* col) /* in: column */
-{
- return(dtype_get_min_size_low(col->mtype, col->prtype, col->len,
- col->mbminlen, col->mbmaxlen));
-}
-/***************************************************************************
-Returns the maximum size of the column. */
-UNIV_INLINE
-ulint
-dict_col_get_max_size(
-/*==================*/
- /* out: maximum size */
- const dict_col_t* col) /* in: column */
-{
- return(dtype_get_max_size_low(col->mtype, col->len));
-}
-/***************************************************************************
-Returns the size of a fixed size column, 0 if not a fixed size column. */
-UNIV_INLINE
-ulint
-dict_col_get_fixed_size(
-/*====================*/
- /* out: fixed size, or 0 */
- const dict_col_t* col) /* in: column */
-{
- return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len,
- col->mbminlen, col->mbmaxlen));
-}
-/***************************************************************************
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0. */
-UNIV_INLINE
-ulint
-dict_col_get_sql_null_size(
-/*=======================*/
- /* out: SQL null storage size
- in ROW_FORMAT=REDUNDANT */
- const dict_col_t* col) /* in: column */
-{
- return(dict_col_get_fixed_size(col));
-}
-
-/*************************************************************************
-Gets the column number. */
-UNIV_INLINE
-ulint
-dict_col_get_no(
-/*============*/
- const dict_col_t* col)
-{
- ut_ad(col);
-
- return(col->ind);
-}
-
-/*************************************************************************
-Gets the column position in the clustered index. */
-UNIV_INLINE
-ulint
-dict_col_get_clust_pos(
-/*===================*/
- const dict_col_t* col, /* in: table column */
- const dict_index_t* clust_index) /* in: clustered index */
-{
- ulint i;
-
- ut_ad(col);
- ut_ad(clust_index && clust_index->type & DICT_CLUSTERED);
-
- for (i = 0; i < clust_index->n_def; i++) {
- const dict_field_t* field = &clust_index->fields[i];
-
- if (!field->prefix_len && field->col == col) {
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/************************************************************************
-Gets the first index on the table (the clustered index). */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_first_index(
-/*=======================*/
- /* out: index, NULL if none exists */
- dict_table_t* table) /* in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(UT_LIST_GET_FIRST(table->indexes));
-}
-
-/************************************************************************
-Gets the next index on the table. */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_next_index(
-/*======================*/
- /* out: index, NULL if none left */
- dict_index_t* index) /* in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(UT_LIST_GET_NEXT(indexes, index));
-}
-
-/************************************************************************
-Gets the number of user-defined columns in a table in the dictionary
-cache. */
-UNIV_INLINE
-ulint
-dict_table_get_n_user_cols(
-/*=======================*/
- /* out: number of user-defined (e.g., not
- ROW_ID) columns of a table */
- dict_table_t* table) /* in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols - DATA_N_SYS_COLS);
-}
-
-/************************************************************************
-Gets the number of system columns in a table in the dictionary cache. */
-UNIV_INLINE
-ulint
-dict_table_get_n_sys_cols(
-/*======================*/
- /* out: number of system (e.g.,
- ROW_ID) columns of a table */
- dict_table_t* table __attribute__((unused))) /* in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(table->cached);
-
- return(DATA_N_SYS_COLS);
-}
-
-/************************************************************************
-Gets the number of all columns (also system) in a table in the dictionary
-cache. */
-UNIV_INLINE
-ulint
-dict_table_get_n_cols(
-/*==================*/
- /* out: number of columns of a table */
- dict_table_t* table) /* in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols);
-}
-
-/************************************************************************
-Gets the nth column of a table. */
-UNIV_INLINE
-const dict_col_t*
-dict_table_get_nth_col(
-/*===================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint pos) /* in: position of column */
-{
- ut_ad(table);
- ut_ad(pos < table->n_def);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return((table->cols) + pos);
-}
-
-/************************************************************************
-Gets the given system column of a table. */
-UNIV_INLINE
-const dict_col_t*
-dict_table_get_sys_col(
-/*===================*/
- /* out: pointer to column object */
- const dict_table_t* table, /* in: table */
- ulint sys) /* in: DATA_ROW_ID, ... */
-{
- const dict_col_t* col;
-
- ut_ad(table);
- ut_ad(sys < DATA_N_SYS_COLS);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- col = dict_table_get_nth_col(table, table->n_cols
- - DATA_N_SYS_COLS + sys);
- ut_ad(col->mtype == DATA_SYS);
- ut_ad(col->prtype == (sys | DATA_NOT_NULL));
-
- return(col);
-}
-
-/************************************************************************
-Gets the given system column number of a table. */
-UNIV_INLINE
-ulint
-dict_table_get_sys_col_no(
-/*======================*/
- /* out: column number */
- dict_table_t* table, /* in: table */
- ulint sys) /* in: DATA_ROW_ID, ... */
-{
- ut_ad(table);
- ut_ad(sys < DATA_N_SYS_COLS);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols - DATA_N_SYS_COLS + sys);
-}
-
-/************************************************************************
-Check whether the table uses the compact page format. */
-UNIV_INLINE
-ibool
-dict_table_is_comp(
-/*===============*/
- /* out: TRUE if table uses the
- compact page format */
- const dict_table_t* table) /* in: table */
-{
- ut_ad(table);
-
-#if DICT_TF_COMPACT != TRUE
-#error
-#endif
-
- return(UNIV_LIKELY(table->flags & DICT_TF_COMPACT));
-}
-
-/************************************************************************
-Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system. */
-UNIV_INLINE
-ulint
-dict_index_get_n_fields(
-/*====================*/
- /* out: number of fields */
- dict_index_t* index) /* in: an internal representation of index
- (in the dictionary cache) */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->n_fields);
-}
-
-/************************************************************************
-Gets the number of fields in the internal representation of an index
-that uniquely determine the position of an index entry in the index, if
-we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree. */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique(
-/*====================*/
- /* out: number of fields */
- dict_index_t* index) /* in: an internal representation of index
- (in the dictionary cache) */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(index->cached);
-
- return(index->n_uniq);
-}
-
-/************************************************************************
-Gets the number of fields in the internal representation of an index
-which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account. */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique_in_tree(
-/*============================*/
- /* out: number of fields */
- dict_index_t* index) /* in: an internal representation of index
- (in the dictionary cache) */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(index->cached);
-
- if (index->type & DICT_CLUSTERED) {
-
- return(dict_index_get_n_unique(index));
- }
-
- return(dict_index_get_n_fields(index));
-}
-
-/************************************************************************
-Gets the number of user-defined ordering fields in the index. In the internal
-representation of clustered indexes we add the row id to the ordering fields
-to make a clustered index unique, but this function returns the number of
-fields the user defined in the index as ordering fields. */
-UNIV_INLINE
-ulint
-dict_index_get_n_ordering_defined_by_user(
-/*======================================*/
- /* out: number of fields */
- dict_index_t* index) /* in: an internal representation of index
- (in the dictionary cache) */
-{
- return(index->n_user_defined_cols);
-}
-
-/************************************************************************
-Gets the nth field of an index. */
-UNIV_INLINE
-dict_field_t*
-dict_index_get_nth_field(
-/*=====================*/
- /* out: pointer to field object */
- dict_index_t* index, /* in: index */
- ulint pos) /* in: position of field */
-{
- ut_ad(index);
- ut_ad(pos < index->n_def);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return((index->fields) + pos);
-}
-
-/************************************************************************
-Returns the position of a system column in an index. */
-UNIV_INLINE
-ulint
-dict_index_get_sys_col_pos(
-/*=======================*/
- /* out: position, ULINT_UNDEFINED if not
- contained */
- dict_index_t* index, /* in: index */
- ulint type) /* in: DATA_ROW_ID, ... */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(!(index->type & DICT_UNIVERSAL));
-
- if (index->type & DICT_CLUSTERED) {
-
- return(dict_col_get_clust_pos(
- dict_table_get_sys_col(index->table, type),
- index));
- }
-
- return(dict_index_get_nth_col_pos(
- index, dict_table_get_sys_col_no(index->table, type)));
-}
-
-/*************************************************************************
-Gets the field column. */
-UNIV_INLINE
-const dict_col_t*
-dict_field_get_col(
-/*===============*/
- const dict_field_t* field)
-{
- ut_ad(field);
-
- return(field->col);
-}
-
-/************************************************************************
-Gets pointer to the nth column in an index. */
-UNIV_INLINE
-const dict_col_t*
-dict_index_get_nth_col(
-/*===================*/
- /* out: column */
- const dict_index_t* index, /* in: index */
- ulint pos) /* in: position of the field */
-{
- return(dict_field_get_col(dict_index_get_nth_field((dict_index_t*)
- index, pos)));
-}
-
-/************************************************************************
-Gets the column number the nth field in an index. */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_no(
-/*======================*/
- /* out: column number */
- const dict_index_t* index, /* in: index */
- ulint pos) /* in: position of the field */
-{
- return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
-}
-
-/*************************************************************************
-Gets the space id of the root of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- /* out: space id */
- dict_index_t* index) /* in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->space);
-}
-
-/*************************************************************************
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /* in: index */
- ulint space) /* in: space id */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- index->space = space;
-}
-
-/*************************************************************************
-Gets the page number of the root of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_page(
-/*================*/
- /* out: page number */
- dict_index_t* index) /* in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->page);
-}
-
-/*************************************************************************
-Sets the page number of the root of index tree. */
-UNIV_INLINE
-void
-dict_index_set_page(
-/*================*/
- dict_index_t* index, /* in: index */
- ulint page) /* in: page number */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- index->page = page;
-}
-
-/*************************************************************************
-Gets the type of the index tree. */
-UNIV_INLINE
-ulint
-dict_index_get_type(
-/*================*/
- /* out: type */
- dict_index_t* index) /* in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->type);
-}
-
-/*************************************************************************
-Gets the read-write lock of the index tree. */
-UNIV_INLINE
-rw_lock_t*
-dict_index_get_lock(
-/*================*/
- /* out: read-write lock */
- dict_index_t* index) /* in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(&(index->lock));
-}
-
-/************************************************************************
-Returns free space reserved for future updates of records. This is
-relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index. */
-UNIV_INLINE
-ulint
-dict_index_get_space_reserve(void)
-/*==============================*/
- /* out: number of free bytes on page,
- reserved for updates */
-{
- return(UNIV_PAGE_SIZE / 16);
-}
-
-/**************************************************************************
-Checks if a table is in the dictionary cache. */
-UNIV_INLINE
-dict_table_t*
-dict_table_check_if_in_cache_low(
-/*=============================*/
- /* out: table, NULL if not found */
- const char* table_name) /* in: table name */
-{
- dict_table_t* table;
- ulint table_fold;
-
- ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* Look for the table name in the hash table */
- table_fold = ut_fold_string(table_name);
-
- HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table,
- ut_strcmp(table->name, table_name) == 0);
- return(table);
-}
-
-/**************************************************************************
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function. */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_low(
-/*===============*/
- /* out: table, NULL if not found */
- const char* table_name) /* in: table name */
-{
- dict_table_t* table;
-
- ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- table = dict_table_check_if_in_cache_low(table_name);
-
- if (table == NULL) {
- table = dict_load_table(table_name);
- }
-
- return(table);
-}
-
-/**************************************************************************
-Returns a table object based on table id. */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_on_id_low(
-/*=====================*/
- /* out: table, NULL if does not exist */
- dulint table_id) /* in: table id */
-{
- dict_table_t* table;
- ulint fold;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* Look for the table name in the hash table */
- fold = ut_fold_dulint(table_id);
-
- HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, table,
- ut_dulint_cmp(table->id, table_id) == 0);
- if (table == NULL) {
- table = dict_load_table_on_id(table_id);
- }
-
- /* TODO: should get the type information from MySQL */
-
- return(table);
-}
-
-/**************************************************************************
-Returns an index object. */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_index(
-/*=================*/
- /* out: index, NULL if does not exist */
- dict_table_t* table, /* in: table */
- const char* name) /* in: index name */
-{
- dict_index_t* index = NULL;
-
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (ut_strcmp(name, index->name) == 0) {
-
- break;
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(index);
-}
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
deleted file mode 100644
index 7e19c2eb3c0..00000000000
--- a/storage/innobase/include/dict0load.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/******************************************************
-Loads to the memory cache database object definitions
-from dictionary tables
-
-(c) 1996 Innobase Oy
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0load_h
-#define dict0load_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "ut0byte.h"
-
-/************************************************************************
-In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary
-to what we already read with fil_load_single_table_tablespaces().
-
-In a normal startup, we create the tablespace objects for every table in
-InnoDB's data dictionary, if the corresponding .ibd file exists.
-We also scan the biggest space id, and store it to fil_system. */
-
-void
-dict_check_tablespaces_and_store_max_id(
-/*====================================*/
- ibool in_crash_recovery); /* in: are we doing a crash recovery */
-/************************************************************************
-Finds the first table name in the given database. */
-
-char*
-dict_get_first_table_name_in_db(
-/*============================*/
- /* out, own: table name, NULL if
- does not exist; the caller must free
- the memory in the string! */
- const char* name); /* in: database name which ends to '/' */
-/************************************************************************
-Loads a table definition and also all its index definitions, and also
-the cluster definition if the table is a member in a cluster. Also loads
-all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table. */
-
-dict_table_t*
-dict_load_table(
-/*============*/
- /* out: table, NULL if does not exist;
- if the table is stored in an .ibd file,
- but the file does not exist,
- then we set the ibd_file_missing flag TRUE
- in the table object we return */
- const char* name); /* in: table name in the
- databasename/tablename format */
-/***************************************************************************
-Loads a table object based on the table id. */
-
-dict_table_t*
-dict_load_table_on_id(
-/*==================*/
- /* out: table; NULL if table does not exist */
- dulint table_id); /* in: table id */
-/************************************************************************
-This function is called when the database is booted.
-Loads system table index definitions except for the clustered index which
-is added to the dictionary cache at booting before calling this function. */
-
-void
-dict_load_sys_table(
-/*================*/
- dict_table_t* table); /* in: system table */
-/***************************************************************************
-Loads foreign key constraints where the table is either the foreign key
-holder or where the table is referenced by a foreign key. Adds these
-constraints to the data dictionary. Note that we know that the dictionary
-cache already contains all constraints where the other relevant table is
-already in the dictionary cache. */
-
-ulint
-dict_load_foreigns(
-/*===============*/
- /* out: DB_SUCCESS or error code */
- const char* table_name, /* in: table name */
- ibool check_charsets);/* in: TRUE=check charsets
- compatibility */
-/************************************************************************
-Prints to the standard output information on all tables found in the data
-dictionary system table. */
-
-void
-dict_print(void);
-/*============*/
-
-
-#ifndef UNIV_NONINL
-#include "dict0load.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dict0load.ic b/storage/innobase/include/dict0load.ic
deleted file mode 100644
index 1a207fbf0fd..00000000000
--- a/storage/innobase/include/dict0load.ic
+++ /dev/null
@@ -1,9 +0,0 @@
-/******************************************************
-Loads to the memory cache database object definitions
-from dictionary tables
-
-(c) 1996 Innobase Oy
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
deleted file mode 100644
index ac28fdb1bae..00000000000
--- a/storage/innobase/include/dict0mem.h
+++ /dev/null
@@ -1,431 +0,0 @@
-/******************************************************
-Data dictionary memory object creation
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0mem_h
-#define dict0mem_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "data0type.h"
-#include "data0data.h"
-#include "mem0mem.h"
-#include "rem0types.h"
-#include "btr0types.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "ut0rnd.h"
-#include "ut0byte.h"
-#include "sync0rw.h"
-#include "lock0types.h"
-#include "hash0hash.h"
-#include "que0types.h"
-
-/* Type flags of an index: OR'ing of the flags is allowed to define a
-combination of types */
-#define DICT_CLUSTERED 1 /* clustered index */
-#define DICT_UNIQUE 2 /* unique index */
-#define DICT_UNIVERSAL 4 /* index which can contain records from any
- other index */
-#define DICT_IBUF 8 /* insert buffer tree */
-
-/* Types for a table object */
-#define DICT_TABLE_ORDINARY 1
-#if 0 /* not implemented */
-#define DICT_TABLE_CLUSTER_MEMBER 2
-#define DICT_TABLE_CLUSTER 3 /* this means that the table is
- really a cluster definition */
-#endif
-
-/* Table flags */
-#define DICT_TF_COMPACT 1 /* compact page format */
-
-/**************************************************************************
-Creates a table memory object. */
-
-dict_table_t*
-dict_mem_table_create(
-/*==================*/
- /* out, own: table object */
- const char* name, /* in: table name */
- ulint space, /* in: space where the clustered index
- of the table is placed; this parameter
- is ignored if the table is made
- a member of a cluster */
- ulint n_cols, /* in: number of columns */
- ulint flags); /* in: table flags */
-/********************************************************************
-Free a table memory object. */
-
-void
-dict_mem_table_free(
-/*================*/
- dict_table_t* table); /* in: table */
-/**************************************************************************
-Adds a column definition to a table. */
-
-void
-dict_mem_table_add_col(
-/*===================*/
- dict_table_t* table, /* in: table */
- mem_heap_t* heap, /* in: temporary memory heap, or NULL */
- const char* name, /* in: column name, or NULL */
- ulint mtype, /* in: main datatype */
- ulint prtype, /* in: precise type */
- ulint len); /* in: precision */
-/**************************************************************************
-Creates an index memory object. */
-
-dict_index_t*
-dict_mem_index_create(
-/*==================*/
- /* out, own: index object */
- const char* table_name, /* in: table name */
- const char* index_name, /* in: index name */
- ulint space, /* in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
- ulint type, /* in: DICT_UNIQUE,
- DICT_CLUSTERED, ... ORed */
- ulint n_fields); /* in: number of fields */
-/**************************************************************************
-Adds a field definition to an index. NOTE: does not take a copy
-of the column name if the field is a column. The memory occupied
-by the column name may be released only after publishing the index. */
-
-void
-dict_mem_index_add_field(
-/*=====================*/
- dict_index_t* index, /* in: index */
- const char* name, /* in: column name */
- ulint prefix_len); /* in: 0 or the column prefix length
- in a MySQL index like
- INDEX (textcol(25)) */
-/**************************************************************************
-Frees an index memory object. */
-
-void
-dict_mem_index_free(
-/*================*/
- dict_index_t* index); /* in: index */
-/**************************************************************************
-Creates and initializes a foreign constraint memory object. */
-
-dict_foreign_t*
-dict_mem_foreign_create(void);
-/*=========================*/
- /* out, own: foreign constraint struct */
-
-/* Data structure for a column in a table */
-struct dict_col_struct{
- /*----------------------*/
- /* The following are copied from dtype_t,
- so that all bit-fields can be packed tightly. */
- unsigned mtype:8; /* main data type */
- unsigned prtype:24; /* precise type; MySQL data
- type, charset code, flags to
- indicate nullability,
- signedness, whether this is a
- binary string, whether this is
- a true VARCHAR where MySQL
- uses 2 bytes to store the length */
-
- /* the remaining fields do not affect alphabetical ordering: */
-
- unsigned len:16; /* length; for MySQL data this
- is field->pack_length(),
- except that for a >= 5.0.3
- type true VARCHAR this is the
- maximum byte length of the
- string data (in addition to
- the string, MySQL uses 1 or 2
- bytes to store the string length) */
-
- unsigned mbminlen:2; /* minimum length of a
- character, in bytes */
- unsigned mbmaxlen:3; /* maximum length of a
- character, in bytes */
- /*----------------------*/
- /* End of definitions copied from dtype_t */
-
- unsigned ind:10; /* table column position
- (starting from 0) */
- unsigned ord_part:1; /* nonzero if this column
- appears in the ordering fields
- of an index */
-};
-
-/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed column length (or indexed prefix length). It is set to 3*256,
-so that one can create a column prefix index on 256 characters of a
-TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
-a character may take at most 3 bytes.
-This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
-files would be at risk! */
-
-#define DICT_MAX_INDEX_COL_LEN 768
-
-/* Data structure for a field in an index */
-struct dict_field_struct{
- dict_col_t* col; /* pointer to the table column */
- const char* name; /* name of the column */
- unsigned prefix_len:10; /* 0 or the length of the column
- prefix in bytes in a MySQL index of
- type, e.g., INDEX (textcol(25));
- must be smaller than
- DICT_MAX_INDEX_COL_LEN; NOTE that
- in the UTF-8 charset, MySQL sets this
- to 3 * the prefix len in UTF-8 chars */
- unsigned fixed_len:10; /* 0 or the fixed length of the
- column if smaller than
- DICT_MAX_INDEX_COL_LEN */
-};
-
-/* Data structure for an index */
-struct dict_index_struct{
- dulint id; /* id of the index */
- mem_heap_t* heap; /* memory heap */
- ulint type; /* index type */
- const char* name; /* index name */
- const char* table_name; /* table name */
- dict_table_t* table; /* back pointer to table */
- unsigned space:32;
- /* space where the index tree is placed */
- unsigned page:32;/* index tree root page number */
- unsigned trx_id_offset:10;/* position of the the trx id column
- in a clustered index record, if the fields
- before it are known to be of a fixed size,
- 0 otherwise */
- unsigned n_user_defined_cols:10;
- /* number of columns the user defined to
- be in the index: in the internal
- representation we add more columns */
- unsigned n_uniq:10;/* number of fields from the beginning
- which are enough to determine an index
- entry uniquely */
- unsigned n_def:10;/* number of fields defined so far */
- unsigned n_fields:10;/* number of fields in the index */
- unsigned n_nullable:10;/* number of nullable fields */
- unsigned cached:1;/* TRUE if the index object is in the
- dictionary cache */
- dict_field_t* fields; /* array of field descriptions */
- UT_LIST_NODE_T(dict_index_t)
- indexes;/* list of indexes of the table */
- btr_search_t* search_info; /* info used in optimistic searches */
- /*----------------------*/
- ib_longlong* stat_n_diff_key_vals;
- /* approximate number of different key values
- for this index, for each n-column prefix
- where n <= dict_get_n_unique(index); we
- periodically calculate new estimates */
- ulint stat_index_size;
- /* approximate index size in database pages */
- ulint stat_n_leaf_pages;
- /* approximate number of leaf pages in the
- index tree */
- rw_lock_t lock; /* read-write lock protecting the upper levels
- of the index tree */
-#ifdef UNIV_DEBUG
- ulint magic_n;/* magic number */
-# define DICT_INDEX_MAGIC_N 76789786
-#endif
-};
-
-/* Data structure for a foreign key constraint; an example:
-FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D) */
-
-struct dict_foreign_struct{
- mem_heap_t* heap; /* this object is allocated from
- this memory heap */
- char* id; /* id of the constraint as a
- null-terminated string */
- unsigned n_fields:10; /* number of indexes' first fields
- for which the the foreign key
- constraint is defined: we allow the
- indexes to contain more fields than
- mentioned in the constraint, as long
- as the first fields are as mentioned */
- unsigned type:6; /* 0 or DICT_FOREIGN_ON_DELETE_CASCADE
- or DICT_FOREIGN_ON_DELETE_SET_NULL */
- char* foreign_table_name;/* foreign table name */
- dict_table_t* foreign_table; /* table where the foreign key is */
- const char** foreign_col_names;/* names of the columns in the
- foreign key */
- char* referenced_table_name;/* referenced table name */
- dict_table_t* referenced_table;/* table where the referenced key
- is */
- const char** referenced_col_names;/* names of the referenced
- columns in the referenced table */
- dict_index_t* foreign_index; /* foreign index; we require that
- both tables contain explicitly defined
- indexes for the constraint: InnoDB
- does not generate new indexes
- implicitly */
- dict_index_t* referenced_index;/* referenced index */
- UT_LIST_NODE_T(dict_foreign_t)
- foreign_list; /* list node for foreign keys of the
- table */
- UT_LIST_NODE_T(dict_foreign_t)
- referenced_list;/* list node for referenced keys of the
- table */
-};
-
-/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
-a foreign key constraint is enforced, therefore RESTRICT just means no flag */
-#define DICT_FOREIGN_ON_DELETE_CASCADE 1
-#define DICT_FOREIGN_ON_DELETE_SET_NULL 2
-#define DICT_FOREIGN_ON_UPDATE_CASCADE 4
-#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8
-#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16
-#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32
-
-
-/* Data structure for a database table */
-struct dict_table_struct{
- dulint id; /* id of the table */
- mem_heap_t* heap; /* memory heap */
- const char* name; /* table name */
- const char* dir_path_of_temp_table;/* NULL or the directory path
- where a TEMPORARY table that was explicitly
- created by a user should be placed if
- innodb_file_per_table is defined in my.cnf;
- in Unix this is usually /tmp/..., in Windows
- \temp\... */
- unsigned space:32;
- /* space where the clustered index of the
- table is placed */
- unsigned ibd_file_missing:1;
- /* TRUE if this is in a single-table
- tablespace and the .ibd file is missing; then
- we must return in ha_innodb.cc an error if the
- user tries to query such an orphaned table */
- unsigned tablespace_discarded:1;
- /* this flag is set TRUE when the user
- calls DISCARD TABLESPACE on this
- table, and reset to FALSE in IMPORT
- TABLESPACE */
- unsigned cached:1;/* TRUE if the table object has been added
- to the dictionary cache */
- unsigned flags:8;/* DICT_TF_COMPACT, ... */
- unsigned n_def:10;/* number of columns defined so far */
- unsigned n_cols:10;/* number of columns */
- dict_col_t* cols; /* array of column descriptions */
- const char* col_names;
- /* Column names packed in a character string
- "name1\0name2\0...nameN\0". Until
- the string contains n_cols, it will be
- allocated from a temporary heap. The final
- string will be allocated from table->heap. */
- hash_node_t name_hash; /* hash chain node */
- hash_node_t id_hash; /* hash chain node */
- UT_LIST_BASE_NODE_T(dict_index_t)
- indexes; /* list of indexes of the table */
- UT_LIST_BASE_NODE_T(dict_foreign_t)
- foreign_list;/* list of foreign key constraints
- in the table; these refer to columns
- in other tables */
- UT_LIST_BASE_NODE_T(dict_foreign_t)
- referenced_list;/* list of foreign key constraints
- which refer to this table */
- UT_LIST_NODE_T(dict_table_t)
- table_LRU; /* node of the LRU list of tables */
- ulint n_mysql_handles_opened;
- /* count of how many handles MySQL has opened
- to this table; dropping of the table is
- NOT allowed until this count gets to zero;
- MySQL does NOT itself check the number of
- open handles at drop */
- ulint n_foreign_key_checks_running;
- /* count of how many foreign key check
- operations are currently being performed
- on the table: we cannot drop the table while
- there are foreign key checks running on
- it! */
- lock_t* auto_inc_lock;/* a buffer for an auto-inc lock
- for this table: we allocate the memory here
- so that individual transactions can get it
- and release it without a need to allocate
- space from the lock heap of the trx:
- otherwise the lock heap would grow rapidly
- if we do a large insert from a select */
- dulint query_cache_inv_trx_id;
- /* transactions whose trx id < than this
- number are not allowed to store to the MySQL
- query cache or retrieve from it; when a trx
- with undo logs commits, it sets this to the
- value of the trx id counter for the tables it
- had an IX lock on */
- UT_LIST_BASE_NODE_T(lock_t)
- locks; /* list of locks on the table */
-#ifdef UNIV_DEBUG
- /*----------------------*/
- ibool does_not_fit_in_memory;
- /* this field is used to specify in simulations
- tables which are so big that disk should be
- accessed: disk access is simulated by
- putting the thread to sleep for a while;
- NOTE that this flag is not stored to the data
- dictionary on disk, and the database will
- forget about value TRUE if it has to reload
- the table definition from disk */
-#endif /* UNIV_DEBUG */
- /*----------------------*/
- unsigned big_rows:1;
- /* flag: TRUE if the maximum length of
- a single row exceeds BIG_ROW_SIZE;
- initialized in dict_table_add_to_cache() */
- unsigned stat_initialized:1; /* TRUE if statistics have
- been calculated the first time
- after database startup or table creation */
- ib_longlong stat_n_rows;
- /* approximate number of rows in the table;
- we periodically calculate new estimates */
- ulint stat_clustered_index_size;
- /* approximate clustered index size in
- database pages */
- ulint stat_sum_of_other_index_sizes;
- /* other indexes in database pages */
- ulint stat_modified_counter;
- /* when a row is inserted, updated, or deleted,
- we add 1 to this number; we calculate new
- estimates for the stat_... values for the
- table and the indexes at an interval of 2 GB
- or when about 1 / 16 of table has been
- modified; also when the estimate operation is
- called for MySQL SHOW TABLE STATUS; the
- counter is reset to zero at statistics
- calculation; this counter is not protected by
- any latch, because this is only used for
- heuristics */
- /*----------------------*/
- mutex_t autoinc_mutex;
- /* mutex protecting the autoincrement
- counter */
- ib_ulonglong autoinc;/* autoinc counter value to give to the
- next inserted row */
- ulong n_waiting_or_granted_auto_inc_locks;
- /* This counter is used to track the number
- of granted and pending autoinc locks on this
- table. This value is set after acquiring the
- kernel mutex but we peek the contents to
- determine whether other transactions have
- acquired the AUTOINC lock or not. Of course
- only one transaction can be granted the
- lock but there can be multiple waiters. */
- /*----------------------*/
-
-#ifdef UNIV_DEBUG
- ulint magic_n;/* magic number */
-# define DICT_TABLE_MAGIC_N 76333786
-#endif /* UNIV_DEBUG */
-};
-
-#ifndef UNIV_NONINL
-#include "dict0mem.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic
deleted file mode 100644
index 9bcefc2a51f..00000000000
--- a/storage/innobase/include/dict0mem.ic
+++ /dev/null
@@ -1,9 +0,0 @@
-/**********************************************************************
-Data dictionary memory object creation
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
deleted file mode 100644
index b90545f2105..00000000000
--- a/storage/innobase/include/dict0types.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/******************************************************
-Data dictionary global types
-
-(c) 1996 Innobase Oy
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0types_h
-#define dict0types_h
-
-typedef struct dict_sys_struct dict_sys_t;
-typedef struct dict_col_struct dict_col_t;
-typedef struct dict_field_struct dict_field_t;
-typedef struct dict_index_struct dict_index_t;
-typedef struct dict_table_struct dict_table_t;
-typedef struct dict_foreign_struct dict_foreign_t;
-
-/* A cluster object is a table object with the type field set to
-DICT_CLUSTERED */
-
-typedef dict_table_t dict_cluster_t;
-
-typedef struct ind_node_struct ind_node_t;
-typedef struct tab_node_struct tab_node_t;
-
-#endif
diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h
deleted file mode 100644
index 7affccbf67e..00000000000
--- a/storage/innobase/include/dyn0dyn.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/******************************************************
-The dynamically allocated array
-
-(c) 1996 Innobase Oy
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dyn0dyn_h
-#define dyn0dyn_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "mem0mem.h"
-
-typedef struct dyn_block_struct dyn_block_t;
-typedef dyn_block_t dyn_array_t;
-
-
-/* This is the initial 'payload' size of a dynamic array;
-this must be > MLOG_BUF_MARGIN + 30! */
-#define DYN_ARRAY_DATA_SIZE 512
-
-/*************************************************************************
-Initializes a dynamic array. */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
- /* out: initialized dyn array */
- dyn_array_t* arr); /* in: pointer to a memory buffer of
- size sizeof(dyn_array_t) */
-/****************************************************************
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
- dyn_array_t* arr); /* in: dyn array */
-/*************************************************************************
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close. */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
- /* out: pointer to the buffer */
- dyn_array_t* arr, /* in: dynamic array */
- ulint size); /* in: size in bytes of the buffer; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-/*************************************************************************
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
- dyn_array_t* arr, /* in: dynamic array */
- byte* ptr); /* in: buffer space from ptr up was not used */
-/*************************************************************************
-Makes room on top of a dyn array and returns a pointer to
-the added element. The caller must copy the element to
-the pointer returned. */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
- /* out: pointer to the element */
- dyn_array_t* arr, /* in: dynamic array */
- ulint size); /* in: size in bytes of the element */
-/****************************************************************
-Returns pointer to an element in dyn array. */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
- /* out: pointer to element */
- dyn_array_t* arr, /* in: dyn array */
- ulint pos); /* in: position of element as bytes
- from array start */
-/****************************************************************
-Returns the size of stored data in a dyn array. */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
- /* out: data size in bytes */
- dyn_array_t* arr); /* in: dyn array */
-/****************************************************************
-Gets the first block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_first_block(
-/*======================*/
- dyn_array_t* arr); /* in: dyn array */
-/****************************************************************
-Gets the last block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_last_block(
-/*=====================*/
- dyn_array_t* arr); /* in: dyn array */
-/************************************************************************
-Gets the next block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_next_block(
-/*=====================*/
- /* out: pointer to next, NULL if end of list */
- dyn_array_t* arr, /* in: dyn array */
- dyn_block_t* block); /* in: dyn array block */
-/************************************************************************
-Gets the number of used bytes in a dyn array block. */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
- /* out: number of bytes used */
- dyn_block_t* block); /* in: dyn array block */
-/************************************************************************
-Gets pointer to the start of data in a dyn array block. */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
- /* out: pointer to data */
- dyn_block_t* block); /* in: dyn array block */
-/************************************************************
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
- dyn_array_t* arr, /* in: dyn array */
- const byte* str, /* in: string to write */
- ulint len); /* in: string length */
-
-/*#################################################################*/
-
-/* NOTE! Do not use the fields of the struct directly: the definition
-appears here only for the compiler to know its size! */
-struct dyn_block_struct{
- mem_heap_t* heap; /* in the first block this is != NULL
- if dynamic allocation has been needed */
- ulint used; /* number of data bytes used in this block */
- byte data[DYN_ARRAY_DATA_SIZE];
- /* storage for array elements */
- UT_LIST_BASE_NODE_T(dyn_block_t) base;
- /* linear list of dyn blocks: this node is
- used only in the first block */
- UT_LIST_NODE_T(dyn_block_t) list;
- /* linear list node: used in all blocks */
-#ifdef UNIV_DEBUG
- ulint buf_end;/* only in the debug version: if dyn array is
- opened, this is the buffer end offset, else
- this is 0 */
- ulint magic_n;
-#endif
-};
-
-
-#ifndef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic
deleted file mode 100644
index fcb3c17287a..00000000000
--- a/storage/innobase/include/dyn0dyn.ic
+++ /dev/null
@@ -1,346 +0,0 @@
-/******************************************************
-The dynamically allocated array
-
-(c) 1996 Innobase Oy
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#define DYN_BLOCK_MAGIC_N 375767
-#define DYN_BLOCK_FULL_FLAG 0x1000000UL
-
-/****************************************************************
-Adds a new block to a dyn array. */
-
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
- /* out: created block */
- dyn_array_t* arr); /* in: dyn array */
-
-
-/****************************************************************
-Gets the first block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_first_block(
-/*======================*/
- dyn_array_t* arr) /* in: dyn array */
-{
- return(arr);
-}
-
-/****************************************************************
-Gets the last block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_last_block(
-/*=====================*/
- dyn_array_t* arr) /* in: dyn array */
-{
- if (arr->heap == NULL) {
-
- return(arr);
- }
-
- return(UT_LIST_GET_LAST(arr->base));
-}
-
-/************************************************************************
-Gets the next block in a dyn array. */
-UNIV_INLINE
-dyn_block_t*
-dyn_array_get_next_block(
-/*=====================*/
- /* out: pointer to next, NULL if end of list */
- dyn_array_t* arr, /* in: dyn array */
- dyn_block_t* block) /* in: dyn array block */
-{
- ut_ad(arr && block);
-
- if (arr->heap == NULL) {
- ut_ad(arr == block);
-
- return(NULL);
- }
-
- return(UT_LIST_GET_NEXT(list, block));
-}
-
-/************************************************************************
-Gets the number of used bytes in a dyn array block. */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
- /* out: number of bytes used */
- dyn_block_t* block) /* in: dyn array block */
-{
- ut_ad(block);
-
- return((block->used) & ~DYN_BLOCK_FULL_FLAG);
-}
-
-/************************************************************************
-Gets pointer to the start of data in a dyn array block. */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
- /* out: pointer to data */
- dyn_block_t* block) /* in: dyn array block */
-{
- ut_ad(block);
-
- return(block->data);
-}
-
-/*************************************************************************
-Initializes a dynamic array. */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
- /* out: initialized dyn array */
- dyn_array_t* arr) /* in: pointer to a memory buffer of
- size sizeof(dyn_array_t) */
-{
- ut_ad(arr);
-#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG
-# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG"
-#endif
-
- arr->heap = NULL;
- arr->used = 0;
-
-#ifdef UNIV_DEBUG
- arr->buf_end = 0;
- arr->magic_n = DYN_BLOCK_MAGIC_N;
-#endif
- return(arr);
-}
-
-/****************************************************************
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
- dyn_array_t* arr) /* in: dyn array */
-{
- if (arr->heap != NULL) {
- mem_heap_free(arr->heap);
- }
-
-#ifdef UNIV_DEBUG
- arr->magic_n = 0;
-#endif
-}
-
-/*************************************************************************
-Makes room on top of a dyn array and returns a pointer to the added element.
-The caller must copy the element to the pointer returned. */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
- /* out: pointer to the element */
- dyn_array_t* arr, /* in: dynamic array */
- ulint size) /* in: size in bytes of the element */
-{
- dyn_block_t* block;
- ulint used;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
- ut_ad(size <= DYN_ARRAY_DATA_SIZE);
- ut_ad(size);
-
- block = arr;
- used = block->used;
-
- if (used + size > DYN_ARRAY_DATA_SIZE) {
- /* Get the last array block */
-
- block = dyn_array_get_last_block(arr);
- used = block->used;
-
- if (used + size > DYN_ARRAY_DATA_SIZE) {
- block = dyn_array_add_block(arr);
- used = block->used;
- }
- }
-
- block->used = used + size;
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
- return((block->data) + used);
-}
-
-/*************************************************************************
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close. */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
- /* out: pointer to the buffer */
- dyn_array_t* arr, /* in: dynamic array */
- ulint size) /* in: size in bytes of the buffer; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-{
- dyn_block_t* block;
- ulint used;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
- ut_ad(size <= DYN_ARRAY_DATA_SIZE);
- ut_ad(size);
-
- block = arr;
- used = block->used;
-
- if (used + size > DYN_ARRAY_DATA_SIZE) {
- /* Get the last array block */
-
- block = dyn_array_get_last_block(arr);
- used = block->used;
-
- if (used + size > DYN_ARRAY_DATA_SIZE) {
- block = dyn_array_add_block(arr);
- used = block->used;
- ut_a(size <= DYN_ARRAY_DATA_SIZE);
- }
- }
-
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-#ifdef UNIV_DEBUG
- ut_ad(arr->buf_end == 0);
-
- arr->buf_end = used + size;
-#endif
- return((block->data) + used);
-}
-
-/*************************************************************************
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
- dyn_array_t* arr, /* in: dynamic array */
- byte* ptr) /* in: buffer space from ptr up was not used */
-{
- dyn_block_t* block;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- block = dyn_array_get_last_block(arr);
-
- ut_ad(arr->buf_end + block->data >= ptr);
-
- block->used = ptr - block->data;
-
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
-#ifdef UNIV_DEBUG
- arr->buf_end = 0;
-#endif
-}
-
-/****************************************************************
-Returns pointer to an element in dyn array. */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
- /* out: pointer to element */
- dyn_array_t* arr, /* in: dyn array */
- ulint pos) /* in: position of element as bytes
- from array start */
-{
- dyn_block_t* block;
- ulint used;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- /* Get the first array block */
- block = dyn_array_get_first_block(arr);
-
- if (arr->heap != NULL) {
- used = dyn_block_get_used(block);
-
- while (pos >= used) {
- pos -= used;
- block = UT_LIST_GET_NEXT(list, block);
- ut_ad(block);
-
- used = dyn_block_get_used(block);
- }
- }
-
- ut_ad(block);
- ut_ad(dyn_block_get_used(block) >= pos);
-
- return(block->data + pos);
-}
-
-/****************************************************************
-Returns the size of stored data in a dyn array. */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
- /* out: data size in bytes */
- dyn_array_t* arr) /* in: dyn array */
-{
- dyn_block_t* block;
- ulint sum = 0;
-
- ut_ad(arr);
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- if (arr->heap == NULL) {
-
- return(arr->used);
- }
-
- /* Get the first array block */
- block = dyn_array_get_first_block(arr);
-
- while (block != NULL) {
- sum += dyn_block_get_used(block);
- block = dyn_array_get_next_block(arr, block);
- }
-
- return(sum);
-}
-
-/************************************************************
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
- dyn_array_t* arr, /* in: dyn array */
- const byte* str, /* in: string to write */
- ulint len) /* in: string length */
-{
- ulint n_copied;
-
- while (len > 0) {
- if (len > DYN_ARRAY_DATA_SIZE) {
- n_copied = DYN_ARRAY_DATA_SIZE;
- } else {
- n_copied = len;
- }
-
- memcpy(dyn_array_push(arr, n_copied), str, n_copied);
-
- str += n_copied;
- len -= n_copied;
- }
-}
diff --git a/storage/innobase/include/eval0eval.h b/storage/innobase/include/eval0eval.h
deleted file mode 100644
index f950512adfd..00000000000
--- a/storage/innobase/include/eval0eval.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/******************************************************
-SQL evaluator: evaluates simple data structures, like expressions, in
-a query graph
-
-(c) 1997 Innobase Oy
-
-Created 12/29/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef eval0eval_h
-#define eval0eval_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-
-/*********************************************************************
-Free the buffer from global dynamic memory for a value of a que_node,
-if it has been allocated in the above function. The freeing for pushed
-column values is done in sel_col_prefetch_buf_free. */
-
-void
-eval_node_free_val_buf(
-/*===================*/
- que_node_t* node); /* in: query graph node */
-/*********************************************************************
-Evaluates a symbol table symbol. */
-UNIV_INLINE
-void
-eval_sym(
-/*=====*/
- sym_node_t* sym_node); /* in: symbol table node */
-/*********************************************************************
-Evaluates an expression. */
-UNIV_INLINE
-void
-eval_exp(
-/*=====*/
- que_node_t* exp_node); /* in: expression */
-/*********************************************************************
-Sets an integer value as the value of an expression node. */
-UNIV_INLINE
-void
-eval_node_set_int_val(
-/*==================*/
- que_node_t* node, /* in: expression node */
- lint val); /* in: value to set */
-/*********************************************************************
-Gets an integer value from an expression node. */
-UNIV_INLINE
-lint
-eval_node_get_int_val(
-/*==================*/
- /* out: integer value */
- que_node_t* node); /* in: expression node */
-/*********************************************************************
-Copies a binary string value as the value of a query graph node. Allocates a
-new buffer if necessary. */
-UNIV_INLINE
-void
-eval_node_copy_and_alloc_val(
-/*=========================*/
- que_node_t* node, /* in: query graph node */
- byte* str, /* in: binary string */
- ulint len); /* in: string length or UNIV_SQL_NULL */
-/*********************************************************************
-Copies a query node value to another node. */
-UNIV_INLINE
-void
-eval_node_copy_val(
-/*===============*/
- que_node_t* node1, /* in: node to copy to */
- que_node_t* node2); /* in: node to copy from */
-/*********************************************************************
-Gets a iboolean value from a query node. */
-UNIV_INLINE
-ibool
-eval_node_get_ibool_val(
-/*====================*/
- /* out: iboolean value */
- que_node_t* node); /* in: query graph node */
-/*********************************************************************
-Evaluates a comparison node. */
-
-ibool
-eval_cmp(
-/*=====*/
- /* out: the result of the comparison */
- func_node_t* cmp_node); /* in: comparison node */
-
-
-#ifndef UNIV_NONINL
-#include "eval0eval.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/eval0eval.ic b/storage/innobase/include/eval0eval.ic
deleted file mode 100644
index caffa2e0bfd..00000000000
--- a/storage/innobase/include/eval0eval.ic
+++ /dev/null
@@ -1,234 +0,0 @@
-/******************************************************
-SQL evaluator: evaluates simple data structures, like expressions, in
-a query graph
-
-(c) 1997 Innobase Oy
-
-Created 12/29/1997 Heikki Tuuri
-*******************************************************/
-
-#include "que0que.h"
-#include "rem0cmp.h"
-#include "pars0grm.h"
-
-/*********************************************************************
-Evaluates a function node. */
-
-void
-eval_func(
-/*======*/
- func_node_t* func_node); /* in: function node */
-/*********************************************************************
-Allocate a buffer from global dynamic memory for a value of a que_node.
-NOTE that this memory must be explicitly freed when the query graph is
-freed. If the node already has allocated buffer, that buffer is freed
-here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field. */
-
-byte*
-eval_node_alloc_val_buf(
-/*====================*/
- /* out: pointer to allocated buffer */
- que_node_t* node, /* in: query graph node; sets the val field
- data field to point to the new buffer, and
- len field equal to size */
- ulint size); /* in: buffer size */
-
-
-/*********************************************************************
-Allocates a new buffer if needed. */
-UNIV_INLINE
-byte*
-eval_node_ensure_val_buf(
-/*=====================*/
- /* out: pointer to buffer */
- que_node_t* node, /* in: query graph node; sets the val field
- data field to point to the new buffer, and
- len field equal to size */
- ulint size) /* in: buffer size */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(node);
- dfield_set_len(dfield, size);
-
- data = dfield_get_data(dfield);
-
- if (!data || que_node_get_val_buf_size(node) < size) {
-
- data = eval_node_alloc_val_buf(node, size);
- }
-
- return(data);
-}
-
-/*********************************************************************
-Evaluates a symbol table symbol. */
-UNIV_INLINE
-void
-eval_sym(
-/*=====*/
- sym_node_t* sym_node) /* in: symbol table node */
-{
-
- ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
-
- if (sym_node->indirection) {
- /* The symbol table node is an alias for a variable or a
- column */
-
- dfield_copy_data(que_node_get_val(sym_node),
- que_node_get_val(sym_node->indirection));
- }
-}
-
-/*********************************************************************
-Evaluates an expression. */
-UNIV_INLINE
-void
-eval_exp(
-/*=====*/
- que_node_t* exp_node) /* in: expression */
-{
- if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) {
-
- eval_sym((sym_node_t*)exp_node);
-
- return;
- }
-
- eval_func(exp_node);
-}
-
-/*********************************************************************
-Sets an integer value as the value of an expression node. */
-UNIV_INLINE
-void
-eval_node_set_int_val(
-/*==================*/
- que_node_t* node, /* in: expression node */
- lint val) /* in: value to set */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(node);
-
- data = dfield_get_data(dfield);
-
- if (data == NULL) {
- data = eval_node_alloc_val_buf(node, 4);
- }
-
- ut_ad(dfield_get_len(dfield) == 4);
-
- mach_write_to_4(data, (ulint)val);
-}
-
-/*********************************************************************
-Gets an integer non-SQL null value from an expression node. */
-UNIV_INLINE
-lint
-eval_node_get_int_val(
-/*==================*/
- /* out: integer value */
- que_node_t* node) /* in: expression node */
-{
- dfield_t* dfield;
-
- dfield = que_node_get_val(node);
-
- ut_ad(dfield_get_len(dfield) == 4);
-
- return((int)mach_read_from_4(dfield_get_data(dfield)));
-}
-
-/*********************************************************************
-Gets a iboolean value from a query node. */
-UNIV_INLINE
-ibool
-eval_node_get_ibool_val(
-/*====================*/
- /* out: iboolean value */
- que_node_t* node) /* in: query graph node */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(node);
-
- data = dfield_get_data(dfield);
-
- ut_ad(data != NULL);
-
- return(mach_read_from_1(data));
-}
-
-/*********************************************************************
-Sets a iboolean value as the value of a function node. */
-UNIV_INLINE
-void
-eval_node_set_ibool_val(
-/*====================*/
- func_node_t* func_node, /* in: function node */
- ibool val) /* in: value to set */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(func_node);
-
- data = dfield_get_data(dfield);
-
- if (data == NULL) {
- /* Allocate 1 byte to hold the value */
-
- data = eval_node_alloc_val_buf(func_node, 1);
- }
-
- ut_ad(dfield_get_len(dfield) == 1);
-
- mach_write_to_1(data, val);
-}
-
-/*********************************************************************
-Copies a binary string value as the value of a query graph node. Allocates a
-new buffer if necessary. */
-UNIV_INLINE
-void
-eval_node_copy_and_alloc_val(
-/*=========================*/
- que_node_t* node, /* in: query graph node */
- byte* str, /* in: binary string */
- ulint len) /* in: string length or UNIV_SQL_NULL */
-{
- byte* data;
-
- if (len == UNIV_SQL_NULL) {
- dfield_set_len(que_node_get_val(node), len);
-
- return;
- }
-
- data = eval_node_ensure_val_buf(node, len);
-
- ut_memcpy(data, str, len);
-}
-
-/*********************************************************************
-Copies a query node value to another node. */
-UNIV_INLINE
-void
-eval_node_copy_val(
-/*===============*/
- que_node_t* node1, /* in: node to copy to */
- que_node_t* node2) /* in: node to copy from */
-{
- dfield_t* dfield2;
-
- dfield2 = que_node_get_val(node2);
-
- eval_node_copy_and_alloc_val(node1, dfield_get_data(dfield2),
- dfield_get_len(dfield2));
-}
diff --git a/storage/innobase/include/eval0proc.h b/storage/innobase/include/eval0proc.h
deleted file mode 100644
index 8416551d0ba..00000000000
--- a/storage/innobase/include/eval0proc.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/******************************************************
-Executes SQL stored procedures and their control structures
-
-(c) 1998 Innobase Oy
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#ifndef eval0proc_h
-#define eval0proc_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-
-/**************************************************************************
-Performs an execution step of a procedure node. */
-UNIV_INLINE
-que_thr_t*
-proc_step(
-/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of an if-statement node. */
-
-que_thr_t*
-if_step(
-/*====*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of a while-statement node. */
-
-que_thr_t*
-while_step(
-/*=======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of a for-loop node. */
-
-que_thr_t*
-for_step(
-/*=====*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of an assignment statement node. */
-
-que_thr_t*
-assign_step(
-/*========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of a procedure call node. */
-UNIV_INLINE
-que_thr_t*
-proc_eval_step(
-/*===========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of an exit statement node. */
-
-que_thr_t*
-exit_step(
-/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of a return-statement node. */
-
-que_thr_t*
-return_step(
-/*========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-
-
-#ifndef UNIV_NONINL
-#include "eval0proc.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/eval0proc.ic b/storage/innobase/include/eval0proc.ic
deleted file mode 100644
index cf738056576..00000000000
--- a/storage/innobase/include/eval0proc.ic
+++ /dev/null
@@ -1,71 +0,0 @@
-/******************************************************
-Executes SQL stored procedures and their control structures
-
-(c) 1998 Innobase Oy
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#include "pars0pars.h"
-#include "que0que.h"
-#include "eval0eval.h"
-
-/**************************************************************************
-Performs an execution step of a procedure node. */
-UNIV_INLINE
-que_thr_t*
-proc_step(
-/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- proc_node_t* node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- /* Start execution from the first statement in the statement
- list */
-
- thr->run_node = node->stat_list;
- } else {
- /* Move to the next statement */
- ut_ad(que_node_get_next(thr->prev_node) == NULL);
-
- thr->run_node = NULL;
- }
-
- if (thr->run_node == NULL) {
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
-
-/**************************************************************************
-Performs an execution step of a procedure call node. */
-UNIV_INLINE
-que_thr_t*
-proc_eval_step(
-/*===========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- func_node_t* node;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
-
- /* Evaluate the procedure */
-
- eval_exp(node);
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
deleted file mode 100644
index 6b8fd4b03d5..00000000000
--- a/storage/innobase/include/fil0fil.h
+++ /dev/null
@@ -1,716 +0,0 @@
-/******************************************************
-The low-level file system
-
-(c) 1995 Innobase Oy
-
-Created 10/25/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef fil0fil_h
-#define fil0fil_h
-
-#include "univ.i"
-#include "sync0rw.h"
-#include "dict0types.h"
-#include "ibuf0types.h"
-#include "ut0byte.h"
-#include "os0file.h"
-
-/* When mysqld is run, the default directory "." is the mysqld datadir, but in
-ibbackup we must set it explicitly; the patgh must NOT contain the trailing
-'/' or '\' */
-extern const char* fil_path_to_mysql_datadir;
-
-/* Initial size of a single-table tablespace in pages */
-#define FIL_IBD_FILE_INITIAL_SIZE 4
-
-/* 'null' (undefined) page offset in the context of file spaces */
-#define FIL_NULL ULINT32_UNDEFINED
-
-/* Space address data type; this is intended to be used when
-addresses accurate to a byte are stored in file pages. If the page part
-of the address is FIL_NULL, the address is considered undefined. */
-
-typedef byte fil_faddr_t; /* 'type' definition in C: an address
- stored in a file page is a string of bytes */
-#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
-#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
-
-#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
-
-/* A struct for storing a space address FIL_ADDR, when it is used
-in C program data structures. */
-
-typedef struct fil_addr_struct fil_addr_t;
-struct fil_addr_struct{
- ulint page; /* page number within a space */
- ulint boffset; /* byte offset within the page */
-};
-
-/* Null file address */
-extern fil_addr_t fil_addr_null;
-
-/* The byte offsets on a file page for various variables */
-#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the
- page belongs to (== 0) but in later
- versions the 'new' checksum of the
- page */
-#define FIL_PAGE_OFFSET 4 /* page offset inside space */
-#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
- of the page, its offset.
- Otherwise FIL_NULL.
- This field is not set on BLOB pages,
- which are stored as a singly-linked
- list. See also FIL_PAGE_NEXT. */
-#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor
- of the page, its offset.
- Otherwise FIL_NULL.
- B-tree index pages
- (FIL_PAGE_TYPE contains FIL_PAGE_INDEX)
- on the same PAGE_LEVEL are maintained
- as a doubly linked list via
- FIL_PAGE_PREV and FIL_PAGE_NEXT
- in the collation order of the
- smallest user record on each page. */
-#define FIL_PAGE_LSN 16 /* lsn of the end of the newest
- modification log record to the page */
-#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,...,
- 2 bytes.
-
- The contents of this field can only
- be trusted in the following case:
- if the page is an uncompressed
- B-tree index page, then it is
- guaranteed that the value is
- FIL_PAGE_INDEX.
- The opposite does not hold.
-
- In tablespaces created by
- MySQL/InnoDB 5.1.7 or later, the
- contents of this field is valid
- for all uncompressed pages. */
-#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the
- first page in a data file: the file
- has been flushed to disk at least up
- to this lsn */
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /* starting from 4.1.x this
- contains the space id of the page */
-#define FIL_PAGE_DATA 38 /* start of the data on the page */
-
-/* File page trailer */
-#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used
- to store the page checksum, the
- last 4 bytes should be identical
- to the last 4 bytes of FIL_PAGE_LSN */
-#define FIL_PAGE_DATA_END 8
-
-/* File page types (values of FIL_PAGE_TYPE) */
-#define FIL_PAGE_INDEX 17855 /* B-tree node */
-#define FIL_PAGE_UNDO_LOG 2 /* Undo log page */
-#define FIL_PAGE_INODE 3 /* Index node */
-#define FIL_PAGE_IBUF_FREE_LIST 4 /* Insert buffer free list */
-/* File page types introduced in MySQL/InnoDB 5.1.7 */
-#define FIL_PAGE_TYPE_ALLOCATED 0 /* Freshly allocated page */
-#define FIL_PAGE_IBUF_BITMAP 5 /* Insert buffer bitmap */
-#define FIL_PAGE_TYPE_SYS 6 /* System page */
-#define FIL_PAGE_TYPE_TRX_SYS 7 /* Transaction system data */
-#define FIL_PAGE_TYPE_FSP_HDR 8 /* File space header */
-#define FIL_PAGE_TYPE_XDES 9 /* Extent descriptor page */
-#define FIL_PAGE_TYPE_BLOB 10 /* Uncompressed BLOB page */
-
-/* Space types */
-#define FIL_TABLESPACE 501
-#define FIL_LOG 502
-
-extern ulint fil_n_log_flushes;
-
-extern ulint fil_n_pending_log_flushes;
-extern ulint fil_n_pending_tablespace_flushes;
-
-
-/***********************************************************************
-Returns the version number of a tablespace, -1 if not found. */
-
-ib_longlong
-fil_space_get_version(
-/*==================*/
- /* out: version number, -1 if the tablespace does not
- exist in the memory cache */
- ulint id); /* in: space id */
-/***********************************************************************
-Returns the latch of a file space. */
-
-rw_lock_t*
-fil_space_get_latch(
-/*================*/
- /* out: latch protecting storage allocation */
- ulint id); /* in: space id */
-/***********************************************************************
-Returns the type of a file space. */
-
-ulint
-fil_space_get_type(
-/*===============*/
- /* out: FIL_TABLESPACE or FIL_LOG */
- ulint id); /* in: space id */
-/***********************************************************************
-Returns the ibuf data of a file space. */
-
-ibuf_data_t*
-fil_space_get_ibuf_data(
-/*====================*/
- /* out: ibuf data for this space */
- ulint id); /* in: space id */
-/***********************************************************************
-Appends a new file to the chain of files of a space. File must be closed. */
-
-void
-fil_node_create(
-/*============*/
- const char* name, /* in: file name (file must be closed) */
- ulint size, /* in: file size in database blocks, rounded
- downwards to an integer */
- ulint id, /* in: space id where to append */
- ibool is_raw);/* in: TRUE if a raw device or
- a raw disk partition */
-/********************************************************************
-Drops files from the start of a file space, so that its size is cut by
-the amount given. */
-
-void
-fil_space_truncate_start(
-/*=====================*/
- ulint id, /* in: space id */
- ulint trunc_len); /* in: truncate by this much; it is an error
- if this does not equal to the combined size of
- some initial files in the space */
-/***********************************************************************
-Creates a space memory object and puts it to the 'fil system' hash table. If
-there is an error, prints an error message to the .err log. */
-
-ibool
-fil_space_create(
-/*=============*/
- /* out: TRUE if success */
- const char* name, /* in: space name */
- ulint id, /* in: space id */
- ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
-/***********************************************************************
-Frees a space object from a the tablespace memory cache. Closes the files in
-the chain but does not delete them. */
-
-ibool
-fil_space_free(
-/*===========*/
- /* out: TRUE if success */
- ulint id); /* in: space id */
-/***********************************************************************
-Returns the size of the space in pages. The tablespace must be cached in the
-memory cache. */
-
-ulint
-fil_space_get_size(
-/*===============*/
- /* out: space size, 0 if space not found */
- ulint id); /* in: space id */
-/***********************************************************************
-Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache. */
-
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
- /* out: TRUE if the address is meaningful */
- ulint id, /* in: space id */
- ulint page_no);/* in: page number */
-/********************************************************************
-Initializes the tablespace memory cache. */
-
-void
-fil_init(
-/*=====*/
- ulint max_n_open); /* in: max number of open files */
-/***********************************************************************
-Opens all log files and system tablespace data files. They stay open until the
-database server shutdown. This should be called at a server startup after the
-space objects for the log and the system tablespace have been created. The
-purpose of this operation is to make sure we never run out of file descriptors
-if we need to read from the insert buffer or to write to the log. */
-
-void
-fil_open_log_and_system_tablespace_files(void);
-/*==========================================*/
-/***********************************************************************
-Closes all open files. There must not be any pending i/o's or not flushed
-modifications in the files. */
-
-void
-fil_close_all_files(void);
-/*=====================*/
-/***********************************************************************
-Sets the max tablespace id counter if the given number is bigger than the
-previous value. */
-
-void
-fil_set_max_space_id_if_bigger(
-/*===========================*/
- ulint max_id);/* in: maximum known id */
-/********************************************************************
-Initializes the ibuf data structure for space 0 == the system tablespace.
-This can be called after the file space headers have been created and the
-dictionary system has been initialized. */
-
-void
-fil_ibuf_init_at_db_start(void);
-/*===========================*/
-/********************************************************************
-Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file in the system tablespace. */
-
-ulint
-fil_write_flushed_lsn_to_data_files(
-/*================================*/
- /* out: DB_SUCCESS or error number */
- dulint lsn, /* in: lsn to write */
- ulint arch_log_no); /* in: latest archived log file number */
-/***********************************************************************
-Reads the flushed lsn and arch no fields from a data file at database
-startup. */
-
-void
-fil_read_flushed_lsn_and_arch_log_no(
-/*=================================*/
- os_file_t data_file, /* in: open data file */
- ibool one_read_already, /* in: TRUE if min and max parameters
- below already contain sensible data */
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no, /* in/out: */
- ulint* max_arch_log_no, /* in/out: */
-#endif /* UNIV_LOG_ARCHIVE */
- dulint* min_flushed_lsn, /* in/out: */
- dulint* max_flushed_lsn); /* in/out: */
-/***********************************************************************
-Increments the count of pending insert buffer page merges, if space is not
-being deleted. */
-
-ibool
-fil_inc_pending_ibuf_merges(
-/*========================*/
- /* out: TRUE if being deleted, and ibuf merges should
- be skipped */
- ulint id); /* in: space id */
-/***********************************************************************
-Decrements the count of pending insert buffer page merges. */
-
-void
-fil_decr_pending_ibuf_merges(
-/*=========================*/
- ulint id); /* in: space id */
-/***********************************************************************
-Parses the body of a log record written about an .ibd file operation. That is,
-the log record part after the standard (type, space id, page no) header of the
-log record.
-
-If desired, also replays the delete or rename operation if the .ibd file
-exists and the space id in it matches. Replays the create operation if a file
-at that path does not exist yet. If the database directory for the file to be
-created does not exist, then we create the directory, too.
-
-Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
-datadir that we should use in replaying the file operations. */
-
-byte*
-fil_op_log_parse_or_replay(
-/*=======================*/
- /* out: end of log record, or NULL if the
- record was not completely contained between
- ptr and end_ptr */
- byte* ptr, /* in: buffer containing the log record body,
- or an initial segment of it, if the record does
- not fit completely between ptr and end_ptr */
- byte* end_ptr, /* in: buffer end */
- ulint type, /* in: the type of this log record */
- ibool do_replay, /* in: TRUE if we want to replay the
- operation, and not just parse the log record */
- ulint space_id); /* in: if do_replay is TRUE, the space id of
- the tablespace in question; otherwise
- ignored */
-/***********************************************************************
-Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache. */
-
-ibool
-fil_delete_tablespace(
-/*==================*/
- /* out: TRUE if success */
- ulint id); /* in: space id */
-/***********************************************************************
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-1) we do not drop the table from the data dictionary;
-2) we remove all insert buffer entries for the tablespace immediately; in DROP
-TABLE they are only removed gradually in the background;
-3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had. */
-
-ibool
-fil_discard_tablespace(
-/*===================*/
- /* out: TRUE if success */
- ulint id); /* in: space id */
-/***********************************************************************
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache. */
-
-ibool
-fil_rename_tablespace(
-/*==================*/
- /* out: TRUE if success */
- const char* old_name, /* in: old table name in the standard
- databasename/tablename format of
- InnoDB, or NULL if we do the rename
- based on the space id only */
- ulint id, /* in: space id */
- const char* new_name); /* in: new table name in the standard
- databasename/tablename format
- of InnoDB */
-
-/***********************************************************************
-Creates a new single-table tablespace to a database directory of MySQL.
-Database directories are under the 'datadir' of MySQL. The datadir is the
-directory of a running mysqld program. We can refer to it by simply the
-path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server. */
-
-ulint
-fil_create_new_single_table_tablespace(
-/*===================================*/
- /* out: DB_SUCCESS or error code */
- ulint* space_id, /* in/out: space id; if this is != 0,
- then this is an input parameter,
- otherwise output */
- const char* tablename, /* in: the table name in the usual
- databasename/tablename format
- of InnoDB, or a dir path to a temp
- table */
- ibool is_temp, /* in: TRUE if a table created with
- CREATE TEMPORARY TABLE */
- ulint size); /* in: the initial size of the
- tablespace file in pages,
- must be >= FIL_IBD_FILE_INITIAL_SIZE */
-/************************************************************************
-Tries to open a single-table tablespace and optionally checks the space id is
-right in it. If does not succeed, prints an error message to the .err log. This
-function is used to open a tablespace when we start up mysqld, and also in
-IMPORT TABLESPACE.
-NOTE that we assume this operation is used either at the database startup
-or under the protection of the dictionary mutex, so that two users cannot
-race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it. */
-
-ibool
-fil_open_single_table_tablespace(
-/*=============================*/
- /* out: TRUE if success */
- ibool check_space_id, /* in: should we check that the space
- id in the file is right; we assume
- that this function runs much faster
- if no check is made, since accessing
- the file inode probably is much
- faster (the OS caches them) than
- accessing the first page of the file */
- ulint id, /* in: space id */
- const char* name); /* in: table name in the
- databasename/tablename format */
-/************************************************************************
-It is possible, though very improbable, that the lsn's in the tablespace to be
-imported have risen above the current system lsn, if a lengthy purge, ibuf
-merge, or rollback was performed on a backup taken with ibbackup. If that is
-the case, reset page lsn's in the file. We assume that mysqld was shut down
-after it performed these cleanup operations on the .ibd file, so that it at
-the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
-first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn. */
-
-ibool
-fil_reset_too_high_lsns(
-/*====================*/
- /* out: TRUE if success */
- const char* name, /* in: table name in the
- databasename/tablename format */
- dulint current_lsn); /* in: reset lsn's if the lsn stamped
- to FIL_PAGE_FILE_FLUSH_LSN in the
- first page is too high */
-/************************************************************************
-At the server startup, if we need crash recovery, scans the database
-directories under the MySQL datadir, looking for .ibd files. Those files are
-single-table tablespaces. We need to know the space id in each of them so that
-we know into which file we should look to check the contents of a page stored
-in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0. */
-
-ulint
-fil_load_single_table_tablespaces(void);
-/*===================================*/
- /* out: DB_SUCCESS or error number */
-/************************************************************************
-If we need crash recovery, and we have called
-fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
-we can call this function to print an error message of orphaned .ibd files
-for which there is not a data dictionary entry with a matching table name
-and space id. */
-
-void
-fil_print_orphaned_tablespaces(void);
-/*================================*/
-/***********************************************************************
-Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there. */
-
-ibool
-fil_tablespace_deleted_or_being_deleted_in_mem(
-/*===========================================*/
- /* out: TRUE if does not exist or is being
- deleted */
- ulint id, /* in: space id */
- ib_longlong version);/* in: tablespace_version should be this; if
- you pass -1 as the value of this, then this
- parameter is ignored */
-/***********************************************************************
-Returns TRUE if a single-table tablespace exists in the memory cache. */
-
-ibool
-fil_tablespace_exists_in_mem(
-/*=========================*/
- /* out: TRUE if exists */
- ulint id); /* in: space id */
-/***********************************************************************
-Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
-cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache. */
-
-ibool
-fil_space_for_table_exists_in_mem(
-/*==============================*/
- /* out: TRUE if a matching tablespace
- exists in the memory cache */
- ulint id, /* in: space id */
- const char* name, /* in: table name in the standard
- 'databasename/tablename' format or
- the dir path to a temp table */
- ibool is_temp, /* in: TRUE if created with CREATE
- TEMPORARY TABLE */
- ibool mark_space, /* in: in crash recovery, at database
- startup we mark all spaces which have
- an associated table in the InnoDB
- data dictionary, so that
- we can print a warning about orphaned
- tablespaces */
- ibool print_error_if_does_not_exist);
- /* in: print detailed error
- information to the .err log if a
- matching tablespace is not found from
- memory */
-/**************************************************************************
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing. */
-
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
- /* out: TRUE if success */
- ulint* actual_size, /* out: size of the space after extension;
- if we ran out of disk space this may be lower
- than the desired size */
- ulint space_id, /* in: space id */
- ulint size_after_extend);/* in: desired size in pages after the
- extension; if the current space size is bigger
- than this already, the function does nothing */
-#ifdef UNIV_HOTBACKUP
-/************************************************************************
-Extends all tablespaces to the size stored in the space header. During the
-ibbackup --apply-log phase we extended the spaces on-demand so that log records
-could be applied, but that may have left spaces still too small compared to
-the size stored in the space header. */
-
-void
-fil_extend_tablespaces_to_stored_len(void);
-/*======================================*/
-#endif
-/***********************************************************************
-Tries to reserve free extents in a file space. */
-
-ibool
-fil_space_reserve_free_extents(
-/*===========================*/
- /* out: TRUE if succeed */
- ulint id, /* in: space id */
- ulint n_free_now, /* in: number of free extents now */
- ulint n_to_reserve); /* in: how many one wants to reserve */
-/***********************************************************************
-Releases free extents in a file space. */
-
-void
-fil_space_release_free_extents(
-/*===========================*/
- ulint id, /* in: space id */
- ulint n_reserved); /* in: how many one reserved */
-/***********************************************************************
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
- ulint id); /* in: space id */
-/************************************************************************
-Reads or writes data. This operation is asynchronous (aio). */
-
-ulint
-fil_io(
-/*===*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
- ORed to OS_FILE_LOG, if a log i/o
- and ORed to OS_AIO_SIMULATED_WAKE_LATER
- if simulated aio and we want to post a
- batch of i/os; NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len, /* in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
- void* buf, /* in/out: buffer where to store read data
- or from where to write; in aio this must be
- appropriately aligned */
- void* message); /* in: message for aio handler if non-sync
- aio used, else ignored */
-/************************************************************************
-Reads data from a space to a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_read(
-/*=====*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to read; this must not
- cross a file boundary; in aio this must be a
- block size multiple */
- void* buf, /* in/out: buffer where to store data read;
- in aio this must be appropriately aligned */
- void* message); /* in: message for aio handler if non-sync
- aio used, else ignored */
-/************************************************************************
-Writes data to a space from a buffer. Remember that the possible incomplete
-blocks at the end of file are ignored: they are not taken into account when
-calculating the byte offset within a space. */
-
-ulint
-fil_write(
-/*======*/
- /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
- if we are trying to do i/o on a tablespace
- which does not exist */
- ibool sync, /* in: TRUE if synchronous aio is desired */
- ulint space_id, /* in: space id */
- ulint block_offset, /* in: offset in number of blocks */
- ulint byte_offset, /* in: remainder of offset in bytes; in aio
- this must be divisible by the OS block size */
- ulint len, /* in: how many bytes to write; this must
- not cross a file boundary; in aio this must
- be a block size multiple */
- void* buf, /* in: buffer from which to write; in aio
- this must be appropriately aligned */
- void* message); /* in: message for aio handler if non-sync
- aio used, else ignored */
-/**************************************************************************
-Waits for an aio operation to complete. This function is used to write the
-handler for completed requests. The aio array of pending requests is divided
-into segments (see os0file.c for more info). The thread specifies which
-segment it wants to wait for. */
-
-void
-fil_aio_wait(
-/*=========*/
- ulint segment); /* in: the number of the segment in the aio
- array to wait for */
-/**************************************************************************
-Flushes to disk possible writes cached by the OS. If the space does not exist
-or is being dropped, does not do anything. */
-
-void
-fil_flush(
-/*======*/
- ulint space_id); /* in: file space id (this can be a group of
- log files or a tablespace of the database) */
-/**************************************************************************
-Flushes to disk writes in file spaces of the given type possibly cached by
-the OS. */
-
-void
-fil_flush_file_spaces(
-/*==================*/
- ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
-/**********************************************************************
-Checks the consistency of the tablespace cache. */
-
-ibool
-fil_validate(void);
-/*==============*/
- /* out: TRUE if ok */
-/************************************************************************
-Returns TRUE if file address is undefined. */
-
-ibool
-fil_addr_is_null(
-/*=============*/
- /* out: TRUE if undefined */
- fil_addr_t addr); /* in: address */
-/************************************************************************
-Accessor functions for a file page */
-
-ulint
-fil_page_get_prev(byte* page);
-ulint
-fil_page_get_next(byte* page);
-/*************************************************************************
-Sets the file page type. */
-
-void
-fil_page_set_type(
-/*==============*/
- byte* page, /* in: file page */
- ulint type); /* in: type */
-/*************************************************************************
-Gets the file page type. */
-
-ulint
-fil_page_get_type(
-/*==============*/
- /* out: type; NOTE that if the type has not been
- written to page, the return value is not defined */
- byte* page); /* in: file page */
-
-
-typedef struct fil_space_struct fil_space_t;
-
-#endif
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
deleted file mode 100644
index 82e95a2e920..00000000000
--- a/storage/innobase/include/fsp0fsp.h
+++ /dev/null
@@ -1,391 +0,0 @@
-/******************************************************
-File space management
-
-(c) 1995 Innobase Oy
-
-Created 12/18/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef fsp0fsp_h
-#define fsp0fsp_h
-
-#include "univ.i"
-
-#include "mtr0mtr.h"
-#include "fut0lst.h"
-#include "ut0byte.h"
-#include "page0types.h"
-
-/* If records are inserted in order, there are the following
-flags to tell this (their type is made byte for the compiler
-to warn if direction and hint parameters are switched in
-fseg_alloc_free_page): */
-#define FSP_UP ((byte)111) /* alphabetically upwards */
-#define FSP_DOWN ((byte)112) /* alphabetically downwards */
-#define FSP_NO_DIR ((byte)113) /* no order */
-
-/* File space extent size in pages */
-#define FSP_EXTENT_SIZE 64
-
-/* On a page of any file segment, data may be put starting from this offset: */
-#define FSEG_PAGE_DATA FIL_PAGE_DATA
-
-/* File segment header which points to the inode describing the file segment */
-typedef byte fseg_header_t;
-
-#define FSEG_HDR_SPACE 0 /* space id of the inode */
-#define FSEG_HDR_PAGE_NO 4 /* page number of the inode */
-#define FSEG_HDR_OFFSET 8 /* byte offset of the inode */
-
-#define FSEG_HEADER_SIZE 10
-
-/**************************************************************************
-Initializes the file space system. */
-
-void
-fsp_init(void);
-/*==========*/
-/**************************************************************************
-Gets the current free limit of a tablespace. The free limit means the
-place of the first page which has never been put to the free list
-for allocation. The space above that address is initialized to zero.
-Sets also the global variable log_fsp_current_free_limit. */
-
-ulint
-fsp_header_get_free_limit(
-/*======================*/
- /* out: free limit in megabytes */
- ulint space); /* in: space id, must be 0 */
-/**************************************************************************
-Gets the size of the tablespace from the tablespace header. If we do not
-have an auto-extending data file, this should be equal to the size of the
-data files. If there is an auto-extending data file, this can be smaller. */
-
-ulint
-fsp_header_get_tablespace_size(
-/*===========================*/
- /* out: size in pages */
- ulint space); /* in: space id, must be 0 */
-/**************************************************************************
-Reads the file space size stored in the header page. */
-
-ulint
-fsp_get_size_low(
-/*=============*/
- /* out: tablespace size stored in the space header */
- page_t* page); /* in: header page (page 0 in the tablespace) */
-/**************************************************************************
-Reads the space id from the first page of a tablespace. */
-
-ulint
-fsp_header_get_space_id(
-/*====================*/
- /* out: space id, ULINT_UNDEFINED if error */
- page_t* page); /* in: first page of a tablespace */
-/**************************************************************************
-Writes the space id to a tablespace header. This function is used past the
-buffer pool when we in fil0fil.c create a new single-table tablespace. */
-
-void
-fsp_header_write_space_id(
-/*======================*/
- page_t* page, /* in: first page in the space */
- ulint space_id); /* in: space id */
-/**************************************************************************
-Initializes the space header of a newly created space and creates also the
-insert buffer tree root if space == 0. */
-
-void
-fsp_header_init(
-/*============*/
- ulint space, /* in: space id */
- ulint size, /* in: current size in blocks */
- mtr_t* mtr); /* in: mini-transaction handle */
-/**************************************************************************
-Increases the space size field of a space. */
-
-void
-fsp_header_inc_size(
-/*================*/
- ulint space, /* in: space id */
- ulint size_inc,/* in: size increment in pages */
- mtr_t* mtr); /* in: mini-transaction handle */
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
-fseg_create(
-/*========*/
- /* out: the page where the segment header is placed,
- x-latched, NULL if could not create segment
- because of lack of space */
- ulint space, /* in: space id */
- ulint page, /* in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /* in: byte offset of the created segment header
- on the page */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Creates a new segment. */
-
-page_t*
-fseg_create_general(
-/*================*/
- /* out: the page where the segment header is placed,
- x-latched, NULL if could not create segment
- because of lack of space */
- ulint space, /* in: space id */
- ulint page, /* in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /* in: byte offset of the created segment header
- on the page */
- ibool has_done_reservation, /* in: TRUE if the caller has already
- done the reservation for the pages with
- fsp_reserve_free_extents (at least 2 extents: one for
- the inode and the other for the segment) then there is
- no need to do the check for this individual
- operation */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used. */
-
-ulint
-fseg_n_reserved_pages(
-/*==================*/
- /* out: number of reserved pages */
- fseg_header_t* header, /* in: segment header */
- ulint* used, /* out: number of pages used (<= reserved) */
- mtr_t* mtr); /* in: mtr handle */
-/**************************************************************************
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize
-file space fragmentation. */
-
-ulint
-fseg_alloc_free_page(
-/*=================*/
- /* out: the allocated page offset
- FIL_NULL if no page could be allocated */
- fseg_header_t* seg_header, /* in: segment header */
- ulint hint, /* in: hint of which page would be desirable */
- byte direction, /* in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr); /* in: mtr handle */
-/**************************************************************************
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation. */
-
-ulint
-fseg_alloc_free_page_general(
-/*=========================*/
- /* out: allocated page offset, FIL_NULL if no
- page could be allocated */
- fseg_header_t* seg_header,/* in: segment header */
- ulint hint, /* in: hint of which page would be desirable */
- byte direction,/* in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- ibool has_done_reservation, /* in: TRUE if the caller has
- already done the reservation for the page
- with fsp_reserve_free_extents, then there
- is no need to do the check for this individual
- page */
- mtr_t* mtr); /* in: mtr handle */
-/**************************************************************************
-Reserves free pages from a tablespace. All mini-transactions which may
-use several pages from the tablespace should call this function beforehand
-and reserve enough free extents so that they certainly will be able
-to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
-
-The alloc_type below has the following meaning: FSP_NORMAL means an
-operation which will probably result in more space usage, like an
-insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
-deleting rows, then this allocation will in the long run result in
-less space usage (after a purge); FSP_CLEANING means allocation done
-in a physical record delete (like in a purge) or other cleaning operation
-which will result in less space usage in the long run. We prefer the latter
-two types of allocation: when space is scarce, FSP_NORMAL allocations
-will not succeed, but the latter two allocations will succeed, if possible.
-The purpose is to avoid dead end where the database is full but the
-user cannot free any space because these freeing operations temporarily
-reserve some space.
-
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available. */
-
-ibool
-fsp_reserve_free_extents(
-/*=====================*/
- /* out: TRUE if we were able to make the reservation */
- ulint* n_reserved,/* out: number of extents actually reserved; if we
- return TRUE and the tablespace size is < 64 pages,
- then this can be 0, otherwise it is n_ext */
- ulint space, /* in: space id */
- ulint n_ext, /* in: number of extents to reserve */
- ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out of the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents. */
-
-ullint
-fsp_get_available_space_in_free_extents(
-/*====================================*/
- /* out: available space in kB */
- ulint space); /* in: space id */
-/**************************************************************************
-Frees a single page of a segment. */
-
-void
-fseg_free_page(
-/*===========*/
- fseg_header_t* seg_header, /* in: segment header */
- ulint space, /* in: space id */
- ulint page, /* in: page offset */
- mtr_t* mtr); /* in: mtr handle */
-/***********************************************************************
-Frees a segment. The freeing is performed in several mini-transactions,
-so that there is no danger of bufferfixing too many buffer pages. */
-
-void
-fseg_free(
-/*======*/
- ulint space, /* in: space id */
- ulint page_no,/* in: page number where the segment header is
- placed */
- ulint offset);/* in: byte offset of the segment header on that
- page */
-/**************************************************************************
-Frees part of a segment. This function can be used to free a segment
-by repeatedly calling this function in different mini-transactions.
-Doing the freeing in a single mini-transaction might result in
-too big a mini-transaction. */
-
-ibool
-fseg_free_step(
-/*===========*/
- /* out: TRUE if freeing completed */
- fseg_header_t* header, /* in, own: segment header; NOTE: if the header
- resides on the first page of the frag list
- of the segment, this pointer becomes obsolete
- after the last freeing step */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed. */
-
-ibool
-fseg_free_step_not_header(
-/*======================*/
- /* out: TRUE if freeing completed, except the
- header page */
- fseg_header_t* header, /* in: segment header which must reside on
- the first fragment page of the segment */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************************
-Checks if a page address is an extent descriptor page address. */
-UNIV_INLINE
-ibool
-fsp_descr_page(
-/*===========*/
- /* out: TRUE if a descriptor page */
- ulint page_no);/* in: page number */
-/***************************************************************
-Parses a redo log record of a file page init. */
-
-byte*
-fsp_parse_init_file_page(
-/*=====================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
-/***********************************************************************
-Validates the file space system and its segments. */
-
-ibool
-fsp_validate(
-/*=========*/
- /* out: TRUE if ok */
- ulint space); /* in: space id */
-/***********************************************************************
-Prints info of a file space. */
-
-void
-fsp_print(
-/*======*/
- ulint space); /* in: space id */
-/***********************************************************************
-Validates a segment. */
-
-ibool
-fseg_validate(
-/*==========*/
- /* out: TRUE if ok */
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr2); /* in: mtr */
-/***********************************************************************
-Writes info of a segment. */
-
-void
-fseg_print(
-/*=======*/
- fseg_header_t* header, /* in: segment header */
- mtr_t* mtr); /* in: mtr */
-
-/* Flags for fsp_reserve_free_extents */
-#define FSP_NORMAL 1000000
-#define FSP_UNDO 2000000
-#define FSP_CLEANING 3000000
-
-/* Number of pages described in a single descriptor page: currently each page
-description takes less than 1 byte; a descriptor page is repeated every
-this many file pages */
-#define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE
-
-/* The space low address page map */
-/*--------------------------------------*/
- /* The following two pages are repeated
- every XDES_DESCRIBED_PER_PAGE pages in
- every tablespace. */
-#define FSP_XDES_OFFSET 0 /* extent descriptor */
-#define FSP_IBUF_BITMAP_OFFSET 1 /* insert buffer bitmap */
- /* The ibuf bitmap pages are the ones whose
- page number is the number above plus a
- multiple of XDES_DESCRIBED_PER_PAGE */
-
-#define FSP_FIRST_INODE_PAGE_NO 2 /* in every tablespace */
- /* The following pages exist
- in the system tablespace (space 0). */
-#define FSP_IBUF_HEADER_PAGE_NO 3 /* in tablespace 0 */
-#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /* in tablespace 0 */
- /* The ibuf tree root page number in
- tablespace 0; its fseg inode is on the page
- number FSP_FIRST_INODE_PAGE_NO */
-#define FSP_TRX_SYS_PAGE_NO 5 /* in tablespace 0 */
-#define FSP_FIRST_RSEG_PAGE_NO 6 /* in tablespace 0 */
-#define FSP_DICT_HDR_PAGE_NO 7 /* in tablespace 0 */
-/*--------------------------------------*/
-
-#ifndef UNIV_NONINL
-#include "fsp0fsp.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
deleted file mode 100644
index 89cd9263bd6..00000000000
--- a/storage/innobase/include/fsp0fsp.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/******************************************************
-File space management
-
-(c) 1995 Innobase Oy
-
-Created 12/18/1995 Heikki Tuuri
-*******************************************************/
-
-/***************************************************************************
-Checks if a page address is an extent descriptor page address. */
-UNIV_INLINE
-ibool
-fsp_descr_page(
-/*===========*/
- /* out: TRUE if a descriptor page */
- ulint page_no)/* in: page number */
-{
- if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_XDES_OFFSET) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h
deleted file mode 100644
index b9546b4e1a0..00000000000
--- a/storage/innobase/include/fut0fut.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/**********************************************************************
-File-based utilities
-
-(c) 1995 Innobase Oy
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-
-#ifndef fut0fut_h
-#define fut0fut_h
-
-#include "univ.i"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-/************************************************************************
-Gets a pointer to a file address and latches the page. */
-UNIV_INLINE
-byte*
-fut_get_ptr(
-/*========*/
- /* out: pointer to a byte in a frame; the file
- page in the frame is bufferfixed and latched */
- ulint space, /* in: space id */
- fil_addr_t addr, /* in: file address */
- ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr); /* in: mtr handle */
-
-#ifndef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
-
-#endif
-
diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
deleted file mode 100644
index 6a107786376..00000000000
--- a/storage/innobase/include/fut0fut.ic
+++ /dev/null
@@ -1,38 +0,0 @@
-/**********************************************************************
-File-based utilities
-
-(c) 1995 Innobase Oy
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "sync0rw.h"
-#include "buf0buf.h"
-
-/************************************************************************
-Gets a pointer to a file address and latches the page. */
-UNIV_INLINE
-byte*
-fut_get_ptr(
-/*========*/
- /* out: pointer to a byte in a frame; the file
- page in the frame is bufferfixed and latched */
- ulint space, /* in: space id */
- fil_addr_t addr, /* in: file address */
- ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr) /* in: mtr handle */
-{
- byte* ptr;
-
- ut_ad(mtr);
- ut_ad(addr.boffset < UNIV_PAGE_SIZE);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- ptr = buf_page_get(space, addr.page, rw_latch, mtr) + addr.boffset;
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(ptr, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(ptr);
-}
diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h
deleted file mode 100644
index 5427e2248da..00000000000
--- a/storage/innobase/include/fut0lst.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/**********************************************************************
-File-based list utilities
-
-(c) 1995 Innobase Oy
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef fut0lst_h
-#define fut0lst_h
-
-#include "univ.i"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-
-/* The C 'types' of base node and list node: these should be used to
-write self-documenting code. Of course, the sizeof macro cannot be
-applied to these types! */
-
-typedef byte flst_base_node_t;
-typedef byte flst_node_t;
-
-/* The physical size of a list base node in bytes */
-#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE)
-
-/* The physical size of a list node in bytes */
-#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
-
-
-/************************************************************************
-Initializes a list base node. */
-UNIV_INLINE
-void
-flst_init(
-/*======*/
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Adds a node as the last node in a list. */
-
-void
-flst_add_last(
-/*==========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Adds a node as the first node in a list. */
-
-void
-flst_add_first(
-/*===========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node, /* in: node to add */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Inserts a node after another in a list. */
-
-void
-flst_insert_after(
-/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node1, /* in: node to insert after */
- flst_node_t* node2, /* in: node to add */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Inserts a node before another in a list. */
-
-void
-flst_insert_before(
-/*===============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: node to insert */
- flst_node_t* node3, /* in: node to insert before */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Removes a node. */
-
-void
-flst_remove(
-/*========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: node to remove */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Cuts off the tail of the list, including the node given. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-
-void
-flst_cut_end(
-/*=========*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: first node to remove */
- ulint n_nodes,/* in: number of nodes to remove,
- must be >= 1 */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Cuts off the tail of the list, not including the given node. The number of
-nodes which will be removed must be provided by the caller, as this function
-does not measure the length of the tail. */
-
-void
-flst_truncate_end(
-/*==============*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- flst_node_t* node2, /* in: first node not to remove */
- ulint n_nodes,/* in: number of nodes to remove */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list length. */
-UNIV_INLINE
-ulint
-flst_get_len(
-/*=========*/
- /* out: length */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list first node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_first(
-/*===========*/
- /* out: file address */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list last node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_last(
-/*==========*/
- /* out: file address */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list next node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_next_addr(
-/*===============*/
- /* out: file address */
- flst_node_t* node, /* in: pointer to node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Gets list prev node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_prev_addr(
-/*===============*/
- /* out: file address */
- flst_node_t* node, /* in: pointer to node */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Writes a file address. */
-UNIV_INLINE
-void
-flst_write_addr(
-/*============*/
- fil_faddr_t* faddr, /* in: pointer to file faddress */
- fil_addr_t addr, /* in: file address */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Reads a file address. */
-UNIV_INLINE
-fil_addr_t
-flst_read_addr(
-/*===========*/
- /* out: file address */
- fil_faddr_t* faddr, /* in: pointer to file faddress */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************************
-Validates a file-based list. */
-
-ibool
-flst_validate(
-/*==========*/
- /* out: TRUE if ok */
- flst_base_node_t* base, /* in: pointer to base node of list */
- mtr_t* mtr1); /* in: mtr */
-/************************************************************************
-Prints info of a file-based list. */
-
-void
-flst_print(
-/*=======*/
- flst_base_node_t* base, /* in: pointer to base node of list */
- mtr_t* mtr); /* in: mtr */
-
-
-#ifndef UNIV_NONINL
-#include "fut0lst.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic
deleted file mode 100644
index 6c7e863b078..00000000000
--- a/storage/innobase/include/fut0lst.ic
+++ /dev/null
@@ -1,147 +0,0 @@
-/**********************************************************************
-File-based list utilities
-
-(c) 1995 Innobase Oy
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0fut.h"
-#include "mtr0log.h"
-#include "buf0buf.h"
-
-/* We define the field offsets of a node for the list */
-#define FLST_PREV 0 /* 6-byte address of the previous list element;
- the page part of address is FIL_NULL, if no
- previous element */
-#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next
- list element; the page part of address
- is FIL_NULL, if no next element */
-
-/* We define the field offsets of a base node for the list */
-#define FLST_LEN 0 /* 32-bit list length field */
-#define FLST_FIRST 4 /* 6-byte address of the first element
- of the list; undefined if empty list */
-#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the
- last element of the list; undefined
- if empty list */
-
-/************************************************************************
-Writes a file address. */
-UNIV_INLINE
-void
-flst_write_addr(
-/*============*/
- fil_faddr_t* faddr, /* in: pointer to file faddress */
- fil_addr_t addr, /* in: file address */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ut_ad(faddr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(faddr),
- MTR_MEMO_PAGE_X_FIX));
-
- mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
- mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
- MLOG_2BYTES, mtr);
-}
-
-/************************************************************************
-Reads a file address. */
-UNIV_INLINE
-fil_addr_t
-flst_read_addr(
-/*===========*/
- /* out: file address */
- fil_faddr_t* faddr, /* in: pointer to file faddress */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- fil_addr_t addr;
-
- ut_ad(faddr && mtr);
-
- addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr);
- addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
- mtr);
- return(addr);
-}
-
-/************************************************************************
-Initializes a list base node. */
-UNIV_INLINE
-void
-flst_init(
-/*======*/
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
- MTR_MEMO_PAGE_X_FIX));
- mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
- flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
- flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
-}
-
-/************************************************************************
-Gets list length. */
-UNIV_INLINE
-ulint
-flst_get_len(
-/*=========*/
- /* out: length */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
-}
-
-/************************************************************************
-Gets list first node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_first(
-/*===========*/
- /* out: file address */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- return(flst_read_addr(base + FLST_FIRST, mtr));
-}
-
-/************************************************************************
-Gets list last node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_last(
-/*==========*/
- /* out: file address */
- flst_base_node_t* base, /* in: pointer to base node */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- return(flst_read_addr(base + FLST_LAST, mtr));
-}
-
-/************************************************************************
-Gets list next node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_next_addr(
-/*===============*/
- /* out: file address */
- flst_node_t* node, /* in: pointer to node */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- return(flst_read_addr(node + FLST_NEXT, mtr));
-}
-
-/************************************************************************
-Gets list prev node address. */
-UNIV_INLINE
-fil_addr_t
-flst_get_prev_addr(
-/*===============*/
- /* out: file address */
- flst_node_t* node, /* in: pointer to node */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- return(flst_read_addr(node + FLST_PREV, mtr));
-}
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
deleted file mode 100644
index beaa06ae755..00000000000
--- a/storage/innobase/include/ha0ha.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/******************************************************
-The hash table with external chains
-
-(c) 1994-1997 Innobase Oy
-
-Created 8/18/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef ha0ha_h
-#define ha0ha_h
-
-#include "univ.i"
-
-#include "hash0hash.h"
-#include "page0types.h"
-
-/*****************************************************************
-Looks for an element in a hash table. */
-UNIV_INLINE
-void*
-ha_search_and_get_data(
-/*===================*/
- /* out: pointer to the data of the first hash
- table node in chain having the fold number,
- NULL if not found */
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: folded value of the searched data */
-/*************************************************************
-Looks for an element when we know the pointer to the data and updates
-the pointer to data if found. */
-
-void
-ha_search_and_update_if_found(
-/*==========================*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data, /* in: pointer to the data */
- void* new_data);/* in: new pointer to the data */
-/*****************************************************************
-Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n. */
-
-hash_table_t*
-ha_create_func(
-/*===========*/
- /* out, own: created table */
- ibool in_btr_search, /* in: TRUE if the hash table is used in
- the btr_search module */
- ulint n, /* in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
- ulint mutex_level, /* in: level of the mutexes in the latching
- order: this is used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes); /* in: number of mutexes to protect the
- hash table: must be a power of 2 */
-#ifdef UNIV_SYNC_DEBUG
-# define ha_create(b,n_c,n_m,level) ha_create_func(b,n_c,level,n_m)
-#else /* UNIV_SYNC_DEBUG */
-# define ha_create(b,n_c,n_m,level) ha_create_func(b,n_c,n_m)
-#endif /* UNIV_SYNC_DEBUG */
-/*****************************************************************
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted. */
-
-ibool
-ha_insert_for_fold(
-/*===============*/
- /* out: TRUE if succeed, FALSE if no more
- memory could be allocated */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of data; if a node with
- the same fold value already exists, it is
- updated to point to the same data, and no new
- node is created! */
- void* data); /* in: data, must not be NULL */
-/*****************************************************************
-Deletes an entry from a hash table. */
-
-void
-ha_delete(
-/*======*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of data */
- void* data); /* in: data, must not be NULL and must exist
- in the hash table */
-/*************************************************************
-Looks for an element when we know the pointer to the data and deletes
-it from the hash table if found. */
-UNIV_INLINE
-ibool
-ha_search_and_delete_if_found(
-/*==========================*/
- /* out: TRUE if found */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data); /* in: pointer to the data */
-/*********************************************************************
-Removes from the chain determined by fold all nodes whose data pointer
-points to the page given. */
-
-void
-ha_remove_all_nodes_to_page(
-/*========================*/
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: fold value */
- page_t* page); /* in: buffer page */
-/*****************************************************************
-Validates a given range of the cells in hash table. */
-
-ibool
-ha_validate(
-/*========*/
- /* out: TRUE if ok */
- hash_table_t* table, /* in: hash table */
- ulint start_index, /* in: start index */
- ulint end_index); /* in: end index */
-/*****************************************************************
-Prints info of a hash table. */
-
-void
-ha_print_info(
-/*==========*/
- FILE* file, /* in: file where to print */
- hash_table_t* table); /* in: hash table */
-
-/* The hash table external chain node */
-
-typedef struct ha_node_struct ha_node_t;
-struct ha_node_struct {
- ha_node_t* next; /* next chain node or NULL if none */
- void* data; /* pointer to the data */
- ulint fold; /* fold value for the data */
-};
-
-#ifndef UNIV_NONINL
-#include "ha0ha.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ha0ha.ic b/storage/innobase/include/ha0ha.ic
deleted file mode 100644
index fb264377f28..00000000000
--- a/storage/innobase/include/ha0ha.ic
+++ /dev/null
@@ -1,185 +0,0 @@
-/************************************************************************
-The hash table with external chains
-
-(c) 1994-1997 Innobase Oy
-
-Created 8/18/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ut0rnd.h"
-#include "mem0mem.h"
-
-/***************************************************************
-Deletes a hash node. */
-
-void
-ha_delete_hash_node(
-/*================*/
- hash_table_t* table, /* in: hash table */
- ha_node_t* del_node); /* in: node to be deleted */
-
-/**********************************************************************
-Gets a hash node data. */
-UNIV_INLINE
-void*
-ha_node_get_data(
-/*=============*/
- /* out: pointer to the data */
- ha_node_t* node) /* in: hash chain node */
-{
- return(node->data);
-}
-
-/**********************************************************************
-Sets hash node data. */
-UNIV_INLINE
-void
-ha_node_set_data(
-/*=============*/
- ha_node_t* node, /* in: hash chain node */
- void* data) /* in: pointer to the data */
-{
- node->data = data;
-}
-
-/**********************************************************************
-Gets the next node in a hash chain. */
-UNIV_INLINE
-ha_node_t*
-ha_chain_get_next(
-/*==============*/
- /* out: next node, NULL if none */
- ha_node_t* node) /* in: hash chain node */
-{
- return(node->next);
-}
-
-/**********************************************************************
-Gets the first node in a hash chain. */
-UNIV_INLINE
-ha_node_t*
-ha_chain_get_first(
-/*===============*/
- /* out: first node, NULL if none */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold value determining the chain */
-{
- return(hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
-}
-
-/*****************************************************************
-Looks for an element in a hash table. */
-UNIV_INLINE
-ha_node_t*
-ha_search(
-/*======*/
- /* out: pointer to the first hash table node
- in chain having the fold number, NULL if not
- found */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: folded value of the searched data */
-{
- ha_node_t* node;
-
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (node->fold == fold) {
-
- return(node);
- }
-
- node = ha_chain_get_next(node);
- }
-
- return(NULL);
-}
-
-/*****************************************************************
-Looks for an element in a hash table. */
-UNIV_INLINE
-void*
-ha_search_and_get_data(
-/*===================*/
- /* out: pointer to the data of the first hash
- table node in chain having the fold number,
- NULL if not found */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: folded value of the searched data */
-{
- ha_node_t* node;
-
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (node->fold == fold) {
-
- return(node->data);
- }
-
- node = ha_chain_get_next(node);
- }
-
- return(NULL);
-}
-
-/*************************************************************
-Looks for an element when we know the pointer to the data. */
-UNIV_INLINE
-ha_node_t*
-ha_search_with_data(
-/*================*/
- /* out: pointer to the hash table node, NULL
- if not found in the table */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data) /* in: pointer to the data */
-{
- ha_node_t* node;
-
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (node->data == data) {
-
- return(node);
- }
-
- node = ha_chain_get_next(node);
- }
-
- return(NULL);
-}
-
-/*************************************************************
-Looks for an element when we know the pointer to the data, and deletes
-it from the hash table, if found. */
-UNIV_INLINE
-ibool
-ha_search_and_delete_if_found(
-/*==========================*/
- /* out: TRUE if found */
- hash_table_t* table, /* in: hash table */
- ulint fold, /* in: folded value of the searched data */
- void* data) /* in: pointer to the data */
-{
- ha_node_t* node;
-
- ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-
- node = ha_search_with_data(table, fold, data);
-
- if (node) {
- ha_delete_hash_node(table, node);
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
deleted file mode 100644
index 6bfc43579b3..00000000000
--- a/storage/innobase/include/ha_prototypes.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef HA_INNODB_PROTOTYPES_H
-#define HA_INNODB_PROTOTYPES_H
-
-#ifndef UNIV_HOTBACKUP
-
-#include "univ.i" /* ulint, uint */
-#include "m_ctype.h" /* CHARSET_INFO */
-
-/* Prototypes for global functions in ha_innodb.cc that are called by
-InnoDB's C-code. */
-
-/*************************************************************************
-Wrapper around MySQL's copy_and_convert function, see it for
-documentation. */
-
-ulint
-innobase_convert_string(
-/*====================*/
- void* to,
- ulint to_length,
- CHARSET_INFO* to_cs,
- const void* from,
- ulint from_length,
- CHARSET_INFO* from_cs,
- uint* errors);
-
-/*********************************************************************
-Display an SQL identifier. */
-
-void
-innobase_print_identifier(
-/*======================*/
- FILE* f, /* in: output stream */
- trx_t* trx, /* in: transaction */
- ibool table_id,/* in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name, /* in: name to print */
- ulint namelen);/* in: length of name */
-
-/**********************************************************************
-Returns true if the thread is the replication thread on the slave
-server. Used in srv_conc_enter_innodb() to determine if the thread
-should be allowed to enter InnoDB - the replication thread is treated
-differently than other threads. Also used in
-srv_conc_force_exit_innodb(). */
-
-ibool
-thd_is_replication_slave_thread(
-/*============================*/
- /* out: true if thd is the replication thread */
- void* thd); /* in: thread handle (THD*) */
-
-/**********************************************************************
-Returns true if the transaction this thread is processing has edited
-non-transactional tables. Used by the deadlock detector when deciding
-which transaction to rollback in case of a deadlock - we try to avoid
-rolling back transactions that have edited non-transactional tables. */
-
-ibool
-thd_has_edited_nontrans_tables(
-/*===========================*/
- /* out: true if non-transactional tables have
- been edited */
- void* thd); /* in: thread handle (THD*) */
-
-/**********************************************************************
-Returns true if the thread is executing a SELECT statement. */
-
-ibool
-thd_is_select(
-/*==========*/
- /* out: true if thd is executing SELECT */
- const void* thd); /* in: thread handle (THD*) */
-
-#endif
-#endif
diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
deleted file mode 100644
index e119a117c94..00000000000
--- a/storage/innobase/include/hash0hash.h
+++ /dev/null
@@ -1,367 +0,0 @@
-/******************************************************
-The simple hash table utility
-
-(c) 1997 Innobase Oy
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef hash0hash_h
-#define hash0hash_h
-
-#include "univ.i"
-#include "mem0mem.h"
-#include "sync0sync.h"
-
-typedef struct hash_table_struct hash_table_t;
-typedef struct hash_cell_struct hash_cell_t;
-
-typedef void* hash_node_t;
-
-/* Fix Bug #13859: symbol collision between imap/mysql */
-#define hash_create hash0_create
-
-/*****************************************************************
-Creates a hash table with >= n array cells. The actual number
-of cells is chosen to be a prime number slightly bigger than n. */
-
-hash_table_t*
-hash_create(
-/*========*/
- /* out, own: created table */
- ulint n); /* in: number of array cells */
-/*****************************************************************
-Creates a mutex array to protect a hash table. */
-
-void
-hash_create_mutexes_func(
-/*=====================*/
- hash_table_t* table, /* in: hash table */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level, /* in: latching order level of the
- mutexes: used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes); /* in: number of mutexes */
-#ifdef UNIV_SYNC_DEBUG
-# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n)
-#else /* UNIV_SYNC_DEBUG */
-# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n)
-#endif /* UNIV_SYNC_DEBUG */
-
-/*****************************************************************
-Frees a hash table. */
-
-void
-hash_table_free(
-/*============*/
- hash_table_t* table); /* in, own: hash table */
-/******************************************************************
-Calculates the hash value from a folded value. */
-UNIV_INLINE
-ulint
-hash_calc_hash(
-/*===========*/
- /* out: hashed value */
- ulint fold, /* in: folded value */
- hash_table_t* table); /* in: hash table */
-/************************************************************************
-Assert that the mutex for the table in a hash operation is owned. */
-#ifdef UNIV_SYNC_DEBUG
-# define HASH_ASSERT_OWNED(TABLE, FOLD) \
-ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));
-#else
-# define HASH_ASSERT_OWNED(TABLE, FOLD)
-#endif
-
-/***********************************************************************
-Inserts a struct to a hash table. */
-
-#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
-do {\
- hash_cell_t* cell3333;\
- TYPE* struct3333;\
-\
- HASH_ASSERT_OWNED(TABLE, FOLD)\
-\
- (DATA)->NAME = NULL;\
-\
- cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
-\
- if (cell3333->node == NULL) {\
- cell3333->node = DATA;\
- } else {\
- struct3333 = cell3333->node;\
-\
- while (struct3333->NAME != NULL) {\
-\
- struct3333 = struct3333->NAME;\
- }\
-\
- struct3333->NAME = DATA;\
- }\
-} while (0)
-
-/***********************************************************************
-Deletes a struct from a hash table. */
-
-#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
-do {\
- hash_cell_t* cell3333;\
- TYPE* struct3333;\
-\
- HASH_ASSERT_OWNED(TABLE, FOLD)\
-\
- cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
-\
- if (cell3333->node == DATA) {\
- cell3333->node = DATA->NAME;\
- } else {\
- struct3333 = cell3333->node;\
-\
- while (struct3333->NAME != DATA) {\
-\
- struct3333 = struct3333->NAME;\
- ut_a(struct3333);\
- }\
-\
- struct3333->NAME = DATA->NAME;\
- }\
-} while (0)
-
-/***********************************************************************
-Gets the first struct in a hash chain, NULL if none. */
-
-#define HASH_GET_FIRST(TABLE, HASH_VAL)\
- (hash_get_nth_cell(TABLE, HASH_VAL)->node)
-
-/***********************************************************************
-Gets the next struct in a hash chain, NULL if none. */
-
-#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME)
-
-/************************************************************************
-Looks for a struct in a hash table. */
-#define HASH_SEARCH(NAME, TABLE, FOLD, DATA, TEST)\
-{\
-\
- HASH_ASSERT_OWNED(TABLE, FOLD)\
-\
- (DATA) = HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
-\
- while ((DATA) != NULL) {\
- if (TEST) {\
- break;\
- } else {\
- (DATA) = HASH_GET_NEXT(NAME, DATA);\
- }\
- }\
-}
-
-/****************************************************************
-Gets the nth cell in a hash table. */
-UNIV_INLINE
-hash_cell_t*
-hash_get_nth_cell(
-/*==============*/
- /* out: pointer to cell */
- hash_table_t* table, /* in: hash table */
- ulint n); /* in: cell index */
-/*****************************************************************
-Returns the number of cells in a hash table. */
-UNIV_INLINE
-ulint
-hash_get_n_cells(
-/*=============*/
- /* out: number of cells */
- hash_table_t* table); /* in: table */
-/***********************************************************************
-Deletes NODE from the hash table and compacts the table heap: the top node of
-the heap is copied into NODE's slot, its chain link is repointed, and the top
-is freed. The fold value must be stored in NODE in a field named 'fold'. */
-
-#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
-do {\
- TYPE* node111;\
- TYPE* top_node111;\
- hash_cell_t* cell111;\
- ulint fold111;\
-\
- fold111 = (NODE)->fold;\
-\
- HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\
-\
- top_node111 = (TYPE*)mem_heap_get_top(\
- hash_get_heap(TABLE, fold111),\
- sizeof(TYPE));\
-\
- /* If the node to remove is not the top node in the heap, compact the\
- heap of nodes by moving the top node in the place of NODE. */\
-\
- if (NODE != top_node111) {\
-\
- /* Copy the top node in place of NODE */\
-\
- *(NODE) = *top_node111;\
-\
- cell111 = hash_get_nth_cell(TABLE,\
- hash_calc_hash(top_node111->fold, TABLE));\
-\
- /* Look for the pointer to the top node, to update it */\
-\
- if (cell111->node == top_node111) {\
- /* The top node is the first in the chain */\
-\
- cell111->node = NODE;\
- } else {\
- /* We have to look for the predecessor of the top\
- node */\
- node111 = cell111->node;\
-\
- while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
-\
- node111 = HASH_GET_NEXT(NAME, node111);\
- }\
-\
- /* Now we have the predecessor node */\
-\
- node111->NAME = NODE;\
- }\
- }\
-\
- /* Free the space occupied by the top node */\
-\
- mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
-} while (0)
-
-/********************************************************************
-Inserts every node of OLD_TABLE into NEW_TABLE, with fold FOLD_FUNC(node). */
-
-#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
-do {\
- ulint i2222;\
- ulint cell_count2222;\
-\
- cell_count2222 = hash_get_n_cells(OLD_TABLE);\
-\
- for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
- NODE_TYPE* node2222 = HASH_GET_FIRST((OLD_TABLE), i2222);\
-\
- while (node2222) {\
- NODE_TYPE* next2222 = node2222->PTR_NAME;\
- ulint fold2222 = FOLD_FUNC(node2222);\
-\
- HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
- fold2222, node2222);\
-\
- node2222 = next2222;\
- }\
- }\
-} while (0)
-
-
-/****************************************************************
-Gets the mutex index for a fold value in a hash table. */
-UNIV_INLINE
-ulint
-hash_get_mutex_no(
-/*==============*/
- /* out: mutex number */
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
-Gets the nth heap in a hash table. */
-UNIV_INLINE
-mem_heap_t*
-hash_get_nth_heap(
-/*==============*/
- /* out: mem heap */
- hash_table_t* table, /* in: hash table */
- ulint i); /* in: index of the heap */
-/****************************************************************
-Gets the heap for a fold value in a hash table. */
-UNIV_INLINE
-mem_heap_t*
-hash_get_heap(
-/*==========*/
- /* out: mem heap */
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
-Gets the nth mutex in a hash table. */
-UNIV_INLINE
-mutex_t*
-hash_get_nth_mutex(
-/*===============*/
- /* out: mutex */
- hash_table_t* table, /* in: hash table */
- ulint i); /* in: index of the mutex */
-/****************************************************************
-Gets the mutex for a fold value in a hash table. */
-UNIV_INLINE
-mutex_t*
-hash_get_mutex(
-/*===========*/
- /* out: mutex */
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
-Reserves the mutex for a fold value in a hash table. */
-
-void
-hash_mutex_enter(
-/*=============*/
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
-Releases the mutex for a fold value in a hash table. */
-
-void
-hash_mutex_exit(
-/*============*/
- hash_table_t* table, /* in: hash table */
- ulint fold); /* in: fold */
-/****************************************************************
-Reserves all the mutexes of a hash table, in an ascending order. */
-
-void
-hash_mutex_enter_all(
-/*=================*/
- hash_table_t* table); /* in: hash table */
-/****************************************************************
-Releases all the mutexes of a hash table. */
-
-void
-hash_mutex_exit_all(
-/*================*/
- hash_table_t* table); /* in: hash table */
-
-
-struct hash_cell_struct{
- void* node; /* first node of the cell's hash chain, NULL if none */
-};
-
-/* The hash table structure */
-struct hash_table_struct {
- ibool adaptive;/* TRUE if this is the hash table of the
- adaptive hash index */
- ulint n_cells;/* number of cells in the hash table */
- hash_cell_t* array; /* pointer to cell array */
- ulint n_mutexes;/* if mutexes != NULL, then the number of
- mutexes, must be a power of 2 */
- mutex_t* mutexes;/* NULL, or an array of mutexes used to
- protect segments of the hash table */
- mem_heap_t** heaps; /* if this is non-NULL, hash chain nodes for
- external chaining can be allocated from these
- memory heaps; there are then n_mutexes many of
- these heaps */
- mem_heap_t* heap; /* if non-NULL, hash_get_heap() returns this heap for any fold */
- ulint magic_n; /* presumably HASH_TABLE_MAGIC_N in a valid table; TODO confirm */
-};
-
-#define HASH_TABLE_MAGIC_N 76561114
-
-#ifndef UNIV_NONINL
-#include "hash0hash.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic
deleted file mode 100644
index d246d8ee831..00000000000
--- a/storage/innobase/include/hash0hash.ic
+++ /dev/null
@@ -1,131 +0,0 @@
-/******************************************************
-The simple hash table utility
-
-(c) 1997 Innobase Oy
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "ut0rnd.h"
-
-/****************************************************************
-Gets the nth cell of a hash table, i.e. the address table->array + n. */
-UNIV_INLINE
-hash_cell_t*
-hash_get_nth_cell(
-/*==============*/
- /* out: pointer to cell */
- hash_table_t* table, /* in: hash table */
- ulint n) /* in: cell index, asserted < table->n_cells */
-{
- ut_ad(n < table->n_cells);
-
- return(table->array + n);
-}
-
-/*****************************************************************
-Returns the number of cells in a hash table (its n_cells field). */
-UNIV_INLINE
-ulint
-hash_get_n_cells(
-/*=============*/
- /* out: number of cells */
- hash_table_t* table) /* in: table */
-{
- return(table->n_cells);
-}
-
-/******************************************************************
-Maps a folded value to a hash value via ut_hash_ulint(fold, n_cells). */
-UNIV_INLINE
-ulint
-hash_calc_hash(
-/*===========*/
- /* out: hashed value */
- ulint fold, /* in: folded value */
- hash_table_t* table) /* in: hash table */
-{
- return(ut_hash_ulint(fold, table->n_cells));
-}
-
-/****************************************************************
-Gets the mutex index of a fold: ut_2pow_remainder(hash value, n_mutexes). */
-UNIV_INLINE
-ulint
-hash_get_mutex_no(
-/*==============*/
- /* out: mutex number */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
-{
- return(ut_2pow_remainder(hash_calc_hash(fold, table),
- table->n_mutexes));
-}
-
-/****************************************************************
-Gets the nth heap of a hash table, i.e. table->heaps[i]. */
-UNIV_INLINE
-mem_heap_t*
-hash_get_nth_heap(
-/*==============*/
- /* out: mem heap */
- hash_table_t* table, /* in: hash table */
- ulint i) /* in: index of the heap, asserted < n_mutexes */
-{
- ut_ad(i < table->n_mutexes);
-
- return(table->heaps[i]);
-}
-
-/****************************************************************
-Gets the heap for a fold value: table->heap if set, else heaps[mutex no]. */
-UNIV_INLINE
-mem_heap_t*
-hash_get_heap(
-/*==========*/
- /* out: mem heap */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
-{
- ulint i;
-
- if (table->heap) { /* a table-wide heap takes precedence */
- return(table->heap);
- }
-
- i = hash_get_mutex_no(table, fold); /* heaps are indexed like mutexes */
-
- return(hash_get_nth_heap(table, i));
-}
-
-/****************************************************************
-Gets the nth mutex of a hash table, i.e. the address table->mutexes + i. */
-UNIV_INLINE
-mutex_t*
-hash_get_nth_mutex(
-/*===============*/
- /* out: mutex */
- hash_table_t* table, /* in: hash table */
- ulint i) /* in: index of the mutex, asserted < n_mutexes */
-{
- ut_ad(i < table->n_mutexes);
-
- return(table->mutexes + i);
-}
-
-/****************************************************************
-Gets the mutex for a fold value: mutex number hash_get_mutex_no(table, fold). */
-UNIV_INLINE
-mutex_t*
-hash_get_mutex(
-/*===========*/
- /* out: mutex */
- hash_table_t* table, /* in: hash table */
- ulint fold) /* in: fold */
-{
- ulint i;
-
- i = hash_get_mutex_no(table, fold);
-
- return(hash_get_nth_mutex(table, i));
-}
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
deleted file mode 100644
index 77fefe2020b..00000000000
--- a/storage/innobase/include/ibuf0ibuf.h
+++ /dev/null
@@ -1,309 +0,0 @@
-/******************************************************
-Insert buffer
-
-(c) 1997 Innobase Oy
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef ibuf0ibuf_h
-#define ibuf0ibuf_h
-
-#include "univ.i"
-
-#include "dict0mem.h"
-#include "dict0dict.h"
-#include "mtr0mtr.h"
-#include "que0types.h"
-#include "ibuf0types.h"
-#include "fsp0fsp.h"
-
-extern ibuf_t* ibuf;
-
-/**********************************************************************
-Creates the insert buffer data struct for a single tablespace. Reads the
-root page of the insert buffer tree in the tablespace. This function can
-be called only after the dictionary system has been initialized, as this
-creates also the insert buffer table and index for this tablespace. */
-
-ibuf_data_t*
-ibuf_data_init_for_space(
-/*=====================*/
- /* out, own: ibuf data struct, linked to the list
- in ibuf control structure. */
- ulint space); /* in: space id */
-/**********************************************************************
-Creates the insert buffer data structure at a database startup and
-initializes the data structures for the insert buffer of each tablespace. */
-
-void
-ibuf_init_at_db_start(void);
-/*=======================*/
-/*************************************************************************
-Reads the biggest tablespace id from the high end of the insert buffer
-tree and updates the counter in fil_system. */
-
-void
-ibuf_update_max_tablespace_id(void);
-/*===============================*/
-/*************************************************************************
-Initializes an ibuf bitmap page. */
-
-void
-ibuf_bitmap_page_init(
-/*==================*/
- page_t* page, /* in: bitmap page */
- mtr_t* mtr); /* in: mtr */
-/****************************************************************************
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to only ibuf bitmap operations, which would result if the latch to the
-bitmap page were kept. */
-
-void
-ibuf_reset_free_bits_with_type(
-/*===========================*/
- ulint type, /* in: index type */
- page_t* page); /* in: index page; free bits are set to 0 if the index
- is non-clustered and non-unique and the page level is
- 0 */
-/****************************************************************************
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict further
-work to solely ibuf bitmap operations, which would result if the latch to
-the bitmap page were kept. */
-
-void
-ibuf_reset_free_bits(
-/*=================*/
- dict_index_t* index, /* in: index */
- page_t* page); /* in: index page; free bits are set to 0 if
- the index is non-clustered and non-unique and
- the page level is 0 */
-/****************************************************************************
-Updates the free bits of the page in the ibuf bitmap if there is not enough
-free space on the page any more. This is done in a separate mini-transaction,
-hence this operation does not restrict further work to only ibuf bitmap
-operations, which would result if the latch to the bitmap page were kept. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
- dict_index_t* index, /* in: index */
- page_t* page, /* in: index page to which we have added new
- records; the free bits are updated if the
- index is non-clustered and non-unique and
- the page level is 0, and the page becomes
- fuller */
- ulint max_ins_size,/* in: value of maximum insert size with
- reorganize before the latest operation
- performed to the page */
- ulint increase);/* in: upper limit for the additional space
- used in the latest operation, if known, or
- ULINT_UNDEFINED */
-/**************************************************************************
-Updates the free bits for the page to reflect the present state. Does this
-in the mtr given, which means that the latching order rules virtually
-prevent any further operations for this OS thread until mtr is committed. */
-
-void
-ibuf_update_free_bits_low(
-/*======================*/
- dict_index_t* index, /* in: index */
- page_t* page, /* in: index page */
- ulint max_ins_size, /* in: value of maximum insert size
- with reorganize before the latest
- operation performed to the page */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-Updates the free bits for the two pages to reflect the present state. Does
-this in the mtr given, which means that the latching order rules virtually
-prevent any further operations until mtr is committed. */
-
-void
-ibuf_update_free_bits_for_two_pages_low(
-/*====================================*/
- dict_index_t* index, /* in: index */
- page_t* page1, /* in: index page */
- page_t* page2, /* in: index page */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-A basic partial test if an insert to the insert buffer could be possible and
-recommended. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
- dict_index_t* index, /* in: index where to insert */
- ulint ignore_sec_unique); /* in: if != 0, we should
- ignore UNIQUE constraint on
- a secondary index when we
- decide */
-/**********************************************************************
-Returns TRUE if the current OS thread is performing an insert buffer
-routine. */
-
-ibool
-ibuf_inside(void);
-/*=============*/
- /* out: TRUE if inside an insert buffer routine: for instance,
- a read-ahead of non-ibuf pages is then forbidden */
-/***************************************************************************
-Checks if a page address is an ibuf bitmap page (level 3 page) address. */
-UNIV_INLINE
-ibool
-ibuf_bitmap_page(
-/*=============*/
- /* out: TRUE if a bitmap page */
- ulint page_no);/* in: page number */
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
-
-ibool
-ibuf_page(
-/*======*/
- /* out: TRUE if level 2 or level 3 page */
- ulint space, /* in: space id */
- ulint page_no);/* in: page number */
-/***************************************************************************
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
-
-ibool
-ibuf_page_low(
-/*==========*/
- /* out: TRUE if level 2 or level 3 page */
- ulint space, /* in: space id */
- ulint page_no,/* in: page number */
- mtr_t* mtr); /* in: mtr which will contain an x-latch to the
- bitmap page if the page is not one of the fixed
- address ibuf pages */
-/***************************************************************************
-Frees excess pages from the ibuf free list. This function is called when an OS
-thread calls fsp services to allocate a new file segment, or a new page to a
-file segment, and the thread did not own the fsp latch before this call. */
-
-void
-ibuf_free_excess_pages(
-/*===================*/
- ulint space); /* in: space id */
-/*************************************************************************
-Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible. Does not do insert if the index is clustered
-or unique. */
-
-ibool
-ibuf_insert(
-/*========*/
- /* out: TRUE if success */
- dtuple_t* entry, /* in: index entry to insert */
- dict_index_t* index, /* in: index where to insert */
- ulint space, /* in: space id where to insert */
- ulint page_no,/* in: page number where to insert */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-When an index page is read from a disk to the buffer pool, this function
-inserts to the page the possible index entries buffered in the insert buffer.
-The entries are deleted from the insert buffer. If the page is not read, but
-created in the buffer pool, this function deletes its buffered entries from
-the insert buffer; there can exist entries for such a page if the page
-belonged to an index which subsequently was dropped. */
-
-void
-ibuf_merge_or_delete_for_page(
-/*==========================*/
- page_t* page, /* in: if page has been read from disk, pointer to
- the page x-latched, else NULL */
- ulint space, /* in: space id of the index page */
- ulint page_no,/* in: page number of the index page */
- ibool update_ibuf_bitmap);/* in: normally this is set to TRUE, but if
- we have deleted or are deleting the tablespace, then we
- naturally do not want to update a non-existent bitmap
- page */
-/*************************************************************************
-Deletes all entries in the insert buffer for a given space id. This is used
-in DISCARD TABLESPACE and IMPORT TABLESPACE.
-NOTE: this does not update the page free bitmaps in the space. The space will
-become CORRUPT when you call this function! */
-
-void
-ibuf_delete_for_discarded_space(
-/*============================*/
- ulint space); /* in: space id */
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
-ulint
-ibuf_contract(
-/*==========*/
- /* out: a lower limit for the combined size in bytes
- of entries which will be merged from ibuf trees to the
- pages read, 0 if ibuf is empty */
- ibool sync); /* in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
-/*************************************************************************
-Contracts insert buffer trees by reading pages to the buffer pool. */
-
-ulint
-ibuf_contract_for_n_pages(
-/*======================*/
- /* out: a lower limit for the combined size in bytes
- of entries which will be merged from ibuf trees to the
- pages read, 0 if ibuf is empty */
- ibool sync, /* in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
- ulint n_pages);/* in: try to read at least this many pages to
- the buffer pool and merge the ibuf contents to
- them */
-/*************************************************************************
-Parses a redo log record of an ibuf bitmap page init. */
-
-byte*
-ibuf_parse_bitmap_init(
-/*===================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-#ifdef UNIV_IBUF_DEBUG
-/**********************************************************************
-Gets the ibuf count for a given page. */
-
-ulint
-ibuf_count_get(
-/*===========*/
- /* out: number of entries in the insert buffer
- currently buffered for this page */
- ulint space, /* in: space id */
- ulint page_no);/* in: page number */
-#endif
-/**********************************************************************
-Looks if the insert buffer is empty. */
-
-ibool
-ibuf_is_empty(void);
-/*===============*/
- /* out: TRUE if empty */
-/**********************************************************************
-Prints info of ibuf. */
-
-void
-ibuf_print(
-/*=======*/
- FILE* file); /* in: file where to print */
-
-#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
-#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
-
-/* The ibuf header page currently contains only the file segment header
-for the file segment from which the pages for the ibuf tree are allocated */
-#define IBUF_HEADER PAGE_DATA
-#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
-
-#ifndef UNIV_NONINL
-#include "ibuf0ibuf.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
deleted file mode 100644
index 4d65a7f5250..00000000000
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ /dev/null
@@ -1,224 +0,0 @@
-/******************************************************
-Insert buffer
-
-(c) 1997 Innobase Oy
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "buf0lru.h"
-#include "page0page.h"
-
-extern ulint ibuf_flush_count;
-
-/* If this number is n, an index page must contain at least the page size
-per n bytes of free space for ibuf to try to buffer inserts to this page.
-If there is this much of free space, the corresponding bits are set in the
-ibuf bitmap. */
-#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
-
-/* Insert buffer data struct for a single tablespace */
-struct ibuf_data_struct{
- ulint space; /* space id */
- ulint seg_size;/* allocated pages of the file segment
- containing ibuf header and tree */
- ulint size; /* size of the insert buffer tree in pages */
- ibool empty; /* after an insert to the ibuf tree is
- performed, this is set to FALSE, and if a
- contract operation finds the tree empty, this
- is set to TRUE */
- ulint free_list_len;
- /* length of the free list */
- ulint height; /* tree height */
- dict_index_t* index; /* insert buffer index */
- UT_LIST_NODE_T(ibuf_data_t) data_list;
- /* list of ibuf data structs */
- ulint n_inserts;/* number of inserts made to the insert
- buffer */
- ulint n_merges;/* number of pages merged */
- ulint n_merged_recs;/* number of records merged */
-};
-
-struct ibuf_struct{ /* insert buffer control structure (type ibuf_t) */
- ulint size; /* current size of the ibuf index
- trees in pages */
- ulint max_size; /* recommended maximum size in pages
- for the ibuf index tree */
- UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
- /* list of ibuf data structs for
- each tablespace */
-};
-
-/****************************************************************************
-Sets the free bits of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. */
-
-void
-ibuf_set_free_bits(
-/*===============*/
- ulint type, /* in: index type */
- page_t* page, /* in: index page; free bits are set if the index is
- non-clustered and non-unique, and the page level is 0 */
- ulint val, /* in: value to set: < 4 */
- ulint max_val);/* in: ULINT_UNDEFINED or a maximum value which
- the bits must have before setting; this is for
- debugging */
-
-/**************************************************************************
-Basic partial test whether buffering an insert is worth trying: TRUE for a
-non-clustered index that is non-unique or has ignore_sec_unique set. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
- dict_index_t* index, /* in: index where to insert */
- ulint ignore_sec_unique) /* in: if != 0, we should
- ignore UNIQUE constraint on
- a secondary index when we
- decide */
-{
- if (!(index->type & DICT_CLUSTERED)
- && (ignore_sec_unique || !(index->type & DICT_UNIQUE))) {
-
- ibuf_flush_count++; /* counts the TRUE answers given here */
-
- if (ibuf_flush_count % 8 == 0) { /* on every 8th TRUE answer */
-
- buf_LRU_try_free_flushed_blocks();
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***************************************************************************
-Checks if a page number is the page number of an ibuf bitmap page. */
-UNIV_INLINE
-ibool
-ibuf_bitmap_page(
-/*=============*/
- /* out: TRUE if a bitmap page */
- ulint page_no)/* in: page number */
-{
- if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_IBUF_BITMAP_OFFSET) { /* fixed offset in each run of XDES_DESCRIBED_PER_PAGE pages */
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Translates free space to ibuf bitmap bits 0..3 (1/32-page units, capped). */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_bits(
-/*===========================*/
- /* out: value for ibuf bitmap bits */
- ulint max_ins_size) /* in: maximum insert size after reorganize
- for the page */
-{
- ulint n;
-
- n = max_ins_size / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-
- if (n == 3) { /* exactly 3 units are rounded down to 2 */
- n = 2;
- }
-
- if (n > 3) { /* 4 or more units saturate at 3 */
- n = 3;
- }
-
- return(n);
-}
-
-/*************************************************************************
-Translates the ibuf free bits to the free space on a page in bytes. */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_from_bits(
-/*================================*/
- /* out: maximum insert size after reorganize for the
- page */
- ulint bits) /* in: value for ibuf bitmap bits, asserted < 4 */
-{
- ut_ad(bits < 4);
-
- if (bits == 3) { /* bit value 3 is translated as 4 units */
- return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- return(bits * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-}
-
-/*************************************************************************
-Computes the ibuf bitmap value from a page's max insert size after reorganize.*/
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free(
-/*======================*/
- /* out: value for ibuf bitmap bits */
- page_t* page) /* in: non-unique secondary index page */
-{
- return(ibuf_index_page_calc_free_bits(
- page_get_max_insert_size_after_reorganize(page, 1)));
-}
-
-/****************************************************************************
-Updates the free bits of the page in the ibuf bitmap if there is not enough
-free space on the page any more. This is done in a separate mini-transaction,
-hence this operation does not restrict further work to only ibuf bitmap
-operations, which would result if the latch to the bitmap page were kept. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
- dict_index_t* index, /* in: index */
- page_t* page, /* in: index page to which we have added new
- records; the free bits are updated if the
- index is non-clustered and non-unique and
- the page level is 0, and the page becomes
- fuller */
- ulint max_ins_size,/* in: value of maximum insert size with
- reorganize before the latest operation
- performed to the page */
- ulint increase)/* in: upper limit for the additional space
- used in the latest operation, if known, or
- ULINT_UNDEFINED */
-{
- ulint before;
- ulint after;
-
- before = ibuf_index_page_calc_free_bits(max_ins_size); /* bits before the operation */
-
- if (max_ins_size >= increase) { /* presumably false for ULINT_UNDEFINED; TODO confirm */
-#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE
-# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE"
-#endif
- after = ibuf_index_page_calc_free_bits(max_ins_size
- - increase);
-#ifdef UNIV_IBUF_DEBUG
- ut_a(after <= ibuf_index_page_calc_free(page));
-#endif
- } else { /* otherwise derive the bits from the page itself */
- after = ibuf_index_page_calc_free(page);
- }
-
- if (after == 0) {
- /* We move the page to the front of the buffer pool LRU list:
- the purpose of this is to prevent those pages to which we
- cannot make inserts using the insert buffer from slipping
- out of the buffer pool */
-
- buf_page_make_young(page);
- }
-
- if (before > after) { /* the bits are only ever lowered here */
- ibuf_set_free_bits(index->type, page, after, before);
- }
-}
diff --git a/storage/innobase/include/ibuf0types.h b/storage/innobase/include/ibuf0types.h
deleted file mode 100644
index fb202ac44b0..00000000000
--- a/storage/innobase/include/ibuf0types.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/******************************************************
-Insert buffer global types
-
-(c) 1997 Innobase Oy
-
-Created 7/29/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef ibuf0types_h
-#define ibuf0types_h
-
-typedef struct ibuf_data_struct ibuf_data_t;
-typedef struct ibuf_struct ibuf_t;
-
-#endif
diff --git a/storage/innobase/include/lock0iter.h b/storage/innobase/include/lock0iter.h
deleted file mode 100644
index d063a360c1f..00000000000
--- a/storage/innobase/include/lock0iter.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/******************************************************
-Lock queue iterator type and function prototypes.
-
-(c) 2007 Innobase Oy
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-#ifndef lock0iter_h
-#define lock0iter_h
-
-#include "univ.i"
-#include "lock0types.h"
-
-typedef struct lock_queue_iterator_struct {
- lock_t* current_lock; /* presumably the lock the iterator is positioned at; TODO confirm */
- /* For a record lock queue (as opposed to a table lock queue),
- bit_no is the record number within the heap in which the
- record is stored. */
- ulint bit_no;
-} lock_queue_iterator_t;
-
-/***********************************************************************
-Initialize lock queue iterator so that it starts to iterate from
-"lock". bit_no specifies the record number within the heap where the
-record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
-1. If the lock is a table lock, thus we have a table lock queue;
-2. If the lock is a record lock and it is a wait lock. In this case
- bit_no is calculated in this function by using
- lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
- of a wait lock. */
-
-void
-lock_queue_iterator_reset(
-/*======================*/
- lock_queue_iterator_t* iter, /* out: iterator */
- lock_t* lock, /* in: lock to start from */
- ulint bit_no);/* in: record number in the
- heap */
-
-/***********************************************************************
-Gets the previous lock in the lock queue, returns NULL if there are no
-more locks (i.e. the current lock is the first one). The iterator is
-moved back (only if non-NULL is returned). */
-
-lock_t*
-lock_queue_iterator_get_prev(
-/*=========================*/
- /* out: previous lock or NULL */
- lock_queue_iterator_t* iter); /* in/out: iterator */
-
-#endif /* lock0iter_h */
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
deleted file mode 100644
index 635724bf5a1..00000000000
--- a/storage/innobase/include/lock0lock.h
+++ /dev/null
@@ -1,709 +0,0 @@
-/******************************************************
-The transaction lock system
-
-(c) 1996 Innobase Oy
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef lock0lock_h
-#define lock0lock_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "rem0types.h"
-#include "dict0types.h"
-#include "que0types.h"
-#include "page0types.h"
-#include "lock0types.h"
-#include "read0types.h"
-#include "hash0hash.h"
-
-#ifdef UNIV_DEBUG
-extern ibool lock_print_waits;
-#endif /* UNIV_DEBUG */
-/* Buffer for storing information about the most recent deadlock error */
-extern FILE* lock_latest_err_file;
-
-/*************************************************************************
-Gets the size of a lock struct. */
-
-ulint
-lock_get_size(void);
-/*===============*/
- /* out: size in bytes */
-/*************************************************************************
-Creates the lock system at database start. */
-
-void
-lock_sys_create(
-/*============*/
- ulint n_cells); /* in: number of slots in lock hash table */
-/*************************************************************************
-Checks if some transaction has an implicit x-lock on a record in a secondary
-index. */
-
-trx_t*
-lock_sec_rec_some_has_impl_off_kernel(
-/*==================================*/
- /* out: transaction which has the x-lock, or
- NULL */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: secondary index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index. */
-UNIV_INLINE
-trx_t*
-lock_clust_rec_some_has_impl(
-/*=========================*/
- /* out: transaction which has the x-lock, or
- NULL */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*****************************************************************
-Makes a record to inherit the locks of another record as gap type
-locks, but does not reset the lock bits of the other record. Also
-waiting lock requests on rec are inherited as GRANTED gap locks. */
-
-void
-lock_rec_inherit_to_gap(
-/*====================*/
- rec_t* heir, /* in: record which inherits */
- rec_t* rec); /* in: record from which inherited; does NOT reset
- the locks on this record */
-/*****************************************************************
-Updates the lock table when we have reorganized a page. NOTE: we copy
-also the locks set on the infimum of the page; the infimum may carry
-locks if an update of a record is occurring on the page, and its locks
-were temporarily stored on the infimum. */
-
-void
-lock_move_reorganize_page(
-/*======================*/
- page_t* page, /* in: old index page */
- page_t* new_page); /* in: reorganized page */
-/*****************************************************************
-Moves the explicit locks on user records to another page if a record
-list end is moved to another page. */
-
-void
-lock_move_rec_list_end(
-/*===================*/
- page_t* new_page, /* in: index page to move to */
- page_t* page, /* in: index page */
- rec_t* rec); /* in: record on page: this is the
- first record moved */
-/*****************************************************************
-Moves the explicit locks on user records to another page if a record
-list start is moved to another page. */
-
-void
-lock_move_rec_list_start(
-/*=====================*/
- page_t* new_page, /* in: index page to move to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page: this is the
- first record NOT copied */
- rec_t* old_end); /* in: old previous-to-last record on
- new_page before the records were copied */
-/*****************************************************************
-Updates the lock table when a page is split to the right. */
-
-void
-lock_update_split_right(
-/*====================*/
- page_t* right_page, /* in: right page */
- page_t* left_page); /* in: left page */
-/*****************************************************************
-Updates the lock table when a page is merged to the right. */
-
-void
-lock_update_merge_right(
-/*====================*/
- rec_t* orig_succ, /* in: original successor of infimum
- on the right page before merge */
- page_t* left_page); /* in: merged index page which will be
- discarded */
-/*****************************************************************
-Updates the lock table when the root page is copied to another in
-btr_root_raise_and_insert. Note that we leave lock structs on the
-root page, even though they do not make sense on other than leaf
-pages: the reason is that in a pessimistic update the infimum record
-of the root page will act as a dummy carrier of the locks of the record
-to be updated. */
-
-void
-lock_update_root_raise(
-/*===================*/
- page_t* new_page, /* in: index page to which copied */
- page_t* root); /* in: root page */
-/*****************************************************************
-Updates the lock table when a page is copied to another and the original page
-is removed from the chain of leaf pages, except if page is the root! */
-
-void
-lock_update_copy_and_discard(
-/*=========================*/
- page_t* new_page, /* in: index page to which copied */
- page_t* page); /* in: index page; NOT the root! */
-/*****************************************************************
-Updates the lock table when a page is split to the left. */
-
-void
-lock_update_split_left(
-/*===================*/
- page_t* right_page, /* in: right page */
- page_t* left_page); /* in: left page */
-/*****************************************************************
-Updates the lock table when a page is merged to the left. */
-
-void
-lock_update_merge_left(
-/*===================*/
- page_t* left_page, /* in: left page to which merged */
- rec_t* orig_pred, /* in: original predecessor of supremum
- on the left page before merge */
- page_t* right_page); /* in: merged index page which will be
- discarded */
-/*****************************************************************
-Resets the original locks on heir and replaces them with gap type locks
-inherited from rec. */
-
-void
-lock_rec_reset_and_inherit_gap_locks(
-/*=================================*/
- rec_t* heir, /* in: heir record */
- rec_t* rec); /* in: record */
-/*****************************************************************
-Updates the lock table when a page is discarded. */
-
-void
-lock_update_discard(
-/*================*/
- rec_t* heir, /* in: record which will inherit the locks */
- page_t* page); /* in: index page which will be discarded */
-/*****************************************************************
-Updates the lock table when a new user record is inserted. */
-
-void
-lock_update_insert(
-/*===============*/
- rec_t* rec); /* in: the inserted record */
-/*****************************************************************
-Updates the lock table when a record is removed. */
-
-void
-lock_update_delete(
-/*===============*/
- rec_t* rec); /* in: the record to be removed */
-/*************************************************************************
-Stores on the page infimum record the explicit locks of another record.
-This function is used to store the lock state of a record when it is
-updated and the size of the record changes in the update. The record
-is in such an update moved, perhaps to another page. The infimum record
-acts as a dummy carrier record, taking care of lock releases while the
-actual record is being moved. */
-
-void
-lock_rec_store_on_page_infimum(
-/*===========================*/
- page_t* page, /* in: page containing the record */
- rec_t* rec); /* in: record whose lock state is stored
- on the infimum record of the same page; lock
- bits are reset on the record */
-/*************************************************************************
-Restores the state of explicit lock requests on a single record, where the
-state was stored on the infimum of the page. */
-
-void
-lock_rec_restore_from_page_infimum(
-/*===============================*/
- rec_t* rec, /* in: record whose lock state is restored */
- page_t* page); /* in: page (rec is not necessarily on this page)
- whose infimum stored the lock state; lock bits are
- reset on the infimum */
-/*************************************************************************
-Returns TRUE if there are explicit record locks on a page. */
-
-ibool
-lock_rec_expl_exist_on_page(
-/*========================*/
- /* out: TRUE if there are explicit record locks on
- the page */
- ulint space, /* in: space id */
- ulint page_no);/* in: page number */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate insert of
-a record. If they do, first tests if the query thread should anyway
-be suspended for some reason; if not, then puts the transaction and
-the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue. */
-
-ulint
-lock_rec_insert_check_and_lock(
-/*===========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record after which to insert */
- dict_index_t* index, /* in: index */
- que_thr_t* thr, /* in: query thread */
- ibool* inherit);/* out: set to TRUE if the new inserted
- record maybe should inherit LOCK_GAP type
- locks from the successor record */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate modify (update,
-delete mark, or delete unmark) of a clustered index record. If they do,
-first tests if the query thread should anyway be suspended for some
-reason; if not, then puts the transaction and the query thread to the
-lock wait state and inserts a waiting request for a record x-lock to the
-lock queue. */
-
-ulint
-lock_clust_rec_modify_check_and_lock(
-/*=================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record which should be modified */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate modify
-(delete mark or delete unmark) of a secondary index record. */
-
-ulint
-lock_sec_rec_modify_check_and_lock(
-/*===============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record which should be modified;
- NOTE: as this is a secondary index, we
- always have to modify the clustered index
- record first: see the comment below */
- dict_index_t* index, /* in: secondary index */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Like the counterpart for a clustered index below, but now we read a
-secondary index record. */
-
-ulint
-lock_sec_rec_read_check_and_lock(
-/*=============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: secondary index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. */
-
-ulint
-lock_clust_rec_read_check_and_lock(
-/*===============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. This is an alternative version of
-lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets". */
-
-ulint
-lock_clust_rec_read_check_and_lock_alt(
-/*===================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: clustered index */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Checks that a record is seen in a consistent read. */
-
-ibool
-lock_clust_rec_cons_read_sees(
-/*==========================*/
- /* out: TRUE if sees, or FALSE if an earlier
- version of the record should be retrieved */
- rec_t* rec, /* in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- read_view_t* view); /* in: consistent read view */
-/*************************************************************************
-Checks that a non-clustered index record is seen in a consistent read. */
-
-ulint
-lock_sec_rec_cons_read_sees(
-/*========================*/
- /* out: TRUE if certainly sees, or FALSE if an
- earlier version of the clustered index record
- might be needed: NOTE that a non-clustered
- index page contains so little information on
- its modifications that also in the case FALSE,
- the present version of rec may be the right,
- but we must check this from the clustered
- index record */
- rec_t* rec, /* in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /* in: non-clustered index */
- read_view_t* view); /* in: consistent read view */
-/*************************************************************************
-Locks the specified database table in the mode given. If the lock cannot
-be granted immediately, the query thread is put to wait. */
-
-ulint
-lock_table(
-/*=======*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- dict_table_t* table, /* in: database table in dictionary cache */
- ulint mode, /* in: lock mode */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Checks if there are any locks set on the table. */
-
-ibool
-lock_is_on_table(
-/*=============*/
- /* out: TRUE if there are lock(s) */
- dict_table_t* table); /* in: database table in dictionary cache */
-/*****************************************************************
-Removes a granted record lock of a transaction from the queue and grants
-locks to other transactions waiting in the queue if they now are entitled
-to a lock. */
-
-void
-lock_rec_unlock(
-/*============*/
- trx_t* trx, /* in: transaction that has set a record
- lock */
- rec_t* rec, /* in: record */
- ulint lock_mode); /* in: LOCK_S or LOCK_X */
-/*************************************************************************
-Releases a table lock.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock(
-/*==============*/
- lock_t* lock); /* in: lock */
-/*************************************************************************
-Releases an auto-inc lock a transaction possibly has on a table.
-Releases possible other transactions waiting for this lock. */
-
-void
-lock_table_unlock_auto_inc(
-/*=======================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Releases transaction locks, and releases possible other transactions waiting
-because of these locks. */
-
-void
-lock_release_off_kernel(
-/*====================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Cancels a waiting lock request and releases possible other transactions
-waiting behind it. */
-
-void
-lock_cancel_waiting_and_release(
-/*============================*/
- lock_t* lock); /* in: waiting lock request */
-
-/*************************************************************************
-Removes locks on a table to be dropped or truncated.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-
-void
-lock_remove_all_on_table(
-/*=====================*/
- dict_table_t* table, /* in: table to be dropped
- or truncated */
- ibool remove_also_table_sx_locks);/* in: also removes
- table S and X locks */
-
-/*************************************************************************
-Calculates the fold value of a page file address: used in inserting or
-searching for a lock in the hash table. */
-UNIV_INLINE
-ulint
-lock_rec_fold(
-/*==========*/
- /* out: folded value */
- ulint space, /* in: space */
- ulint page_no);/* in: page number */
-/*************************************************************************
-Calculates the hash value of a page file address: used in inserting or
-searching for a lock in the hash table. */
-UNIV_INLINE
-ulint
-lock_rec_hash(
-/*==========*/
- /* out: hashed value */
- ulint space, /* in: space */
- ulint page_no);/* in: page number */
-/*************************************************************************
-Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock. */
-
-dict_table_t*
-lock_get_src_table(
-/*===============*/
- /* out: the source table of transaction,
- if it is covered by an IX or IS table lock;
- dest if there is no source table, and
- NULL if the transaction is locking more than
- two tables or an inconsistency is found */
- trx_t* trx, /* in: transaction */
- dict_table_t* dest, /* in: destination of ALTER TABLE */
- ulint* mode); /* out: lock mode of the source table */
-/*************************************************************************
-Determine if the given table is exclusively "owned" by the given
-transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table. */
-
-ibool
-lock_is_table_exclusive(
-/*====================*/
- /* out: TRUE if table is only locked by trx,
- with LOCK_IX, and possibly LOCK_AUTO_INC */
- dict_table_t* table, /* in: table */
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Checks if a lock request lock1 has to wait for request lock2. */
-
-ibool
-lock_has_to_wait(
-/*=============*/
- /* out: TRUE if lock1 has to wait for lock2 to be
- removed */
- lock_t* lock1, /* in: waiting lock */
- lock_t* lock2); /* in: another lock; NOTE that it is assumed that this
- has a lock bit set on the same record as in lock1 if
- the locks are record locks */
-/*************************************************************************
-Checks that a transaction id is sensible, i.e., not in the future. */
-
-ibool
-lock_check_trx_id_sanity(
-/*=====================*/
- /* out: TRUE if ok */
- dulint trx_id, /* in: trx id */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets, /* in: rec_get_offsets(rec, index) */
- ibool has_kernel_mutex);/* in: TRUE if the caller owns the
- kernel mutex */
-/*************************************************************************
-Validates the lock queue on a single record. */
-
-ibool
-lock_rec_queue_validate(
-/*====================*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: record to look at */
- dict_index_t* index, /* in: index, or NULL if not known */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Prints info of a table lock. */
-
-void
-lock_table_print(
-/*=============*/
- FILE* file, /* in: file where to print */
- lock_t* lock); /* in: table type lock */
-/*************************************************************************
-Prints info of a record lock. */
-
-void
-lock_rec_print(
-/*===========*/
- FILE* file, /* in: file where to print */
- lock_t* lock); /* in: record type lock */
-/*************************************************************************
-Prints info of locks for all transactions. */
-
-void
-lock_print_info_summary(
-/*====================*/
- FILE* file); /* in: file where to print */
-/*************************************************************************
-Prints info of locks for each transaction. */
-
-void
-lock_print_info_all_transactions(
-/*=============================*/
- FILE* file); /* in: file where to print */
-/*************************************************************************
-Validates the lock queue on a table. */
-
-ibool
-lock_table_queue_validate(
-/*======================*/
- /* out: TRUE if ok */
- dict_table_t* table); /* in: table */
-/*************************************************************************
-Validates the record lock queues on a page. */
-
-ibool
-lock_rec_validate_page(
-/*===================*/
- /* out: TRUE if ok */
- ulint space, /* in: space id */
- ulint page_no);/* in: page number */
-/*************************************************************************
-Validates the lock system. */
-
-ibool
-lock_validate(void);
-/*===============*/
- /* out: TRUE if ok */
-/*************************************************************************
-Return approximate number or record locks (bits set in the bitmap) for
-this transaction. Since delete-marked records may be removed, the
-record count will not be precise. */
-
-ulint
-lock_number_of_rows_locked(
-/*=======================*/
- trx_t* trx); /* in: transaction */
-
-/* The lock system */
-extern lock_sys_t* lock_sys;
-
-/* Lock modes and types */
-/* Basic modes */
-#define LOCK_NONE 0 /* this flag is used elsewhere to note
- consistent read */
-#define LOCK_IS 2 /* intention shared */
-#define LOCK_IX 3 /* intention exclusive */
-#define LOCK_S 4 /* shared */
-#define LOCK_X 5 /* exclusive */
-#define LOCK_AUTO_INC 6 /* locks the auto-inc counter of a table
- in an exclusive mode */
-#define LOCK_MODE_MASK 0xFUL /* mask used to extract mode from the
- type_mode field in a lock */
-/* Lock types */
-#define LOCK_TABLE 16 /* these type values should be so high that */
-#define LOCK_REC 32 /* they can be ORed to the lock mode */
-#define LOCK_TYPE_MASK 0xF0UL /* mask used to extract lock type from the
- type_mode field in a lock */
-/* Waiting lock flag */
-#define LOCK_WAIT 256 /* this wait bit should be so high that
- it can be ORed to the lock mode and type;
- when this bit is set, it means that the
- lock has not yet been granted, it is just
- waiting for its turn in the wait queue */
-/* Precise modes */
-#define LOCK_ORDINARY 0 /* this flag denotes an ordinary next-key lock
- in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */
-#define LOCK_GAP 512 /* this gap bit should be so high that
- it can be ORed to the other flags;
- when this bit is set, it means that the
- lock holds only on the gap before the record;
- for instance, an x-lock on the gap does not
- give permission to modify the record on which
- the bit is set; locks of this type are created
- when records are removed from the index chain
- of records */
-#define LOCK_REC_NOT_GAP 1024 /* this bit means that the lock is only on
- the index record and does NOT block inserts
- to the gap before the index record; this is
- used in the case when we retrieve a record
- with a unique key, and is also used in
- locking plain SELECTs (not part of UPDATE
- or DELETE) when the user has set the READ
- COMMITTED isolation level */
-#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting
- gap type record lock request in order to let
- an insert of an index record to wait until
- there are no conflicting locks by other
- transactions on the gap; note that this flag
- remains set when the waiting lock is granted,
- or if the lock is inherited to a neighboring
- record */
-
-/* When lock bits are reset, the following flags are available: */
-#define LOCK_RELEASE_WAIT 1
-#define LOCK_NOT_RELEASE_WAIT 2
-
-/* Lock operation struct */
-typedef struct lock_op_struct lock_op_t;
-struct lock_op_struct{
- dict_table_t* table; /* table to be locked */
- ulint mode; /* lock mode */
-};
-
-#define LOCK_OP_START 1
-#define LOCK_OP_COMPLETE 2
-
-/* The lock system struct */
-struct lock_sys_struct{
- hash_table_t* rec_hash; /* hash table of the record locks */
-};
-
-/* The lock system */
-extern lock_sys_t* lock_sys;
-
-
-#ifndef UNIV_NONINL
-#include "lock0lock.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic
deleted file mode 100644
index 311623b190b..00000000000
--- a/storage/innobase/include/lock0lock.ic
+++ /dev/null
@@ -1,81 +0,0 @@
-/******************************************************
-The transaction lock system
-
-(c) 1996 Innobase Oy
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#include "srv0srv.h"
-#include "dict0dict.h"
-#include "row0row.h"
-#include "trx0sys.h"
-#include "trx0trx.h"
-#include "buf0buf.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "row0vers.h"
-#include "que0que.h"
-#include "btr0cur.h"
-#include "read0read.h"
-#include "log0recv.h"
-
-/*************************************************************************
-Calculates the fold value of a page file address: used in inserting or
-searching for a lock in the hash table. */
-UNIV_INLINE
-ulint
-lock_rec_fold(
-/*==========*/
- /* out: folded value */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
-{
- return(ut_fold_ulint_pair(space, page_no));
-}
-
-/*************************************************************************
-Calculates the hash value of a page file address: used in inserting or
-searching for a lock in the hash table. */
-UNIV_INLINE
-ulint
-lock_rec_hash(
-/*==========*/
- /* out: hashed value */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
-{
- return(hash_calc_hash(lock_rec_fold(space, page_no),
- lock_sys->rec_hash));
-}
-
-/*************************************************************************
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index. */
-UNIV_INLINE
-trx_t*
-lock_clust_rec_some_has_impl(
-/*=========================*/
- /* out: transaction which has the x-lock, or
- NULL */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
-{
- dulint trx_id;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(page_rec_is_user_rec(rec));
-
- trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- if (trx_is_active(trx_id)) {
- /* The modifying or inserting transaction is active */
-
- return(trx_get_on_id(trx_id));
- }
-
- return(NULL);
-}
diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h
deleted file mode 100644
index 7703a2b7def..00000000000
--- a/storage/innobase/include/lock0priv.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/******************************************************
-Lock module internal structures and methods.
-
-(c) 2007 Innobase Oy
-
-Created July 12, 2007 Vasil Dimov
-*******************************************************/
-
-#ifndef lock0priv_h
-#define lock0priv_h
-
-#ifndef LOCK_MODULE_IMPLEMENTATION
-/* If you need to access members of the structures defined in this
-file, please write appropriate functions that retrieve them and put
-those functions in lock/ */
-#error Do not include lock0priv.h outside of the lock/ module
-#endif
-
-#include "univ.i"
-#include "dict0types.h"
-#include "hash0hash.h"
-#include "trx0types.h"
-#include "ut0lst.h"
-
-/* A table lock */
-typedef struct lock_table_struct lock_table_t;
-struct lock_table_struct {
- dict_table_t* table; /* database table in dictionary
- cache */
- UT_LIST_NODE_T(lock_t)
- locks; /* list of locks on the same
- table */
-};
-
-/* Record lock for a page */
-typedef struct lock_rec_struct lock_rec_t;
-struct lock_rec_struct {
- ulint space; /* space id */
- ulint page_no; /* page number */
- ulint n_bits; /* number of bits in the lock
- bitmap; NOTE: the lock bitmap is
- placed immediately after the
- lock struct */
-};
-
-/* Lock struct */
-struct lock_struct {
- trx_t* trx; /* transaction owning the
- lock */
- UT_LIST_NODE_T(lock_t)
- trx_locks; /* list of the locks of the
- transaction */
- ulint type_mode; /* lock type, mode, LOCK_GAP or
- LOCK_REC_NOT_GAP,
- LOCK_INSERT_INTENTION,
- wait flag, ORed */
- hash_node_t hash; /* hash chain node for a record
- lock */
- dict_index_t* index; /* index for a record lock */
- union {
- lock_table_t tab_lock;/* table lock */
- lock_rec_t rec_lock;/* record lock */
- } un_member;
-};
-
-/*************************************************************************
-Gets the type of a lock. */
-UNIV_INLINE
-ulint
-lock_get_type(
-/*==========*/
- /* out: LOCK_TABLE or LOCK_REC */
- const lock_t* lock); /* in: lock */
-
-/**************************************************************************
-Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
-if none found. */
-
-ulint
-lock_rec_find_set_bit(
-/*==================*/
- /* out: bit index == heap number of the record, or
- ULINT_UNDEFINED if none found */
- lock_t* lock); /* in: record lock with at least one bit set */
-
-/*************************************************************************
-Gets the previous record lock set on a record. */
-
-lock_t*
-lock_rec_get_prev(
-/*==============*/
- /* out: previous lock on the same record, NULL if
- none exists */
- lock_t* in_lock,/* in: record lock */
- ulint heap_no);/* in: heap number of the record */
-
-#ifndef UNIV_NONINL
-#include "lock0priv.ic"
-#endif
-
-#endif /* lock0priv_h */
diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic
deleted file mode 100644
index 4bc8397509d..00000000000
--- a/storage/innobase/include/lock0priv.ic
+++ /dev/null
@@ -1,32 +0,0 @@
-/******************************************************
-Lock module internal inline methods.
-
-(c) 2007 Innobase Oy
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-/* This file contains only methods which are used in
-lock/lock0* files, other than lock/lock0lock.c.
-I.e. lock/lock0lock.c contains more internal inline
-methods but they are used only in that file. */
-
-#ifndef LOCK_MODULE_IMPLEMENTATION
-#error Do not include lock0priv.ic outside of the lock/ module
-#endif
-
-/*************************************************************************
-Gets the type of a lock. */
-UNIV_INLINE
-ulint
-lock_get_type(
-/*==========*/
- /* out: LOCK_TABLE or LOCK_REC */
- const lock_t* lock) /* in: lock */
-{
- ut_ad(lock);
-
- return(lock->type_mode & LOCK_TYPE_MASK);
-}
-
-/* vim: set filetype=c: */
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
deleted file mode 100644
index 43fd2d60da5..00000000000
--- a/storage/innobase/include/lock0types.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/******************************************************
-The transaction lock system global types
-
-(c) 1996 Innobase Oy
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef lock0types_h
-#define lock0types_h
-
-#define lock_t ib_lock_t
-typedef struct lock_struct lock_t;
-typedef struct lock_sys_struct lock_sys_t;
-
-#endif
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
deleted file mode 100644
index 337b9f1e783..00000000000
--- a/storage/innobase/include/log0log.h
+++ /dev/null
@@ -1,872 +0,0 @@
-/******************************************************
-Database log
-
-(c) 1995 Innobase Oy
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef log0log_h
-#define log0log_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "sync0sync.h"
-#include "sync0rw.h"
-
-typedef struct log_struct log_t;
-typedef struct log_group_struct log_group_t;
-
-#ifdef UNIV_DEBUG
-extern ibool log_do_write;
-extern ibool log_debug_writes;
-#else /* UNIV_DEBUG */
-# define log_do_write TRUE
-#endif /* UNIV_DEBUG */
-
-/* Wait modes for log_write_up_to */
-#define LOG_NO_WAIT 91
-#define LOG_WAIT_ONE_GROUP 92
-#define LOG_WAIT_ALL_GROUPS 93
-#define LOG_MAX_N_GROUPS 32
-
-/********************************************************************
-Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
-so that we know that the limit has been written to a log checkpoint field
-on disk. */
-
-void
-log_fsp_current_free_limit_set_and_checkpoint(
-/*==========================================*/
- ulint limit); /* in: limit to set */
-/***********************************************************************
-Calculates where in log files we find a specified lsn. */
-
-ulint
-log_calc_where_lsn_is(
-/*==================*/
- /* out: log file number */
- ib_longlong* log_file_offset, /* out: offset in that file
- (including the header) */
- dulint first_header_lsn, /* in: first log file start
- lsn */
- dulint lsn, /* in: lsn whose position to
- determine */
- ulint n_log_files, /* in: total number of log
- files */
- ib_longlong log_file_size); /* in: log file size
- (including the header) */
-/****************************************************************
-Writes to the log the string given. The log must be released with
-log_release. */
-UNIV_INLINE
-dulint
-log_reserve_and_write_fast(
-/*=======================*/
- /* out: end lsn of the log record, ut_dulint_zero if
- did not succeed */
- byte* str, /* in: string */
- ulint len, /* in: string length */
- dulint* start_lsn,/* out: start lsn of the log record */
- ibool* success);/* out: TRUE if success */
-/***************************************************************************
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void);
-/*=============*/
-/***************************************************************************
-Checks if there is need for a log buffer flush or a new checkpoint, and does
-this if yes. Any database operation should call this when it has modified
-more than about 4 pages. NOTE that this function may only be called when the
-OS thread owns no synchronization objects except the dictionary mutex. */
-UNIV_INLINE
-void
-log_free_check(void);
-/*================*/
-/****************************************************************
-Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release. */
-
-dulint
-log_reserve_and_open(
-/*=================*/
- /* out: start lsn of the log record */
- ulint len); /* in: length of data to be catenated */
-/****************************************************************
-Writes to the log the string given. It is assumed that the caller holds the
-log mutex. */
-
-void
-log_write_low(
-/*==========*/
- byte* str, /* in: string */
- ulint str_len); /* in: string length */
-/****************************************************************
-Closes the log. */
-
-dulint
-log_close(void);
-/*===========*/
- /* out: lsn */
-/****************************************************************
-Gets the current lsn. */
-UNIV_INLINE
-dulint
-log_get_lsn(void);
-/*=============*/
- /* out: current lsn */
-/**********************************************************
-Initializes the log. */
-
-void
-log_init(void);
-/*==========*/
-/**********************************************************************
-Inits a log group to the log system. */
-
-void
-log_group_init(
-/*===========*/
- ulint id, /* in: group id */
- ulint n_files, /* in: number of log files */
- ulint file_size, /* in: log file size in bytes */
- ulint space_id, /* in: space id of the file space
- which contains the log files of this
- group */
- ulint archive_space_id); /* in: space id of the file space
- which contains some archived log
- files for this group; currently, only
- for the first log group this is
- used */
-/**********************************************************
-Completes an i/o to a log file. */
-
-void
-log_io_complete(
-/*============*/
- log_group_t* group); /* in: log group */
-/**********************************************************
-This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been written to the log file up to the last log entry written
-by the transaction. If there is a flush running, it waits and checks if the
-flush flushed enough. If not, starts a new flush. */
-
-void
-log_write_up_to(
-/*============*/
- dulint lsn, /* in: log sequence number up to which the log should
- be written, ut_dulint_max if not specified */
- ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk);
- /* in: TRUE if we want the written log also to be
- flushed to disk */
-/********************************************************************
-Does a syncronous flush of the log buffer to disk. */
-
-void
-log_buffer_flush_to_disk(void);
-/*==========================*/
-/********************************************************************
-Flushes the log buffer. Forces it to disk depending on the value of
-the configuration parameter innodb_flush_log_at_trx_commit. */
-
-void
-log_buffer_flush_maybe_sync(void);
-/*==========================*/
-/********************************************************************
-Advances the smallest lsn for which there are unflushed dirty blocks in the
-buffer pool and also may make a new checkpoint. NOTE: this function may only
-be called if the calling thread owns no synchronization objects! */
-
-ibool
-log_preflush_pool_modified_pages(
-/*=============================*/
- /* out: FALSE if there was a flush batch of
- the same type running, which means that we
- could not start this flush batch */
- dulint new_oldest, /* in: try to advance oldest_modified_lsn
- at least to this lsn */
- ibool sync); /* in: TRUE if synchronous operation is
- desired */
-/**********************************************************
-Makes a checkpoint. Note that this function does not flush dirty
-blocks from the buffer pool: it only checks what is lsn of the oldest
-modification in the pool, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool. */
-
-ibool
-log_checkpoint(
-/*===========*/
- /* out: TRUE if success, FALSE if a checkpoint
- write was already running */
- ibool sync, /* in: TRUE if synchronous operation is
- desired */
- ibool write_always); /* in: the function normally checks if the
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
-/********************************************************************
-Makes a checkpoint at a given lsn or later. */
-
-void
-log_make_checkpoint_at(
-/*===================*/
- dulint lsn, /* in: make a checkpoint at this or a later
- lsn, if ut_dulint_max, makes a checkpoint at
- the latest lsn */
- ibool write_always); /* in: the function normally checks if the
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
-/********************************************************************
-Makes a checkpoint at the latest lsn and writes it to first page of each
-data file in the database, so that we know that the file spaces contain
-all modifications up to that lsn. This can only be called at database
-shutdown. This function also writes all log in log files to the log archive. */
-
-void
-logs_empty_and_mark_files_at_shutdown(void);
-/*=======================================*/
-/**********************************************************
-Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-
-void
-log_group_read_checkpoint_info(
-/*===========================*/
- log_group_t* group, /* in: log group */
- ulint field); /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-/***********************************************************************
-Gets info from a checkpoint about a log group. */
-
-void
-log_checkpoint_get_nth_group_info(
-/*==============================*/
- byte* buf, /* in: buffer containing checkpoint info */
- ulint n, /* in: nth slot */
- ulint* file_no,/* out: archived file number */
- ulint* offset);/* out: archived file offset */
-/**********************************************************
-Writes checkpoint info to groups. */
-
-void
-log_groups_write_checkpoint_info(void);
-/*==================================*/
-/**********************************************************
-Writes info to a buffer of a log group when log files are created in
-backup restoration. */
-
-void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
- byte* hdr_buf,/* in: buffer which will be written to the start
- of the first log file */
- dulint start); /* in: lsn of the start of the first log file;
- we pretend that there is a checkpoint at
- start + LOG_BLOCK_HDR_SIZE */
-/************************************************************************
-Starts an archiving operation. */
-
-ibool
-log_archive_do(
-/*===========*/
- /* out: TRUE if succeed, FALSE if an archiving
- operation was already running */
- ibool sync, /* in: TRUE if synchronous operation is desired */
- ulint* n_bytes);/* out: archive log buffer size, 0 if nothing to
- archive */
-/********************************************************************
-Writes the log contents to the archive up to the lsn when this function was
-called, and stops the archiving. When archiving is started again, the archived
-log file numbers start from a number one higher, so that the archiving will
-not write again to the archived log files which exist when this function
-returns. */
-
-ulint
-log_archive_stop(void);
-/*==================*/
- /* out: DB_SUCCESS or DB_ERROR */
-/********************************************************************
-Starts again archiving which has been stopped. */
-
-ulint
-log_archive_start(void);
-/*===================*/
- /* out: DB_SUCCESS or DB_ERROR */
-/********************************************************************
-Stop archiving the log so that a gap may occur in the archived log files. */
-
-ulint
-log_archive_noarchivelog(void);
-/*==========================*/
- /* out: DB_SUCCESS or DB_ERROR */
-/********************************************************************
-Start archiving the log so that a gap may occur in the archived log files. */
-
-ulint
-log_archive_archivelog(void);
-/*========================*/
- /* out: DB_SUCCESS or DB_ERROR */
-/**********************************************************
-Generates an archived log file name. */
-
-void
-log_archived_file_name_gen(
-/*=======================*/
- char* buf, /* in: buffer where to write */
- ulint id, /* in: group id */
- ulint file_no);/* in: file number */
-/************************************************************************
-Checks that there is enough free space in the log to start a new query step.
-Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
-function may only be called if the calling thread owns no synchronization
-objects! */
-
-void
-log_check_margins(void);
-/*===================*/
-/**********************************************************
-Reads a specified log segment to a buffer. */
-
-void
-log_group_read_log_seg(
-/*===================*/
- ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */
- byte* buf, /* in: buffer where to read */
- log_group_t* group, /* in: log group */
- dulint start_lsn, /* in: read area start */
- dulint end_lsn); /* in: read area end */
-/**********************************************************
-Writes a buffer to a log file group. */
-
-void
-log_group_write_buf(
-/*================*/
- log_group_t* group, /* in: log group */
- byte* buf, /* in: buffer */
- ulint len, /* in: buffer len; must be divisible
- by OS_FILE_LOG_BLOCK_SIZE */
- dulint start_lsn, /* in: start lsn of the buffer; must
- be divisible by
- OS_FILE_LOG_BLOCK_SIZE */
- ulint new_data_offset);/* in: start offset of new data in
- buf: this parameter is used to decide
- if we have to write a new log file
- header */
-/************************************************************
-Sets the field values in group to correspond to a given lsn. For this function
-to work, the values must already be correctly initialized to correspond to
-some lsn, for instance, a checkpoint lsn. */
-
-void
-log_group_set_fields(
-/*=================*/
- log_group_t* group, /* in: group */
- dulint lsn); /* in: lsn for which the values should be
- set */
-/**********************************************************
-Calculates the data capacity of a log group, when the log file headers are not
-included. */
-
-ulint
-log_group_get_capacity(
-/*===================*/
- /* out: capacity in bytes */
- log_group_t* group); /* in: log group */
-/****************************************************************
-Gets a log block flush bit. */
-UNIV_INLINE
-ibool
-log_block_get_flush_bit(
-/*====================*/
- /* out: TRUE if this block was the first
- to be written in a log flush */
- byte* log_block); /* in: log block */
-/****************************************************************
-Gets a log block number stored in the header. */
-UNIV_INLINE
-ulint
-log_block_get_hdr_no(
-/*=================*/
- /* out: log block number stored in the block
- header */
- byte* log_block); /* in: log block */
-/****************************************************************
-Gets a log block data length. */
-UNIV_INLINE
-ulint
-log_block_get_data_len(
-/*===================*/
- /* out: log block data length measured as a
- byte offset from the block start */
- byte* log_block); /* in: log block */
-/****************************************************************
-Sets the log block data length. */
-UNIV_INLINE
-void
-log_block_set_data_len(
-/*===================*/
- byte* log_block, /* in: log block */
- ulint len); /* in: data length */
-/****************************************************************
-Calculates the checksum for a log block. */
-UNIV_INLINE
-ulint
-log_block_calc_checksum(
-/*====================*/
- /* out: checksum */
- byte* block); /* in: log block */
-/****************************************************************
-Gets a log block checksum field value. */
-UNIV_INLINE
-ulint
-log_block_get_checksum(
-/*===================*/
- /* out: checksum */
- byte* log_block); /* in: log block */
-/****************************************************************
-Sets a log block checksum field value. */
-UNIV_INLINE
-void
-log_block_set_checksum(
-/*===================*/
- byte* log_block, /* in: log block */
- ulint checksum); /* in: checksum */
-/****************************************************************
-Gets a log block first mtr log record group offset. */
-UNIV_INLINE
-ulint
-log_block_get_first_rec_group(
-/*==========================*/
- /* out: first mtr log record group byte offset
- from the block start, 0 if none */
- byte* log_block); /* in: log block */
-/****************************************************************
-Sets the log block first mtr log record group offset. */
-UNIV_INLINE
-void
-log_block_set_first_rec_group(
-/*==========================*/
- byte* log_block, /* in: log block */
- ulint offset); /* in: offset, 0 if none */
-/****************************************************************
-Gets a log block checkpoint number field (4 lowest bytes). */
-UNIV_INLINE
-ulint
-log_block_get_checkpoint_no(
-/*========================*/
- /* out: checkpoint no (4 lowest bytes) */
- byte* log_block); /* in: log block */
-/****************************************************************
-Initializes a log block in the log buffer. */
-UNIV_INLINE
-void
-log_block_init(
-/*===========*/
- byte* log_block, /* in: pointer to the log buffer */
- dulint lsn); /* in: lsn within the log block */
-/****************************************************************
-Initializes a log block in the log buffer in the old, < 3.23.52 format, where
-there was no checksum yet. */
-UNIV_INLINE
-void
-log_block_init_in_old_format(
-/*=========================*/
- byte* log_block, /* in: pointer to the log buffer */
- dulint lsn); /* in: lsn within the log block */
-/****************************************************************
-Converts a lsn to a log block number. */
-UNIV_INLINE
-ulint
-log_block_convert_lsn_to_no(
-/*========================*/
- /* out: log block number, it is > 0 and <= 1G */
- dulint lsn); /* in: lsn of a byte within the block */
-/**********************************************************
-Prints info of the log. */
-
-void
-log_print(
-/*======*/
- FILE* file); /* in: file where to print */
-/**********************************************************
-Peeks the current lsn. */
-
-ibool
-log_peek_lsn(
-/*=========*/
- /* out: TRUE if success, FALSE if could not get the
- log system mutex */
- dulint* lsn); /* out: if returns TRUE, current lsn is here */
-/**************************************************************************
-Refreshes the statistics used to print per-second averages. */
-
-void
-log_refresh_stats(void);
-/*===================*/
-
-extern log_t* log_sys;
-
-/* Values used as flags */
-#define LOG_FLUSH 7652559
-#define LOG_CHECKPOINT 78656949
-#define LOG_ARCHIVE 11122331
-#define LOG_RECOVER 98887331
-
-/* The counting of lsn's starts from this value: this must be non-zero */
-#define LOG_START_LSN ut_dulint_create(0, 16 * OS_FILE_LOG_BLOCK_SIZE)
-
-#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
-#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
-
-/* Offsets of a log block header */
-#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
- is allowed to wrap around at 2G; the
- highest bit is set to 1 if this is the
- first log block in a log flush write
- segment */
-#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL
- /* mask used to get the highest bit in
- the preceding field */
-#define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to
- this block */
-#define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an
- mtr log record group in this log block,
- 0 if none; if the value is the same
- as LOG_BLOCK_HDR_DATA_LEN, it means
- that the first rec group has not yet
- been catenated to this log block, but
- if it will, it will start at this
- offset; an archive recovery can
- start parsing the log records starting
- from this offset in this log block,
- if value not 0 */
-#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of
- log_sys->next_checkpoint_no when the
- log block was last written to: if the
- block has not yet been written full,
- this value is only updated before a
- log buffer flush */
-#define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in
- bytes */
-
-/* Offsets of a log block trailer from the end of the block */
-#define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block
- contents; in InnoDB versions
- < 3.23.52 this did not contain the
- checksum but the same value as
- .._HDR_NO */
-#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */
-
-/* Offsets for a checkpoint field */
-#define LOG_CHECKPOINT_NO 0
-#define LOG_CHECKPOINT_LSN 8
-#define LOG_CHECKPOINT_OFFSET 16
-#define LOG_CHECKPOINT_LOG_BUF_SIZE 20
-#define LOG_CHECKPOINT_ARCHIVED_LSN 24
-#define LOG_CHECKPOINT_GROUP_ARRAY 32
-
-/* For each value < LOG_MAX_N_GROUPS the following 8 bytes: */
-
-#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0
-#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4
-
-#define LOG_CHECKPOINT_ARRAY_END (LOG_CHECKPOINT_GROUP_ARRAY\
- + LOG_MAX_N_GROUPS * 8)
-#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END
-#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END)
-#define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END)
- /* current fsp free limit in
- tablespace 0, in units of one
- megabyte; this information is only used
- by ibbackup to decide if it can
- truncate unused ends of
- non-auto-extending data files in space
- 0 */
-#define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END)
- /* this magic number tells if the
- checkpoint contains the above field:
- the field was added to
- InnoDB-3.23.50 */
-#define LOG_CHECKPOINT_SIZE (16 + LOG_CHECKPOINT_ARRAY_END)
-
-#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243
-
-/* Offsets of a log file header */
-#define LOG_GROUP_ID 0 /* log group number */
-#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this
- log file */
-#define LOG_FILE_NO 12 /* 4-byte archived log file number;
- this field is only defined in an
- archived log file */
-#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16
- /* a 32-byte field which contains
- the string 'ibbackup' and the
- creation time if the log file was
- created by ibbackup --restore;
- when mysqld is first time started
- on the restored database, it can
- print helpful info for the user */
-#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
- /* this 4-byte field is TRUE when
- the writing of an archived log file
- has been completed; this field is
- only defined in an archived log file */
-#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4)
- /* lsn where the archived log file
- at least extends: actually the
- archived log file may extend to a
- later lsn, as long as it is within the
- same log block as this lsn; this field
- is defined only when an archived log
- file has been completely written */
-#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
- /* first checkpoint field in the log
- header; we write alternately to the
- checkpoint fields when we make new
- checkpoints; this field is only defined
- in the first log file of a log group */
-#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE)
- /* second checkpoint field in the log
- header */
-#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
-
-#define LOG_GROUP_OK 301
-#define LOG_GROUP_CORRUPTED 302
-
-/* Log group consists of a number of log files, each of the same size; a log
-group is implemented as a space in the sense of the module fil0fil. */
-
-struct log_group_struct{
- /* The following fields are protected by log_sys->mutex */
- ulint id; /* log group id */
- ulint n_files; /* number of files in the group */
- ulint file_size; /* individual log file size in bytes,
- including the log file header */
- ulint space_id; /* file space which implements the log
- group */
- ulint state; /* LOG_GROUP_OK or
- LOG_GROUP_CORRUPTED */
- dulint lsn; /* lsn used to fix coordinates within
- the log group */
- ulint lsn_offset; /* the offset of the above lsn */
- ulint n_pending_writes;/* number of currently pending flush
- writes for this log group */
- byte** file_header_bufs;/* buffers for each file header in the
- group */
- /*-----------------------------*/
- byte** archive_file_header_bufs;/* buffers for each file
- header in the group */
- ulint archive_space_id;/* file space which implements the log
- group archive */
- ulint archived_file_no;/* file number corresponding to
- log_sys->archived_lsn */
- ulint archived_offset;/* file offset corresponding to
- log_sys->archived_lsn, 0 if we have
- not yet written to the archive file
- number archived_file_no */
- ulint next_archived_file_no;/* during an archive write,
- until the write is completed, we
- store the next value for
- archived_file_no here: the write
- completion function then sets the new
- value to ..._file_no */
- ulint next_archived_offset; /* like the preceding field */
- /*-----------------------------*/
- dulint scanned_lsn; /* used only in recovery: recovery scan
- succeeded up to this lsn in this log
- group */
- byte* checkpoint_buf; /* checkpoint header is written from
- this buffer to the group */
- UT_LIST_NODE_T(log_group_t)
- log_groups; /* list of log groups */
-};
-
-struct log_struct{
- byte pad[64]; /* padding to prevent other memory
- update hotspots from residing on the
- same memory cache line */
- dulint lsn; /* log sequence number */
- ulint buf_free; /* first free offset within the log
- buffer */
- mutex_t mutex; /* mutex protecting the log */
- byte* buf; /* log buffer */
- ulint buf_size; /* log buffer size in bytes */
- ulint max_buf_free; /* recommended maximum value of
- buf_free, after which the buffer is
- flushed */
- ulint old_buf_free; /* value of buf free when log was
- last time opened; only in the debug
- version */
- dulint old_lsn; /* value of lsn when log was last time
- opened; only in the debug version */
- ibool check_flush_or_checkpoint;
- /* this is set to TRUE when there may
- be need to flush the log buffer, or
- preflush buffer pool pages, or make
- a checkpoint; this MUST be TRUE when
- lsn - last_checkpoint_lsn >
- max_checkpoint_age; this flag is
- peeked at by log_free_check(), which
- does not reserve the log mutex */
- UT_LIST_BASE_NODE_T(log_group_t)
- log_groups; /* log groups */
-
- /* The fields involved in the log buffer flush */
-
- ulint buf_next_to_write;/* first offset in the log buffer
- where the byte content may not exist
- written to file, e.g., the start
- offset of a log record catenated
- later; this is advanced when a flush
- operation is completed to all the log
- groups */
- dulint written_to_some_lsn;
- /* first log sequence number not yet
- written to any log group; for this to
- be advanced, it is enough that the
- write i/o has been completed for any
- one log group */
- dulint written_to_all_lsn;
- /* first log sequence number not yet
- written to some log group; for this to
- be advanced, it is enough that the
- write i/o has been completed for all
- log groups */
- dulint write_lsn; /* end lsn for the current running
- write */
- ulint write_end_offset;/* the data in buffer has been written
- up to this offset when the current
- write ends: this field will then
- be copied to buf_next_to_write */
- dulint current_flush_lsn;/* end lsn for the current running
- write + flush operation */
- dulint flushed_to_disk_lsn;
- /* how far we have written the log
- AND flushed to disk */
- ulint n_pending_writes;/* number of currently pending flushes
- or writes */
- /* NOTE on the 'flush' in names of the fields below: starting from
- 4.0.14, we separate the write of the log file and the actual fsync()
- or other method to flush it to disk. The names below shhould really
- be 'flush_or_write'! */
- os_event_t no_flush_event; /* this event is in the reset state
- when a flush or a write is running;
- a thread should wait for this without
- owning the log mutex, but NOTE that
- to set or reset this event, the
- thread MUST own the log mutex! */
- ibool one_flushed; /* during a flush, this is first FALSE
- and becomes TRUE when one log group
- has been written or flushed */
- os_event_t one_flushed_event;/* this event is reset when the
- flush or write has not yet completed
- for any log group; e.g., this means
- that a transaction has been committed
- when this is set; a thread should wait
- for this without owning the log mutex,
- but NOTE that to set or reset this
- event, the thread MUST own the log
- mutex! */
- ulint n_log_ios; /* number of log i/os initiated thus
- far */
- ulint n_log_ios_old; /* number of log i/o's at the
- previous printout */
- time_t last_printout_time;/* when log_print was last time
- called */
-
- /* Fields involved in checkpoints */
- ulint log_group_capacity; /* capacity of the log group; if
- the checkpoint age exceeds this, it is
- a serious error because it is possible
- we will then overwrite log and spoil
- crash recovery */
- ulint max_modified_age_async;
- /* when this recommended value for lsn
- - buf_pool_get_oldest_modification()
- is exceeded, we start an asynchronous
- preflush of pool pages */
- ulint max_modified_age_sync;
- /* when this recommended value for lsn
- - buf_pool_get_oldest_modification()
- is exceeded, we start a synchronous
- preflush of pool pages */
- ulint adm_checkpoint_interval;
- /* administrator-specified checkpoint
- interval in terms of log growth in
- bytes; the interval actually used by
- the database can be smaller */
- ulint max_checkpoint_age_async;
- /* when this checkpoint age is exceeded
- we start an asynchronous writing of a
- new checkpoint */
- ulint max_checkpoint_age;
- /* this is the maximum allowed value
- for lsn - last_checkpoint_lsn when a
- new query step is started */
- dulint next_checkpoint_no;
- /* next checkpoint number */
- dulint last_checkpoint_lsn;
- /* latest checkpoint lsn */
- dulint next_checkpoint_lsn;
- /* next checkpoint lsn */
- ulint n_pending_checkpoint_writes;
- /* number of currently pending
- checkpoint writes */
- rw_lock_t checkpoint_lock;/* this latch is x-locked when a
- checkpoint write is running; a thread
- should wait for this without owning
- the log mutex */
- byte* checkpoint_buf; /* checkpoint header is read to this
- buffer */
- /* Fields involved in archiving */
- ulint archiving_state;/* LOG_ARCH_ON, LOG_ARCH_STOPPING
- LOG_ARCH_STOPPED, LOG_ARCH_OFF */
- dulint archived_lsn; /* archiving has advanced to this
- lsn */
- ulint max_archived_lsn_age_async;
- /* recommended maximum age of
- archived_lsn, before we start
- asynchronous copying to the archive */
- ulint max_archived_lsn_age;
- /* maximum allowed age for
- archived_lsn */
- dulint next_archived_lsn;/* during an archive write,
- until the write is completed, we
- store the next value for
- archived_lsn here: the write
- completion function then sets the new
- value to archived_lsn */
- ulint archiving_phase;/* LOG_ARCHIVE_READ or
- LOG_ARCHIVE_WRITE */
- ulint n_pending_archive_ios;
- /* number of currently pending reads
- or writes in archiving */
- rw_lock_t archive_lock; /* this latch is x-locked when an
- archive write is running; a thread
- should wait for this without owning
- the log mutex */
- ulint archive_buf_size;/* size of archive_buf */
- byte* archive_buf; /* log segment is written to the
- archive from this buffer */
- os_event_t archiving_on; /* if archiving has been stopped,
- a thread can wait for this event to
- become signaled */
-};
-
-#define LOG_ARCH_ON 71
-#define LOG_ARCH_STOPPING 72
-#define LOG_ARCH_STOPPING2 73
-#define LOG_ARCH_STOPPED 74
-#define LOG_ARCH_OFF 75
-
-#ifndef UNIV_NONINL
-#include "log0log.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
deleted file mode 100644
index df0a8baf2d5..00000000000
--- a/storage/innobase/include/log0log.ic
+++ /dev/null
@@ -1,398 +0,0 @@
-/******************************************************
-Database log
-
-(c) 1995 Innobase Oy
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0file.h"
-#include "mach0data.h"
-#include "mtr0mtr.h"
-
-/**********************************************************
-Checks by parsing that the catenated log segment for a single mtr is
-consistent. */
-
-ibool
-log_check_log_recs(
-/*===============*/
- byte* buf, /* in: pointer to the start of the log segment
- in the log_sys->buf log buffer */
- ulint len, /* in: segment length in bytes */
- dulint buf_start_lsn); /* in: buffer start lsn */
-
-/****************************************************************
-Gets a log block flush bit. */
-UNIV_INLINE
-ibool
-log_block_get_flush_bit(
-/*====================*/
- /* out: TRUE if this block was the first
- to be written in a log flush */
- byte* log_block) /* in: log block */
-{
- if (LOG_BLOCK_FLUSH_BIT_MASK
- & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/****************************************************************
-Sets the log block flush bit. */
-UNIV_INLINE
-void
-log_block_set_flush_bit(
-/*====================*/
- byte* log_block, /* in: log block */
- ibool val) /* in: value to set */
-{
- ulint field;
-
- field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
-
- if (val) {
- field = field | LOG_BLOCK_FLUSH_BIT_MASK;
- } else {
- field = field & ~LOG_BLOCK_FLUSH_BIT_MASK;
- }
-
- mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field);
-}
-
-/****************************************************************
-Gets a log block number stored in the header. */
-UNIV_INLINE
-ulint
-log_block_get_hdr_no(
-/*=================*/
- /* out: log block number stored in the block
- header */
- byte* log_block) /* in: log block */
-{
- return(~LOG_BLOCK_FLUSH_BIT_MASK
- & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO));
-}
-
-/****************************************************************
-Sets the log block number stored in the header; NOTE that this must be set
-before the flush bit! */
-UNIV_INLINE
-void
-log_block_set_hdr_no(
-/*=================*/
- byte* log_block, /* in: log block */
- ulint n) /* in: log block number: must be > 0 and
- < LOG_BLOCK_FLUSH_BIT_MASK */
-{
- ut_ad(n > 0);
- ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK);
-
- mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, n);
-}
-
-/****************************************************************
-Gets a log block data length. */
-UNIV_INLINE
-ulint
-log_block_get_data_len(
-/*===================*/
- /* out: log block data length measured as a
- byte offset from the block start */
- byte* log_block) /* in: log block */
-{
- return(mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN));
-}
-
-/****************************************************************
-Sets the log block data length. */
-UNIV_INLINE
-void
-log_block_set_data_len(
-/*===================*/
- byte* log_block, /* in: log block */
- ulint len) /* in: data length */
-{
- mach_write_to_2(log_block + LOG_BLOCK_HDR_DATA_LEN, len);
-}
-
-/****************************************************************
-Gets a log block first mtr log record group offset. */
-UNIV_INLINE
-ulint
-log_block_get_first_rec_group(
-/*==========================*/
- /* out: first mtr log record group byte offset
- from the block start, 0 if none */
- byte* log_block) /* in: log block */
-{
- return(mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP));
-}
-
-/****************************************************************
-Sets the log block first mtr log record group offset. */
-UNIV_INLINE
-void
-log_block_set_first_rec_group(
-/*==========================*/
- byte* log_block, /* in: log block */
- ulint offset) /* in: offset, 0 if none */
-{
- mach_write_to_2(log_block + LOG_BLOCK_FIRST_REC_GROUP, offset);
-}
-
-/****************************************************************
-Gets a log block checkpoint number field (4 lowest bytes). */
-UNIV_INLINE
-ulint
-log_block_get_checkpoint_no(
-/*========================*/
- /* out: checkpoint no (4 lowest bytes) */
- byte* log_block) /* in: log block */
-{
- return(mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO));
-}
-
-/****************************************************************
-Sets a log block checkpoint number field (4 lowest bytes). */
-UNIV_INLINE
-void
-log_block_set_checkpoint_no(
-/*========================*/
- byte* log_block, /* in: log block */
- dulint no) /* in: checkpoint no */
-{
- mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO,
- ut_dulint_get_low(no));
-}
-
-/****************************************************************
-Converts a lsn to a log block number. */
-UNIV_INLINE
-ulint
-log_block_convert_lsn_to_no(
-/*========================*/
- /* out: log block number, it is > 0 and <= 1G */
- dulint lsn) /* in: lsn of a byte within the block */
-{
- ulint no;
-
- no = ut_dulint_get_low(lsn) / OS_FILE_LOG_BLOCK_SIZE;
- no += (ut_dulint_get_high(lsn) % OS_FILE_LOG_BLOCK_SIZE)
- * 2 * (0x80000000UL / OS_FILE_LOG_BLOCK_SIZE);
-
- no = no & 0x3FFFFFFFUL;
-
- return(no + 1);
-}
-
-/****************************************************************
-Calculates the checksum for a log block. */
-UNIV_INLINE
-ulint
-log_block_calc_checksum(
-/*====================*/
- /* out: checksum */
- byte* block) /* in: log block */
-{
- ulint sum;
- ulint sh;
- ulint i;
-
- sum = 1;
- sh = 0;
-
- for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) {
- sum = sum & 0x7FFFFFFFUL;
- sum += (((ulint)(*(block + i))) << sh) + (ulint)(*(block + i));
- sh++;
- if (sh > 24) {
- sh = 0;
- }
- }
-
- return(sum);
-}
-
-/****************************************************************
-Gets a log block checksum field value. */
-UNIV_INLINE
-ulint
-log_block_get_checksum(
-/*===================*/
- /* out: checksum */
- byte* log_block) /* in: log block */
-{
- return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM));
-}
-
-/****************************************************************
-Sets a log block checksum field value. */
-UNIV_INLINE
-void
-log_block_set_checksum(
-/*===================*/
- byte* log_block, /* in: log block */
- ulint checksum) /* in: checksum */
-{
- mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM,
- checksum);
-}
-
-/****************************************************************
-Initializes a log block in the log buffer. */
-UNIV_INLINE
-void
-log_block_init(
-/*===========*/
- byte* log_block, /* in: pointer to the log buffer */
- dulint lsn) /* in: lsn within the log block */
-{
- ulint no;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- no = log_block_convert_lsn_to_no(lsn);
-
- log_block_set_hdr_no(log_block, no);
-
- log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
- log_block_set_first_rec_group(log_block, 0);
-}
-
-/****************************************************************
-Initializes a log block in the log buffer in the old format, where there
-was no checksum yet. */
-UNIV_INLINE
-void
-log_block_init_in_old_format(
-/*=========================*/
- byte* log_block, /* in: pointer to the log buffer */
- dulint lsn) /* in: lsn within the log block */
-{
- ulint no;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- no = log_block_convert_lsn_to_no(lsn);
-
- log_block_set_hdr_no(log_block, no);
- mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM, no);
- log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
- log_block_set_first_rec_group(log_block, 0);
-}
-
-/****************************************************************
-Writes to the log the string given. The log must be released with
-log_release. */
-UNIV_INLINE
-dulint
-log_reserve_and_write_fast(
-/*=======================*/
- /* out: end lsn of the log record, ut_dulint_zero if
- did not succeed */
- byte* str, /* in: string */
- ulint len, /* in: string length */
- dulint* start_lsn,/* out: start lsn of the log record */
- ibool* success)/* out: TRUE if success */
-{
- log_t* log = log_sys;
- ulint data_len;
- dulint lsn;
-
- *success = TRUE;
-
- mutex_enter(&(log->mutex));
-
- data_len = len + log->buf_free % OS_FILE_LOG_BLOCK_SIZE;
-
- if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
-
- /* The string does not fit within the current log block
- or the log block would become full */
-
- *success = FALSE;
-
- mutex_exit(&(log->mutex));
-
- return(ut_dulint_zero);
- }
-
- *start_lsn = log->lsn;
-
- ut_memcpy(log->buf + log->buf_free, str, len);
-
- log_block_set_data_len(ut_align_down(log->buf + log->buf_free,
- OS_FILE_LOG_BLOCK_SIZE),
- data_len);
-#ifdef UNIV_LOG_DEBUG
- log->old_buf_free = log->buf_free;
- log->old_lsn = log->lsn;
-#endif
- log->buf_free += len;
-
- ut_ad(log->buf_free <= log->buf_size);
-
- lsn = ut_dulint_add(log->lsn, len);
-
- log->lsn = lsn;
-
-#ifdef UNIV_LOG_DEBUG
- log_check_log_recs(log->buf + log->old_buf_free,
- log->buf_free - log->old_buf_free, log->old_lsn);
-#endif
- return(lsn);
-}
-
-/***************************************************************************
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void)
-/*=============*/
-{
- mutex_exit(&(log_sys->mutex));
-}
-
-/****************************************************************
-Gets the current lsn. */
-UNIV_INLINE
-dulint
-log_get_lsn(void)
-/*=============*/
- /* out: current lsn */
-{
- dulint lsn;
-
- mutex_enter(&(log_sys->mutex));
-
- lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- return(lsn);
-}
-
-/***************************************************************************
-Checks if there is need for a log buffer flush or a new checkpoint, and does
-this if yes. Any database operation should call this when it has modified
-more than about 4 pages. NOTE that this function may only be called when the
-OS thread owns no synchronization objects except the dictionary mutex. */
-UNIV_INLINE
-void
-log_free_check(void)
-/*================*/
-{
- /* ut_ad(sync_thread_levels_empty()); */
-
- if (log_sys->check_flush_or_checkpoint) {
-
- log_check_margins();
- }
-}
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
deleted file mode 100644
index 091bbe34562..00000000000
--- a/storage/innobase/include/log0recv.h
+++ /dev/null
@@ -1,349 +0,0 @@
-/******************************************************
-Recovery
-
-(c) 1997 Innobase Oy
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef log0recv_h
-#define log0recv_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "page0types.h"
-#include "hash0hash.h"
-#include "log0log.h"
-
-#ifdef UNIV_HOTBACKUP
-extern ibool recv_replay_file_ops;
-#endif /* UNIV_HOTBACKUP */
-
-/***********************************************************************
-Reads the checkpoint info needed in hot backup. */
-
-ibool
-recv_read_cp_info_for_backup(
-/*=========================*/
- /* out: TRUE if success */
- byte* hdr, /* in: buffer containing the log group header */
- dulint* lsn, /* out: checkpoint lsn */
- ulint* offset, /* out: checkpoint offset in the log group */
- ulint* fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
- database is running with < version 3.23.50 of InnoDB */
- dulint* cp_no, /* out: checkpoint number */
- dulint* first_header_lsn);
- /* out: lsn of of the start of the first log file */
-/***********************************************************************
-Scans the log segment and n_bytes_scanned is set to the length of valid
-log scanned. */
-
-void
-recv_scan_log_seg_for_backup(
-/*=========================*/
- byte* buf, /* in: buffer containing log data */
- ulint buf_len, /* in: data length in that buffer */
- dulint* scanned_lsn, /* in/out: lsn of buffer start,
- we return scanned lsn */
- ulint* scanned_checkpoint_no,
- /* in/out: 4 lowest bytes of the
- highest scanned checkpoint number so
- far */
- ulint* n_bytes_scanned);/* out: how much we were able to
- scan, smaller than buf_len if log
- data ended here */
-/***********************************************************************
-Returns TRUE if recovery is currently running. */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void);
-/*=====================*/
-/***********************************************************************
-Returns TRUE if recovery from backup is currently running. */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void);
-/*=================================*/
-/****************************************************************************
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool. */
-
-void
-recv_recover_page(
-/*==============*/
- ibool recover_backup, /* in: TRUE if we are recovering a backup
- page: then we do not acquire any latches
- since the page was read in outside the
- buffer pool */
- ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
- a freshly read page */
- page_t* page, /* in: buffer page */
- ulint space, /* in: space id */
- ulint page_no); /* in: page number */
-/************************************************************
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it. */
-
-ulint
-recv_recovery_from_checkpoint_start(
-/*================================*/
- /* out: error code or DB_SUCCESS */
- ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
- dulint limit_lsn, /* in: recover up to this lsn if possible */
- dulint min_flushed_lsn,/* in: min flushed lsn from data files */
- dulint max_flushed_lsn);/* in: max flushed lsn from data files */
-/************************************************************
-Completes recovery from a checkpoint. */
-
-void
-recv_recovery_from_checkpoint_finish(void);
-/*======================================*/
-/***********************************************************
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
-
-ibool
-recv_scan_log_recs(
-/*===============*/
- /* out: TRUE if limit_lsn has been reached, or
- not able to scan any more in this log group */
- ibool apply_automatically,/* in: TRUE if we want this function to
- apply log records automatically when the
- hash table becomes full; in the hot backup tool
- the tool does the applying, not this
- function */
- ulint available_memory,/* in: we let the hash table of recs to grow
- to this size, at the maximum */
- ibool store_to_hash, /* in: TRUE if the records should be stored
- to the hash table; this is set to FALSE if just
- debug checking is needed */
- byte* buf, /* in: buffer containing a log segment or
- garbage */
- ulint len, /* in: buffer length */
- dulint start_lsn, /* in: buffer start lsn */
- dulint* contiguous_lsn, /* in/out: it is known that all log groups
- contain contiguous log data up to this lsn */
- dulint* group_scanned_lsn);/* out: scanning succeeded up to this lsn */
-/**********************************************************
-Resets the logs. The contents of log files will be lost! */
-
-void
-recv_reset_logs(
-/*============*/
- dulint lsn, /* in: reset to this lsn rounded up to
- be divisible by OS_FILE_LOG_BLOCK_SIZE,
- after which we add LOG_BLOCK_HDR_SIZE */
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /* in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created);/* in: TRUE if resetting logs is done
- at the log creation; FALSE if it is done
- after archive recovery */
-#ifdef UNIV_HOTBACKUP
-/**********************************************************
-Creates new log files after a backup has been restored. */
-
-void
-recv_reset_log_files_for_backup(
-/*============================*/
- const char* log_dir, /* in: log file directory path */
- ulint n_log_files, /* in: number of log files */
- ulint log_file_size, /* in: log file size */
- dulint lsn); /* in: new start lsn, must be
- divisible by OS_FILE_LOG_BLOCK_SIZE */
-#endif /* UNIV_HOTBACKUP */
-/************************************************************
-Creates the recovery system. */
-
-void
-recv_sys_create(void);
-/*=================*/
-/************************************************************
-Inits the recovery system for a recovery operation. */
-
-void
-recv_sys_init(
-/*==========*/
- ibool recover_from_backup, /* in: TRUE if this is called
- to recover from a hot backup */
- ulint available_memory); /* in: available memory in bytes */
-/***********************************************************************
-Empties the hash table of stored log records, applying them to appropriate
-pages. */
-
-void
-recv_apply_hashed_log_recs(
-/*=======================*/
- ibool allow_ibuf); /* in: if TRUE, also ibuf operations are
- allowed during the application; if FALSE,
- no ibuf operations are allowed, and after
- the application all file pages are flushed to
- disk and invalidated in buffer pool: this
- alternative means that no new log records
- can be generated during the application */
-#ifdef UNIV_HOTBACKUP
-/***********************************************************************
-Applies log records in the hash table to a backup. */
-
-void
-recv_apply_log_recs_for_backup(void);
-/*================================*/
-#endif
-#ifdef UNIV_LOG_ARCHIVE
-/************************************************************
-Recovers from archived log files, and also from log files, if they exist. */
-
-ulint
-recv_recovery_from_archive_start(
-/*=============================*/
- /* out: error code or DB_SUCCESS */
- dulint min_flushed_lsn,/* in: min flushed lsn field from the
- data files */
- dulint limit_lsn, /* in: recover up to this lsn if possible */
- ulint first_log_no); /* in: number of the first archived log file
- to use in the recovery; the file will be
- searched from INNOBASE_LOG_ARCH_DIR specified
- in server config file */
-/************************************************************
-Completes recovery from archive. */
-
-void
-recv_recovery_from_archive_finish(void);
-/*===================================*/
-#endif /* UNIV_LOG_ARCHIVE */
-
-/* Block of log record data */
-typedef struct recv_data_struct recv_data_t;
-struct recv_data_struct{
- recv_data_t* next; /* pointer to the next block or NULL */
- /* the log record data is stored physically
- immediately after this struct, max amount
- RECV_DATA_BLOCK_SIZE bytes of it */
-};
-
-/* Stored log record struct */
-typedef struct recv_struct recv_t;
-struct recv_struct{
- byte type; /* log record type */
- ulint len; /* log record body length in bytes */
- recv_data_t* data; /* chain of blocks containing the log record
- body */
- dulint start_lsn;/* start lsn of the log segment written by
- the mtr which generated this log record: NOTE
- that this is not necessarily the start lsn of
- this log record */
- dulint end_lsn;/* end lsn of the log segment written by
- the mtr which generated this log record: NOTE
- that this is not necessarily the end lsn of
- this log record */
- UT_LIST_NODE_T(recv_t)
- rec_list;/* list of log records for this page */
-};
-
-/* Hashed page file address struct */
-typedef struct recv_addr_struct recv_addr_t;
-struct recv_addr_struct{
- ulint state; /* RECV_NOT_PROCESSED, RECV_BEING_PROCESSED,
- or RECV_PROCESSED */
- ulint space; /* space id */
- ulint page_no;/* page number */
- UT_LIST_BASE_NODE_T(recv_t)
- rec_list;/* list of log records for this page */
- hash_node_t addr_hash;
-};
-
-/* Recovery system data structure */
-typedef struct recv_sys_struct recv_sys_t;
-struct recv_sys_struct{
- mutex_t mutex; /* mutex protecting the fields apply_log_recs,
- n_addrs, and the state field in each recv_addr
- struct */
- ibool apply_log_recs;
- /* this is TRUE when log rec application to
- pages is allowed; this flag tells the
- i/o-handler if it should do log record
- application */
- ibool apply_batch_on;
- /* this is TRUE when a log rec application
- batch is running */
- dulint lsn; /* log sequence number */
- ulint last_log_buf_size;
- /* size of the log buffer when the database
- last time wrote to the log */
- byte* last_block;
- /* possible incomplete last recovered log
- block */
- byte* last_block_buf_start;
- /* the nonaligned start address of the
- preceding buffer */
- byte* buf; /* buffer for parsing log records */
- ulint len; /* amount of data in buf */
- dulint parse_start_lsn;
- /* this is the lsn from which we were able to
- start parsing log records and adding them to
- the hash table; ut_dulint_zero if a suitable
- start point not found yet */
- dulint scanned_lsn;
- /* the log data has been scanned up to this
- lsn */
- ulint scanned_checkpoint_no;
- /* the log data has been scanned up to this
- checkpoint number (lowest 4 bytes) */
- ulint recovered_offset;
- /* start offset of non-parsed log records in
- buf */
- dulint recovered_lsn;
- /* the log records have been parsed up to
- this lsn */
- dulint limit_lsn;/* recovery should be made at most up to this
- lsn */
- ibool found_corrupt_log;
- /* this is set to TRUE if we during log
- scan find a corrupt log block, or a corrupt
- log record, or there is a log parsing
- buffer overflow */
- log_group_t* archive_group;
- /* in archive recovery: the log group whose
- archive is read */
- mem_heap_t* heap; /* memory heap of log records and file
- addresses*/
- hash_table_t* addr_hash;/* hash table of file addresses of pages */
- ulint n_addrs;/* number of not processed hashed file
- addresses in the hash table */
-};
-
-extern recv_sys_t* recv_sys;
-extern ibool recv_recovery_on;
-extern ibool recv_no_ibuf_operations;
-extern ibool recv_needed_recovery;
-
-extern ibool recv_lsn_checks_on;
-#ifdef UNIV_HOTBACKUP
-extern ibool recv_is_making_a_backup;
-#endif /* UNIV_HOTBACKUP */
-extern ulint recv_max_parsed_page_no;
-
-/* Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
-times! */
-#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024)
-
-/* Size of block reads when the log groups are scanned forward to do a
-roll-forward */
-#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
-
-/* States of recv_addr_struct */
-#define RECV_NOT_PROCESSED 71
-#define RECV_BEING_READ 72
-#define RECV_BEING_PROCESSED 73
-#define RECV_PROCESSED 74
-
-extern ulint recv_n_pool_free_frames;
-
-#ifndef UNIV_NONINL
-#include "log0recv.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/log0recv.ic b/storage/innobase/include/log0recv.ic
deleted file mode 100644
index 489641bade2..00000000000
--- a/storage/innobase/include/log0recv.ic
+++ /dev/null
@@ -1,35 +0,0 @@
-/******************************************************
-Recovery
-
-(c) 1997 Innobase Oy
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#include "mem0mem.h"
-#include "log0log.h"
-#include "os0file.h"
-
-extern ibool recv_recovery_from_backup_on;
-
-/***********************************************************************
-Returns TRUE if recovery is currently running. */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void)
-/*=====================*/
-{
- return(recv_recovery_on);
-}
-
-/***********************************************************************
-Returns TRUE if recovery from backup is currently running. */
-UNIV_INLINE
-ibool
-recv_recovery_from_backup_is_on(void)
-/*=================================*/
-{
- return(recv_recovery_from_backup_on);
-}
-
diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h
deleted file mode 100644
index 25b619b3f12..00000000000
--- a/storage/innobase/include/mach0data.h
+++ /dev/null
@@ -1,345 +0,0 @@
-/**********************************************************************
-Utilities for converting data from the database file
-to the machine format.
-
-(c) 1995 Innobase Oy
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef mach0data_h
-#define mach0data_h
-
-#include "univ.i"
-#include "ut0byte.h"
-
-/* The data and all fields are always stored in a database file
-in the same format: ascii, big-endian, ... .
-All data in the files MUST be accessed using the functions in this
-module. */
-
-/***********************************************************
-The following function is used to store data in one byte. */
-UNIV_INLINE
-void
-mach_write_to_1(
-/*============*/
- byte* b, /* in: pointer to byte where to store */
- ulint n); /* in: ulint integer to be stored, >= 0, < 256 */
-/************************************************************
-The following function is used to fetch data from one byte. */
-UNIV_INLINE
-ulint
-mach_read_from_1(
-/*=============*/
- /* out: ulint integer, >= 0, < 256 */
- byte* b); /* in: pointer to byte */
-/***********************************************************
-The following function is used to store data in two consecutive
-bytes. We store the most significant byte to the lower address. */
-UNIV_INLINE
-void
-mach_write_to_2(
-/*============*/
- byte* b, /* in: pointer to two bytes where to store */
- ulint n); /* in: ulint integer to be stored, >= 0, < 64k */
-/************************************************************
-The following function is used to fetch data from two consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-ulint
-mach_read_from_2(
-/*=============*/
- /* out: ulint integer, >= 0, < 64k */
- byte* b); /* in: pointer to two bytes */
-
-/************************************************************
-The following function is used to convert a 16-bit data item
-to the canonical format, for fast bytewise equality test
-against memory. */
-UNIV_INLINE
-uint16
-mach_encode_2(
-/*==========*/
- /* out: 16-bit integer in canonical format */
- ulint n); /* in: integer in machine-dependent format */
-/************************************************************
-The following function is used to convert a 16-bit data item
-from the canonical format, for fast bytewise equality test
-against memory. */
-UNIV_INLINE
-ulint
-mach_decode_2(
-/*==========*/
- /* out: integer in machine-dependent format */
- uint16 n); /* in: 16-bit integer in canonical format */
-/***********************************************************
-The following function is used to store data in 3 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_3(
-/*============*/
- byte* b, /* in: pointer to 3 bytes where to store */
- ulint n); /* in: ulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-ulint
-mach_read_from_3(
-/*=============*/
- /* out: ulint integer */
- byte* b); /* in: pointer to 3 bytes */
-/***********************************************************
-The following function is used to store data in four consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_4(
-/*============*/
- byte* b, /* in: pointer to four bytes where to store */
- ulint n); /* in: ulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-ulint
-mach_read_from_4(
-/*=============*/
- /* out: ulint integer */
- byte* b); /* in: pointer to four bytes */
-/*************************************************************
-Writes a ulint in a compressed form (1..5 bytes). */
-UNIV_INLINE
-ulint
-mach_write_compressed(
-/*==================*/
- /* out: stored size in bytes */
- byte* b, /* in: pointer to memory where to store */
- ulint n); /* in: ulint integer to be stored */
-/*************************************************************
-Returns the size of an ulint when written in the compressed form. */
-UNIV_INLINE
-ulint
-mach_get_compressed_size(
-/*=====================*/
- /* out: compressed size in bytes */
- ulint n); /* in: ulint integer to be stored */
-/*************************************************************
-Reads a ulint in a compressed form. */
-UNIV_INLINE
-ulint
-mach_read_compressed(
-/*=================*/
- /* out: read integer */
- byte* b); /* in: pointer to memory from where to read */
-/***********************************************************
-The following function is used to store data in 6 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_6(
-/*============*/
- byte* b, /* in: pointer to 6 bytes where to store */
- dulint n); /* in: dulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-dulint
-mach_read_from_6(
-/*=============*/
- /* out: dulint integer */
- byte* b); /* in: pointer to 6 bytes */
-/***********************************************************
-The following function is used to store data in 7 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_7(
-/*============*/
- byte* b, /* in: pointer to 7 bytes where to store */
- dulint n); /* in: dulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-dulint
-mach_read_from_7(
-/*=============*/
- /* out: dulint integer */
- byte* b); /* in: pointer to 7 bytes */
-/***********************************************************
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_8(
-/*============*/
- byte* b, /* in: pointer to 8 bytes where to store */
- dulint n); /* in: dulint integer to be stored */
-/************************************************************
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-dulint
-mach_read_from_8(
-/*=============*/
- /* out: dulint integer */
- byte* b); /* in: pointer to 8 bytes */
-/*************************************************************
-Writes a dulint in a compressed form (5..9 bytes). */
-UNIV_INLINE
-ulint
-mach_dulint_write_compressed(
-/*=========================*/
- /* out: size in bytes */
- byte* b, /* in: pointer to memory where to store */
- dulint n); /* in: dulint integer to be stored */
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
-UNIV_INLINE
-ulint
-mach_dulint_get_compressed_size(
-/*============================*/
- /* out: compressed size in bytes */
- dulint n); /* in: dulint integer to be stored */
-/*************************************************************
-Reads a dulint in a compressed form. */
-UNIV_INLINE
-dulint
-mach_dulint_read_compressed(
-/*========================*/
- /* out: read dulint */
- byte* b); /* in: pointer to memory from where to read */
-/*************************************************************
-Writes a dulint in a compressed form (1..11 bytes). */
-UNIV_INLINE
-ulint
-mach_dulint_write_much_compressed(
-/*==============================*/
- /* out: size in bytes */
- byte* b, /* in: pointer to memory where to store */
- dulint n); /* in: dulint integer to be stored */
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
-UNIV_INLINE
-ulint
-mach_dulint_get_much_compressed_size(
-/*=================================*/
- /* out: compressed size in bytes */
- dulint n); /* in: dulint integer to be stored */
-/*************************************************************
-Reads a dulint in a compressed form. */
-UNIV_INLINE
-dulint
-mach_dulint_read_much_compressed(
-/*=============================*/
- /* out: read dulint */
- byte* b); /* in: pointer to memory from where to read */
-/*************************************************************
-Reads a ulint in a compressed form if the log record fully contains it. */
-
-byte*
-mach_parse_compressed(
-/*==================*/
- /* out: pointer to end of the stored field, NULL if
- not complete */
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- ulint* val); /* out: read value */
-/*************************************************************
-Reads a dulint in a compressed form if the log record fully contains it. */
-
-byte*
-mach_dulint_parse_compressed(
-/*=========================*/
- /* out: pointer to end of the stored field, NULL if
- not complete */
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- dulint* val); /* out: read value */
-/*************************************************************
-Reads a double. It is stored in a little-endian format. */
-UNIV_INLINE
-double
-mach_double_read(
-/*=============*/
- /* out: double read */
- byte* b); /* in: pointer to memory from where to read */
-/*************************************************************
-Writes a double. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_double_write(
-/*==============*/
- byte* b, /* in: pointer to memory where to write */
- double d); /* in: double */
-/*************************************************************
-Reads a float. It is stored in a little-endian format. */
-UNIV_INLINE
-float
-mach_float_read(
-/*============*/
- /* out: float read */
- byte* b); /* in: pointer to memory from where to read */
-/*************************************************************
-Writes a float. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_float_write(
-/*=============*/
- byte* b, /* in: pointer to memory where to write */
- float d); /* in: float */
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
-UNIV_INLINE
-ulint
-mach_read_from_n_little_endian(
-/*===========================*/
- /* out: unsigned long int */
- byte* buf, /* in: from where to read */
- ulint buf_size); /* in: from how many bytes to read */
-/*************************************************************
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_n_little_endian(
-/*==========================*/
- byte* dest, /* in: where to write */
- ulint dest_size, /* in: into how many bytes to write */
- ulint n); /* in: unsigned long int to write */
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
-UNIV_INLINE
-ulint
-mach_read_from_2_little_endian(
-/*===========================*/
- /* out: unsigned long int */
- byte* buf); /* in: from where to read */
-/*************************************************************
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_2_little_endian(
-/*==========================*/
- byte* dest, /* in: where to write */
- ulint n); /* in: unsigned long int to write */
-
-/*************************************************************
-Convert integral type from storage byte order (big endian) to
-host byte order. */
-UNIV_INLINE
-ullint
-mach_read_int_type(
-/*===============*/
- /* out: integer value */
- const byte* src, /* in: where to read from */
- ulint len, /* in: length of src */
- ibool unsigned_type); /* in: signed or unsigned flag */
-#ifndef UNIV_NONINL
-#include "mach0data.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic
deleted file mode 100644
index ec15c10c661..00000000000
--- a/storage/innobase/include/mach0data.ic
+++ /dev/null
@@ -1,734 +0,0 @@
-/**********************************************************************
-Utilities for converting data from the database file
-to the machine format.
-
-(c) 1995 Innobase Oy
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "ut0mem.h"
-
-/***********************************************************
-The following function is used to store data in one byte. */
-UNIV_INLINE
-void
-mach_write_to_1(
-/*============*/
- byte* b, /* in: pointer to byte where to store */
- ulint n) /* in: ulint integer to be stored, >= 0, < 256 */
-{
- ut_ad(b);
- ut_ad(n <= 0xFFUL);
-
- b[0] = (byte)n;
-}
-
-/************************************************************
-The following function is used to fetch data from one byte. */
-UNIV_INLINE
-ulint
-mach_read_from_1(
-/*=============*/
- /* out: ulint integer, >= 0, < 256 */
- byte* b) /* in: pointer to byte */
-{
- ut_ad(b);
- return((ulint)(b[0]));
-}
-
-/***********************************************************
-The following function is used to store data in two consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_2(
-/*============*/
- byte* b, /* in: pointer to two bytes where to store */
- ulint n) /* in: ulint integer to be stored */
-{
- ut_ad(b);
- ut_ad(n <= 0xFFFFUL);
-
- b[0] = (byte)(n >> 8);
- b[1] = (byte)(n);
-}
-
-/************************************************************
-The following function is used to fetch data from 2 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-ulint
-mach_read_from_2(
-/*=============*/
- /* out: ulint integer */
- byte* b) /* in: pointer to 2 bytes */
-{
- ut_ad(b);
- return( ((ulint)(b[0]) << 8)
- + (ulint)(b[1])
- );
-}
-
-/************************************************************
-The following function is used to convert a 16-bit data item
-to the canonical format, for fast bytewise equality test
-against memory. */
-UNIV_INLINE
-uint16
-mach_encode_2(
-/*==========*/
- /* out: 16-bit integer in canonical format */
- ulint n) /* in: integer in machine-dependent format */
-{
- uint16 ret;
- ut_ad(2 == sizeof ret);
- mach_write_to_2((byte*) &ret, n);
- return(ret);
-}
-/************************************************************
-The following function is used to convert a 16-bit data item
-from the canonical format, for fast bytewise equality test
-against memory. */
-UNIV_INLINE
-ulint
-mach_decode_2(
-/*==========*/
- /* out: integer in machine-dependent format */
- uint16 n) /* in: 16-bit integer in canonical format */
-{
- ut_ad(2 == sizeof n);
- return(mach_read_from_2((byte*) &n));
-}
-
-/***********************************************************
-The following function is used to store data in 3 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_3(
-/*============*/
- byte* b, /* in: pointer to 3 bytes where to store */
- ulint n) /* in: ulint integer to be stored */
-{
- ut_ad(b);
- ut_ad(n <= 0xFFFFFFUL);
-
- b[0] = (byte)(n >> 16);
- b[1] = (byte)(n >> 8);
- b[2] = (byte)(n);
-}
-
-/************************************************************
-The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-ulint
-mach_read_from_3(
-/*=============*/
- /* out: ulint integer */
- byte* b) /* in: pointer to 3 bytes */
-{
- ut_ad(b);
- return( ((ulint)(b[0]) << 16)
- + ((ulint)(b[1]) << 8)
- + (ulint)(b[2])
- );
-}
-
-/***********************************************************
-The following function is used to store data in four consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_4(
-/*============*/
- byte* b, /* in: pointer to four bytes where to store */
- ulint n) /* in: ulint integer to be stored */
-{
- ut_ad(b);
-
- b[0] = (byte)(n >> 24);
- b[1] = (byte)(n >> 16);
- b[2] = (byte)(n >> 8);
- b[3] = (byte)n;
-}
-
-/************************************************************
-The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-ulint
-mach_read_from_4(
-/*=============*/
- /* out: ulint integer */
- byte* b) /* in: pointer to four bytes */
-{
- ut_ad(b);
- return( ((ulint)(b[0]) << 24)
- + ((ulint)(b[1]) << 16)
- + ((ulint)(b[2]) << 8)
- + (ulint)(b[3])
- );
-}
-
-/*************************************************************
-Writes a ulint in a compressed form where the first byte codes the
-length of the stored ulint. We look at the most significant bits of
-the byte. If the most significant bit is zero, it means 1-byte storage,
-else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0,
-it means 3-byte storage, else if 4th is 0, it means 4-byte storage,
-else the storage is 5-byte. */
-UNIV_INLINE
-ulint
-mach_write_compressed(
-/*==================*/
- /* out: compressed size in bytes */
- byte* b, /* in: pointer to memory where to store */
- ulint n) /* in: ulint integer (< 2^32) to be stored */
-{
- ut_ad(b);
-
- if (n < 0x80UL) {
- mach_write_to_1(b, n);
- return(1);
- } else if (n < 0x4000UL) {
- mach_write_to_2(b, n | 0x8000UL);
- return(2);
- } else if (n < 0x200000UL) {
- mach_write_to_3(b, n | 0xC00000UL);
- return(3);
- } else if (n < 0x10000000UL) {
- mach_write_to_4(b, n | 0xE0000000UL);
- return(4);
- } else {
- mach_write_to_1(b, 0xF0UL);
- mach_write_to_4(b + 1, n);
- return(5);
- }
-}
-
-/*************************************************************
-Returns the size of a ulint when written in the compressed form. */
-UNIV_INLINE
-ulint
-mach_get_compressed_size(
-/*=====================*/
- /* out: compressed size in bytes */
- ulint n) /* in: ulint integer (< 2^32) to be stored */
-{
- if (n < 0x80UL) {
- return(1);
- } else if (n < 0x4000UL) {
- return(2);
- } else if (n < 0x200000UL) {
- return(3);
- } else if (n < 0x10000000UL) {
- return(4);
- } else {
- return(5);
- }
-}
-
-/*************************************************************
-Reads a ulint in a compressed form. */
-UNIV_INLINE
-ulint
-mach_read_compressed(
-/*=================*/
- /* out: read integer (< 2^32) */
- byte* b) /* in: pointer to memory from where to read */
-{
- ulint flag;
-
- ut_ad(b);
-
- flag = mach_read_from_1(b);
-
- if (flag < 0x80UL) {
- return(flag);
- } else if (flag < 0xC0UL) {
- return(mach_read_from_2(b) & 0x7FFFUL);
- } else if (flag < 0xE0UL) {
- return(mach_read_from_3(b) & 0x3FFFFFUL);
- } else if (flag < 0xF0UL) {
- return(mach_read_from_4(b) & 0x1FFFFFFFUL);
- } else {
- ut_ad(flag == 0xF0UL);
- return(mach_read_from_4(b + 1));
- }
-}
-
-/***********************************************************
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_8(
-/*============*/
- byte* b, /* in: pointer to 8 bytes where to store */
- dulint n) /* in: dulint integer to be stored */
-{
- ut_ad(b);
-
- mach_write_to_4(b, ut_dulint_get_high(n));
- mach_write_to_4(b + 4, ut_dulint_get_low(n));
-}
-
-/************************************************************
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-dulint
-mach_read_from_8(
-/*=============*/
- /* out: dulint integer */
- byte* b) /* in: pointer to 8 bytes */
-{
- ulint high;
- ulint low;
-
- ut_ad(b);
-
- high = mach_read_from_4(b);
- low = mach_read_from_4(b + 4);
-
- return(ut_dulint_create(high, low));
-}
-
-/***********************************************************
-The following function is used to store data in 7 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_7(
-/*============*/
- byte* b, /* in: pointer to 7 bytes where to store */
- dulint n) /* in: dulint integer to be stored */
-{
- ut_ad(b);
-
- mach_write_to_3(b, ut_dulint_get_high(n));
- mach_write_to_4(b + 3, ut_dulint_get_low(n));
-}
-
-/************************************************************
-The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-dulint
-mach_read_from_7(
-/*=============*/
- /* out: dulint integer */
- byte* b) /* in: pointer to 7 bytes */
-{
- ulint high;
- ulint low;
-
- ut_ad(b);
-
- high = mach_read_from_3(b);
- low = mach_read_from_4(b + 3);
-
- return(ut_dulint_create(high, low));
-}
-
-/***********************************************************
-The following function is used to store data in 6 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_6(
-/*============*/
- byte* b, /* in: pointer to 6 bytes where to store */
- dulint n) /* in: dulint integer to be stored */
-{
- ut_ad(b);
-
- mach_write_to_2(b, ut_dulint_get_high(n));
- mach_write_to_4(b + 2, ut_dulint_get_low(n));
-}
-
-/************************************************************
-The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address. */
-UNIV_INLINE
-dulint
-mach_read_from_6(
-/*=============*/
- /* out: dulint integer */
- byte* b) /* in: pointer to 7 bytes */
-{
- ulint high;
- ulint low;
-
- ut_ad(b);
-
- high = mach_read_from_2(b);
- low = mach_read_from_4(b + 2);
-
- return(ut_dulint_create(high, low));
-}
-
-/*************************************************************
-Writes a dulint in a compressed form (5..9 bytes). */
-UNIV_INLINE
-ulint
-mach_dulint_write_compressed(
-/*=========================*/
- /* out: size in bytes */
- byte* b, /* in: pointer to memory where to store */
- dulint n) /* in: dulint integer to be stored */
-{
- ulint size;
-
- ut_ad(b);
-
- size = mach_write_compressed(b, ut_dulint_get_high(n));
- mach_write_to_4(b + size, ut_dulint_get_low(n));
-
- return(size + 4);
-}
-
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
-UNIV_INLINE
-ulint
-mach_dulint_get_compressed_size(
-/*============================*/
- /* out: compressed size in bytes */
- dulint n) /* in: dulint integer to be stored */
-{
- return(4 + mach_get_compressed_size(ut_dulint_get_high(n)));
-}
-
-/*************************************************************
-Reads a dulint in a compressed form. */
-UNIV_INLINE
-dulint
-mach_dulint_read_compressed(
-/*========================*/
- /* out: read dulint */
- byte* b) /* in: pointer to memory from where to read */
-{
- ulint high;
- ulint low;
- ulint size;
-
- ut_ad(b);
-
- high = mach_read_compressed(b);
-
- size = mach_get_compressed_size(high);
-
- low = mach_read_from_4(b + size);
-
- return(ut_dulint_create(high, low));
-}
-
-/*************************************************************
-Writes a dulint in a compressed form (1..11 bytes). */
-UNIV_INLINE
-ulint
-mach_dulint_write_much_compressed(
-/*==============================*/
- /* out: size in bytes */
- byte* b, /* in: pointer to memory where to store */
- dulint n) /* in: dulint integer to be stored */
-{
- ulint size;
-
- ut_ad(b);
-
- if (ut_dulint_get_high(n) == 0) {
- return(mach_write_compressed(b, ut_dulint_get_low(n)));
- }
-
- *b = (byte)0xFF;
- size = 1 + mach_write_compressed(b + 1, ut_dulint_get_high(n));
-
- size += mach_write_compressed(b + size, ut_dulint_get_low(n));
-
- return(size);
-}
-
-/*************************************************************
-Returns the size of a dulint when written in the compressed form. */
-UNIV_INLINE
-ulint
-mach_dulint_get_much_compressed_size(
-/*=================================*/
- /* out: compressed size in bytes */
- dulint n) /* in: dulint integer to be stored */
-{
- if (0 == ut_dulint_get_high(n)) {
- return(mach_get_compressed_size(ut_dulint_get_low(n)));
- }
-
- return(1 + mach_get_compressed_size(ut_dulint_get_high(n))
- + mach_get_compressed_size(ut_dulint_get_low(n)));
-}
-
-/*************************************************************
-Reads a dulint in a compressed form. */
-UNIV_INLINE
-dulint
-mach_dulint_read_much_compressed(
-/*=============================*/
- /* out: read dulint */
- byte* b) /* in: pointer to memory from where to read */
-{
- ulint high;
- ulint low;
- ulint size;
-
- ut_ad(b);
-
- if (*b != (byte)0xFF) {
- high = 0;
- size = 0;
- } else {
- high = mach_read_compressed(b + 1);
-
- size = 1 + mach_get_compressed_size(high);
- }
-
- low = mach_read_compressed(b + size);
-
- return(ut_dulint_create(high, low));
-}
-
-/*************************************************************
-Reads a double. It is stored in a little-endian format. */
-UNIV_INLINE
-double
-mach_double_read(
-/*=============*/
- /* out: double read */
- byte* b) /* in: pointer to memory from where to read */
-{
- double d;
- ulint i;
- byte* ptr;
-
- ptr = (byte*)&d;
-
- for (i = 0; i < sizeof(double); i++) {
-#ifdef WORDS_BIGENDIAN
- ptr[sizeof(double) - i - 1] = b[i];
-#else
- ptr[i] = b[i];
-#endif
- }
-
- return(d);
-}
-
-/*************************************************************
-Writes a double. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_double_write(
-/*==============*/
- byte* b, /* in: pointer to memory where to write */
- double d) /* in: double */
-{
- ulint i;
- byte* ptr;
-
- ptr = (byte*)&d;
-
- for (i = 0; i < sizeof(double); i++) {
-#ifdef WORDS_BIGENDIAN
- b[i] = ptr[sizeof(double) - i - 1];
-#else
- b[i] = ptr[i];
-#endif
- }
-}
-
-/*************************************************************
-Reads a float. It is stored in a little-endian format. */
-UNIV_INLINE
-float
-mach_float_read(
-/*============*/
- /* out: float read */
- byte* b) /* in: pointer to memory from where to read */
-{
- float d;
- ulint i;
- byte* ptr;
-
- ptr = (byte*)&d;
-
- for (i = 0; i < sizeof(float); i++) {
-#ifdef WORDS_BIGENDIAN
- ptr[sizeof(float) - i - 1] = b[i];
-#else
- ptr[i] = b[i];
-#endif
- }
-
- return(d);
-}
-
-/*************************************************************
-Writes a float. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_float_write(
-/*=============*/
- byte* b, /* in: pointer to memory where to write */
- float d) /* in: float */
-{
- ulint i;
- byte* ptr;
-
- ptr = (byte*)&d;
-
- for (i = 0; i < sizeof(float); i++) {
-#ifdef WORDS_BIGENDIAN
- b[i] = ptr[sizeof(float) - i - 1];
-#else
- b[i] = ptr[i];
-#endif
- }
-}
-
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
-UNIV_INLINE
-ulint
-mach_read_from_n_little_endian(
-/*===========================*/
- /* out: unsigned long int */
- byte* buf, /* in: from where to read */
- ulint buf_size) /* in: from how many bytes to read */
-{
- ulint n = 0;
- byte* ptr;
-
- ut_ad(buf_size <= sizeof(ulint));
- ut_ad(buf_size > 0);
-
- ptr = buf + buf_size;
-
- for (;;) {
- ptr--;
-
- n = n << 8;
-
- n += (ulint)(*ptr);
-
- if (ptr == buf) {
- break;
- }
- }
-
- return(n);
-}
-
-/*************************************************************
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_n_little_endian(
-/*==========================*/
- byte* dest, /* in: where to write */
- ulint dest_size, /* in: into how many bytes to write */
- ulint n) /* in: unsigned long int to write */
-{
- byte* end;
-
- ut_ad(dest_size <= sizeof(ulint));
- ut_ad(dest_size > 0);
-
- end = dest + dest_size;
-
- for (;;) {
- *dest = (byte)(n & 0xFF);
-
- n = n >> 8;
-
- dest++;
-
- if (dest == end) {
- break;
- }
- }
-
- ut_ad(n == 0);
-}
-
-/*************************************************************
-Reads a ulint stored in the little-endian format. */
-UNIV_INLINE
-ulint
-mach_read_from_2_little_endian(
-/*===========================*/
- /* out: unsigned long int */
- byte* buf) /* in: from where to read */
-{
- return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256);
-}
-
-/*************************************************************
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_2_little_endian(
-/*==========================*/
- byte* dest, /* in: where to write */
- ulint n) /* in: unsigned long int to write */
-{
- ut_ad(n < 256 * 256);
-
- *dest = (byte)(n & 0xFFUL);
-
- n = n >> 8;
- dest++;
-
- *dest = (byte)(n & 0xFFUL);
-}
-
-/*************************************************************
-Convert integral type from storage byte order (big endian) to
-host byte order. */
-UNIV_INLINE
-ullint
-mach_read_int_type(
-/*===============*/
- /* out: integer value */
- const byte* src, /* in: where to read from */
- ulint len, /* in: length of src */
- ibool unsigned_type) /* in: signed or unsigned flag */
-{
- /* XXX this can be optimized on big-endian machines */
-
- ullint ret;
- uint i;
-
- if (unsigned_type || (src[0] & 0x80)) {
-
- ret = 0x0000000000000000ULL;
- } else {
-
- ret = 0xFFFFFFFFFFFFFF00ULL;
- }
-
- if (unsigned_type) {
-
- ret |= src[0];
- } else {
-
- ret |= src[0] ^ 0x80;
- }
-
- for (i = 1; i < len; i++) {
- ret <<= 8;
- ret |= src[i];
- }
-
- return(ret);
-}
diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h
deleted file mode 100644
index 2393e4edb54..00000000000
--- a/storage/innobase/include/mem0dbg.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/******************************************************
-The memory management: the debug code. This is not a compilation module,
-but is included in mem0mem.* !
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-/* In the debug version each allocated field is surrounded with
-check fields whose sizes are given below */
-
-#ifdef UNIV_MEM_DEBUG
-#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
- UNIV_MEM_ALIGNMENT)
-#define MEM_FIELD_TRAILER_SIZE sizeof(ulint)
-#else
-#define MEM_FIELD_HEADER_SIZE 0
-#endif
-
-
-/* Space needed when allocating for a user a field of
-length N. The space is allocated only in multiples of
-UNIV_MEM_ALIGNMENT. In the debug version there are also
-check fields at the both ends of the field. */
-#ifdef UNIV_MEM_DEBUG
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\
- + MEM_FIELD_TRAILER_SIZE, UNIV_MEM_ALIGNMENT)
-#else
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT)
-#endif
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/*******************************************************************
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks. */
-
-void
-mem_heap_validate_or_print(
-/*=======================*/
- mem_heap_t* heap, /* in: memory heap */
- byte* top, /* in: calculate and validate only until
- this top pointer in the heap is reached,
- if this pointer is NULL, ignored */
- ibool print, /* in: if TRUE, prints the contents
- of the heap; works only in
- the debug version */
- ibool* error, /* out: TRUE if error */
- ulint* us_size,/* out: allocated memory
- (for the user) in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored; in the
- non-debug version this is always -1 */
- ulint* ph_size,/* out: physical size of the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
- ulint* n_blocks); /* out: number of blocks in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
-/******************************************************************
-Validates the contents of a memory heap. */
-
-ibool
-mem_heap_validate(
-/*==============*/
- /* out: TRUE if ok */
- mem_heap_t* heap); /* in: memory heap */
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-#ifdef UNIV_DEBUG
-/******************************************************************
-Checks that an object is a memory heap (or a block of it) */
-
-ibool
-mem_heap_check(
-/*===========*/
- /* out: TRUE if ok */
- mem_heap_t* heap); /* in: memory heap */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_MEM_DEBUG
-/*********************************************************************
-TRUE if no memory is currently allocated. */
-
-ibool
-mem_all_freed(void);
-/*===============*/
- /* out: TRUE if no heaps exist */
-/*********************************************************************
-Validates the dynamic memory */
-
-ibool
-mem_validate_no_assert(void);
-/*=========================*/
- /* out: TRUE if error */
-/****************************************************************
-Validates the dynamic memory */
-
-ibool
-mem_validate(void);
-/*===============*/
- /* out: TRUE if ok */
-#endif /* UNIV_MEM_DEBUG */
-/****************************************************************
-Tries to find neigboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer. */
-
-void
-mem_analyze_corruption(
-/*===================*/
- void* ptr); /* in: pointer to place of possible corruption */
-/*********************************************************************
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers. Can only be used in the debug version. */
-
-void
-mem_print_info(void);
-/*================*/
-/*********************************************************************
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers since the last ..._print_info or..._print_new_info. */
-
-void
-mem_print_new_info(void);
-/*====================*/
diff --git a/storage/innobase/include/mem0dbg.ic b/storage/innobase/include/mem0dbg.ic
deleted file mode 100644
index e8a34adb3fa..00000000000
--- a/storage/innobase/include/mem0dbg.ic
+++ /dev/null
@@ -1,93 +0,0 @@
-/************************************************************************
-The memory management: the debug code. This is not an independent
-compilation module but is included in mem0mem.*.
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifdef UNIV_MEM_DEBUG
-extern mutex_t mem_hash_mutex;
-extern ulint mem_current_allocated_memory;
-
-/**********************************************************************
-Initializes an allocated memory field in the debug version. */
-
-void
-mem_field_init(
-/*===========*/
- byte* buf, /* in: memory field */
- ulint n); /* in: how many bytes the user requested */
-/**********************************************************************
-Erases an allocated memory field in the debug version. */
-
-void
-mem_field_erase(
-/*============*/
- byte* buf, /* in: memory field */
- ulint n); /* in: how many bytes the user requested */
-/*******************************************************************
-Initializes a buffer to a random combination of hex BA and BE.
-Used to initialize allocated memory. */
-
-void
-mem_init_buf(
-/*=========*/
- byte* buf, /* in: pointer to buffer */
- ulint n); /* in: length of buffer */
-/*******************************************************************
-Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory.*/
-
-void
-mem_erase_buf(
-/*==========*/
- byte* buf, /* in: pointer to buffer */
- ulint n); /* in: length of buffer */
-/*******************************************************************
-Inserts a created memory heap to the hash table of
-current allocated memory heaps.
-Initializes the hash table when first called. */
-
-void
-mem_hash_insert(
-/*============*/
- mem_heap_t* heap, /* in: the created heap */
- const char* file_name, /* in: file name of creation */
- ulint line); /* in: line where created */
-/*******************************************************************
-Removes a memory heap (which is going to be freed by the caller)
-from the list of live memory heaps. Returns the size of the heap
-in terms of how much memory in bytes was allocated for the user of
-the heap (not the total space occupied by the heap).
-Also validates the heap.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-
-void
-mem_hash_remove(
-/*============*/
- mem_heap_t* heap, /* in: the heap to be freed */
- const char* file_name, /* in: file name of freeing */
- ulint line); /* in: line where freed */
-
-
-void
-mem_field_header_set_len(byte* field, ulint len);
-
-ulint
-mem_field_header_get_len(byte* field);
-
-void
-mem_field_header_set_check(byte* field, ulint check);
-
-ulint
-mem_field_header_get_check(byte* field);
-
-void
-mem_field_trailer_set_check(byte* field, ulint check);
-
-ulint
-mem_field_trailer_get_check(byte* field);
-#endif /* UNIV_MEM_DEBUG */
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
deleted file mode 100644
index 2d5fd1db6c3..00000000000
--- a/storage/innobase/include/mem0mem.h
+++ /dev/null
@@ -1,412 +0,0 @@
-/******************************************************
-The memory management
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef mem0mem_h
-#define mem0mem_h
-
-#include "univ.i"
-#include "ut0mem.h"
-#include "ut0byte.h"
-#include "ut0ut.h"
-#include "ut0rnd.h"
-#include "sync0sync.h"
-#include "ut0lst.h"
-#include "mach0data.h"
-
-/* -------------------- MEMORY HEAPS ----------------------------- */
-
-/* The info structure stored at the beginning of a heap block */
-typedef struct mem_block_info_struct mem_block_info_t;
-
-/* A block of a memory heap consists of the info structure
-followed by an area of memory */
-typedef mem_block_info_t mem_block_t;
-
-/* A memory heap is a nonempty linear list of memory blocks */
-typedef mem_block_t mem_heap_t;
-
-/* Types of allocation for memory heaps: DYNAMIC means allocation from the
-dynamic memory pool of the C compiler, BUFFER means allocation from the
-buffer pool; the latter method is used for very big heaps */
-
-#define MEM_HEAP_DYNAMIC 0 /* the most common type */
-#define MEM_HEAP_BUFFER 1
-#define MEM_HEAP_BTR_SEARCH 2 /* this flag can optionally be
- ORed to MEM_HEAP_BUFFER, in which
- case heap->free_block is used in
- some cases for memory allocations,
- and if it's NULL, the memory
- allocation functions can return
- NULL. */
-
-/* The following start size is used for the first block in the memory heap if
-the size is not specified, i.e., 0 is given as the parameter in the call of
-create. The standard size is the maximum (payload) size of the blocks used for
-allocations of small buffers. */
-
-#define MEM_BLOCK_START_SIZE 64
-#define MEM_BLOCK_STANDARD_SIZE 8000
-
-/* If a memory heap is allowed to grow into the buffer pool, the following
-is the maximum size for a single allocated buffer: */
-#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200)
-
-/**********************************************************************
-Initializes the memory system. */
-
-void
-mem_init(
-/*=====*/
- ulint size); /* in: common pool size in bytes */
-/******************************************************************
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-#define mem_heap_create(N) mem_heap_create_func(\
- (N), NULL, MEM_HEAP_DYNAMIC, __FILE__, __LINE__)
-/******************************************************************
-Use this macro instead of the corresponding function! Macro for creating
-a memory heap of type MEM_HEAP_BUFFER. */
-
-#define mem_heap_create_in_buffer(N) mem_heap_create_func(\
- (N), NULL, MEM_HEAP_BUFFER, __FILE__, __LINE__)
-/******************************************************************
-Use this macro instead of the corresponding function! Macro for creating
-a memory heap of type MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER. */
-
-#define mem_heap_create_in_btr_search(N) mem_heap_create_func(\
- (N), NULL, MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\
- __FILE__, __LINE__)
-/******************************************************************
-Use this macro instead of the corresponding function! Macro for fast
-memory heap creation. An initial block of memory B is given by the
-caller, N is its size, and this memory block is not freed by
-mem_heap_free. See the parameter comment in mem_heap_create_func below. */
-
-#define mem_heap_fast_create(N, B) mem_heap_create_func(\
- (N), (B), MEM_HEAP_DYNAMIC, __FILE__, __LINE__)
-
-/******************************************************************
-Use this macro instead of the corresponding function! Macro for memory
-heap freeing. */
-
-#define mem_heap_free(heap) mem_heap_free_func(\
- (heap), __FILE__, __LINE__)
-/*********************************************************************
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-arguments. */
-UNIV_INLINE
-mem_heap_t*
-mem_heap_create_func(
-/*=================*/
- /* out, own: memory heap, NULL if
- did not succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps)*/
- ulint n, /* in: desired start block size,
- this means that a single user buffer
- of size n will fit in the block,
- 0 creates a default size block;
- if init_block is not NULL, n tells
- its size in bytes */
- void* init_block, /* in: if very fast creation is
- wanted, the caller can reserve some
- memory from its stack, for example,
- and pass it as the initial block
- to the heap: then no OS call of malloc
- is needed at the creation. CAUTION:
- the caller must make sure the initial
- block is not unintentionally erased
- (if allocated in the stack), before
- the memory heap is explicitly freed. */
- ulint type, /* in: heap type */
- const char* file_name, /* in: file name where created */
- ulint line); /* in: line where created */
-/*********************************************************************
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
-UNIV_INLINE
-void
-mem_heap_free_func(
-/*===============*/
- mem_heap_t* heap, /* in, own: heap to be freed */
- const char* file_name, /* in: file name where freed */
- ulint line); /* in: line where freed */
-/*******************************************************************
-Allocates n bytes of memory from a memory heap. */
-UNIV_INLINE
-void*
-mem_heap_alloc(
-/*===========*/
- /* out: allocated storage, NULL if did not
- succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps) */
- mem_heap_t* heap, /* in: memory heap */
- ulint n); /* in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-/*********************************************************************
-Returns a pointer to the heap top. */
-UNIV_INLINE
-byte*
-mem_heap_get_heap_top(
-/*==================*/
- /* out: pointer to the heap top */
- mem_heap_t* heap); /* in: memory heap */
-/*********************************************************************
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_free_heap_top(
-/*===================*/
- mem_heap_t* heap, /* in: heap from which to free */
- byte* old_top);/* in: pointer to old top of heap */
-/*********************************************************************
-Empties a memory heap. The first memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_empty(
-/*===========*/
- mem_heap_t* heap); /* in: heap to empty */
-/*********************************************************************
-Returns a pointer to the topmost element in a memory heap.
-The size of the element must be given. */
-UNIV_INLINE
-void*
-mem_heap_get_top(
-/*=============*/
- /* out: pointer to the topmost element */
- mem_heap_t* heap, /* in: memory heap */
- ulint n); /* in: size of the topmost element */
-/*********************************************************************
-Frees the topmost element in a memory heap.
-The size of the element must be given. */
-UNIV_INLINE
-void
-mem_heap_free_top(
-/*==============*/
- mem_heap_t* heap, /* in: memory heap */
- ulint n); /* in: size of the topmost element */
-/*********************************************************************
-Returns the space in bytes occupied by a memory heap. */
-UNIV_INLINE
-ulint
-mem_heap_get_size(
-/*==============*/
- mem_heap_t* heap); /* in: heap */
-/******************************************************************
-Use this macro instead of the corresponding function!
-Macro for memory buffer allocation */
-
-#define mem_alloc(N) mem_alloc_func((N), __FILE__, __LINE__)
-/******************************************************************
-Use this macro instead of the corresponding function!
-Macro for memory buffer allocation through the non-inlined function */
-
-#define mem_alloc_noninline(N) mem_alloc_func_noninline(\
- (N), __FILE__, __LINE__)
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
- /* out, own: free storage */
- ulint n, /* in: desired number of bytes */
- const char* file_name, /* in: file name where created */
- ulint line /* in: line where created */
-);
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
-
-void*
-mem_alloc_func_noninline(
-/*=====================*/
- /* out, own: free storage */
- ulint n, /* in: desired number of bytes */
- const char* file_name, /* in: file name where created */
- ulint line /* in: line where created */
- );
-/******************************************************************
-Use this macro instead of the corresponding function!
-Macro for memory buffer freeing */
-
-#define mem_free(PTR) mem_free_func((PTR), __FILE__, __LINE__)
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Frees a single buffer of storage from
-the dynamic memory of C compiler. Similar to free of C. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
- void* ptr, /* in, own: buffer to be freed */
- const char* file_name, /* in: file name where freed */
- ulint line /* in: line where freed */
-);
-
-/**************************************************************************
-Duplicates a NUL-terminated string. */
-UNIV_INLINE
-char*
-mem_strdup(
-/*=======*/
- /* out, own: a copy of the string,
- must be deallocated with mem_free */
- const char* str); /* in: string to be copied */
-/**************************************************************************
-Makes a NUL-terminated copy of a nonterminated string. */
-UNIV_INLINE
-char*
-mem_strdupl(
-/*========*/
- /* out, own: a copy of the string,
- must be deallocated with mem_free */
- const char* str, /* in: string to be copied */
- ulint len); /* in: length of str, in bytes */
-
-/**************************************************************************
-Duplicates a NUL-terminated string, allocated from a memory heap. */
-
-char*
-mem_heap_strdup(
-/*============*/
- /* out, own: a copy of the string */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* str); /* in: string to be copied */
-/**************************************************************************
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap. */
-UNIV_INLINE
-char*
-mem_heap_strdupl(
-/*=============*/
- /* out, own: a copy of the string */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* str, /* in: string to be copied */
- ulint len); /* in: length of str, in bytes */
-
-/**************************************************************************
-Concatenate two strings and return the result, using a memory heap. */
-
-char*
-mem_heap_strcat(
-/*============*/
- /* out, own: the result */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* s1, /* in: string 1 */
- const char* s2); /* in: string 2 */
-
-/**************************************************************************
-Duplicate a block of data, allocated from a memory heap. */
-
-void*
-mem_heap_dup(
-/*=========*/
- /* out, own: a copy of the data */
- mem_heap_t* heap, /* in: memory heap where copy is allocated */
- const void* data, /* in: data to be copied */
- ulint len); /* in: length of data, in bytes */
-
-/**************************************************************************
-Concatenate two memory blocks and return the result, using a memory heap. */
-
-void*
-mem_heap_cat(
-/*=========*/
- /* out, own: the result */
- mem_heap_t* heap, /* in: memory heap where result is allocated */
- const void* b1, /* in: block 1 */
- ulint len1, /* in: length of b1, in bytes */
- const void* b2, /* in: block 2 */
- ulint len2); /* in: length of b2, in bytes */
-
-/********************************************************************
-A simple (s)printf replacement that dynamically allocates the space for the
-formatted string from the given heap. This supports a very limited set of
-the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type). */
-
-char*
-mem_heap_printf(
-/*============*/
- /* out: heap-allocated formatted string */
- mem_heap_t* heap, /* in: memory heap */
- const char* format, /* in: format string */
- ...) __attribute__ ((format (printf, 2, 3)));
-
-#ifdef MEM_PERIODIC_CHECK
-/**********************************************************************
-Goes through the list of all allocated mem blocks, checks their magic
-numbers, and reports possible corruption. */
-
-void
-mem_validate_all_blocks(void);
-/*=========================*/
-#endif
-
-/*#######################################################################*/
-
-/* The info header of a block in a memory heap */
-
-struct mem_block_info_struct {
- ulint magic_n;/* magic number for debugging */
- char file_name[8];/* file name where the mem heap was created */
- ulint line; /* line number where the mem heap was created */
- UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in the
- list this is the base node of the list of blocks;
- in subsequent blocks this is undefined */
- UT_LIST_NODE_T(mem_block_t) list; /* This contains pointers to next
- and prev in the list. The first block allocated
- to the heap is also the first block in this list,
- though it also contains the base node of the list. */
- ulint len; /* physical length of this block in bytes */
- ulint type; /* type of heap: MEM_HEAP_DYNAMIC, or
- MEM_HEAP_BUFFER possibly ORed to MEM_HEAP_BTR_SEARCH */
- ibool init_block; /* TRUE if this is the first block used in fast
- creation of a heap: the memory will be freed
- by the creator, not by mem_heap_free */
- ulint free; /* offset in bytes of the first free position for
- user data in the block */
- ulint start; /* the value of the struct field 'free' at the
- creation of the block */
- byte* free_block;
- /* if the MEM_HEAP_BTR_SEARCH bit is set in type,
- and this is the heap root, this can contain an
- allocated buffer frame, which can be appended as a
- free block to the heap, if we need more space;
- otherwise, this is NULL */
-#ifdef MEM_PERIODIC_CHECK
- UT_LIST_NODE_T(mem_block_t) mem_block_list;
- /* List of all mem blocks allocated; protected
- by the mem_comm_pool mutex */
-#endif
-};
-
-#define MEM_BLOCK_MAGIC_N 764741555
-#define MEM_FREED_BLOCK_MAGIC_N 547711122
-
-/* Header size for a memory heap block */
-#define MEM_BLOCK_HEADER_SIZE ut_calc_align(sizeof(mem_block_info_t),\
- UNIV_MEM_ALIGNMENT)
-#include "mem0dbg.h"
-
-#ifndef UNIV_NONINL
-#include "mem0mem.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
deleted file mode 100644
index 6227a27f277..00000000000
--- a/storage/innobase/include/mem0mem.ic
+++ /dev/null
@@ -1,619 +0,0 @@
-/************************************************************************
-The memory management
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0dbg.ic"
-
-#include "mem0pool.h"
-
-/*******************************************************************
-Creates a memory heap block where data can be allocated. */
-
-mem_block_t*
-mem_heap_create_block(
-/*==================*/
- /* out, own: memory heap block, NULL if
- did not succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps) */
- mem_heap_t* heap, /* in: memory heap or NULL if first block
- should be created */
- ulint n, /* in: number of bytes needed for user data, or
- if init_block is not NULL, its size in bytes */
- void* init_block, /* in: init block in fast create,
- type must be MEM_HEAP_DYNAMIC */
- ulint type, /* in: type of heap: MEM_HEAP_DYNAMIC or
- MEM_HEAP_BUFFER */
- const char* file_name,/* in: file name where created */
- ulint line); /* in: line where created */
-/**********************************************************************
-Frees a block from a memory heap. */
-
-void
-mem_heap_block_free(
-/*================*/
- mem_heap_t* heap, /* in: heap */
- mem_block_t* block); /* in: block to free */
-/**********************************************************************
-Frees the free_block field from a memory heap. */
-
-void
-mem_heap_free_block_free(
-/*=====================*/
- mem_heap_t* heap); /* in: heap */
-/*******************************************************************
-Adds a new block to a memory heap. */
-
-mem_block_t*
-mem_heap_add_block(
-/*===============*/
- /* out: created block, NULL if did not
- succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps)*/
- mem_heap_t* heap, /* in: memory heap */
- ulint n); /* in: number of bytes user needs */
-
-UNIV_INLINE
-void
-mem_block_set_len(mem_block_t* block, ulint len)
-{
- ut_ad(len > 0);
-
- block->len = len;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_len(mem_block_t* block)
-{
- return(block->len);
-}
-
-UNIV_INLINE
-void
-mem_block_set_type(mem_block_t* block, ulint type)
-{
- ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
- || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
-
- block->type = type;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_type(mem_block_t* block)
-{
- return(block->type);
-}
-
-UNIV_INLINE
-void
-mem_block_set_free(mem_block_t* block, ulint free)
-{
- ut_ad(free > 0);
- ut_ad(free <= mem_block_get_len(block));
-
- block->free = free;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_free(mem_block_t* block)
-{
- return(block->free);
-}
-
-UNIV_INLINE
-void
-mem_block_set_start(mem_block_t* block, ulint start)
-{
- ut_ad(start > 0);
-
- block->start = start;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_start(mem_block_t* block)
-{
- return(block->start);
-}
-
-/*******************************************************************
-Allocates n bytes of memory from a memory heap. */
-UNIV_INLINE
-void*
-mem_heap_alloc(
-/*===========*/
- /* out: allocated storage, NULL if did not
- succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps) */
- mem_heap_t* heap, /* in: memory heap */
- ulint n) /* in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-{
- mem_block_t* block;
- void* buf;
- ulint free;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF));
-
- /* Check if there is enough space in block. If not, create a new
- block to the heap */
-
- if (mem_block_get_len(block)
- < mem_block_get_free(block) + MEM_SPACE_NEEDED(n)) {
-
- block = mem_heap_add_block(heap, n);
-
- if (block == NULL) {
-
- return(NULL);
- }
- }
-
- free = mem_block_get_free(block);
-
- buf = (byte*)block + free;
-
- mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
-
-#ifdef UNIV_MEM_DEBUG
- UNIV_MEM_ALLOC(buf,
- n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE);
-
- /* In the debug version write debugging info to the field */
- mem_field_init((byte*)buf, n);
-
- /* Advance buf to point at the storage which will be given to the
- caller */
- buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
-
-#endif
-#ifdef UNIV_SET_MEM_TO_ZERO
- UNIV_MEM_ALLOC(buf, n);
- memset(buf, '\0', n);
-#endif
- UNIV_MEM_ALLOC(buf, n);
- return(buf);
-}
-
-/*********************************************************************
-Returns a pointer to the heap top. */
-UNIV_INLINE
-byte*
-mem_heap_get_heap_top(
-/*==================*/
- /* out: pointer to the heap top */
- mem_heap_t* heap) /* in: memory heap */
-{
- mem_block_t* block;
- byte* buf;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- buf = (byte*)block + mem_block_get_free(block);
-
- return(buf);
-}
-
-/*********************************************************************
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_free_heap_top(
-/*===================*/
- mem_heap_t* heap, /* in: heap from which to free */
- byte* old_top)/* in: pointer to old top of heap */
-{
- mem_block_t* block;
- mem_block_t* prev_block;
-#ifdef UNIV_MEM_DEBUG
- ibool error;
- ulint total_size;
- ulint size;
-#endif
-
- ut_ad(mem_heap_check(heap));
-
-#ifdef UNIV_MEM_DEBUG
-
- /* Validate the heap and get its total allocated size */
- mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
- NULL, NULL);
- ut_a(!error);
-
- /* Get the size below top pointer */
- mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL,
- NULL);
- ut_a(!error);
-
-#endif
-
- block = UT_LIST_GET_LAST(heap->base);
-
- while (block != NULL) {
- if (((byte*)block + mem_block_get_free(block) >= old_top)
- && ((byte*)block <= old_top)) {
- /* Found the right block */
-
- break;
- }
-
- /* Store prev_block value before freeing the current block
- (the current block will be erased in freeing) */
-
- prev_block = UT_LIST_GET_PREV(list, block);
-
- mem_heap_block_free(heap, block);
-
- block = prev_block;
- }
-
- ut_ad(block);
-
- /* Set the free field of block */
- mem_block_set_free(block, old_top - (byte*)block);
-
-#ifdef UNIV_MEM_DEBUG
- ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
- /* In the debug version erase block from top up */
- mem_erase_buf(old_top, (byte*)block + block->len - old_top);
-
- /* Update allocated memory count */
- mutex_enter(&mem_hash_mutex);
- mem_current_allocated_memory -= (total_size - size);
- mutex_exit(&mem_hash_mutex);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_W(old_top, (byte*)block + block->len - old_top);
-#endif /* UNIV_MEM_DEBUG */
- UNIV_MEM_ALLOC(old_top, (byte*)block + block->len - old_top);
-
- /* If free == start, we may free the block if it is not the first
- one */
-
- if ((heap != block) && (mem_block_get_free(block)
- == mem_block_get_start(block))) {
- mem_heap_block_free(heap, block);
- }
-}
-
-/*********************************************************************
-Empties a memory heap. The first memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_empty(
-/*===========*/
- mem_heap_t* heap) /* in: heap to empty */
-{
- mem_heap_free_heap_top(heap, (byte*)heap + mem_block_get_start(heap));
-
- if (heap->free_block) {
- mem_heap_free_block_free(heap);
- }
-}
-
-/*********************************************************************
-Returns a pointer to the topmost element in a memory heap. The size of the
-element must be given. */
-UNIV_INLINE
-void*
-mem_heap_get_top(
-/*=============*/
- /* out: pointer to the topmost element */
- mem_heap_t* heap, /* in: memory heap */
- ulint n) /* in: size of the topmost element */
-{
- mem_block_t* block;
- void* buf;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- buf = (byte*)block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
-
-#ifdef UNIV_MEM_DEBUG
- ut_ad(mem_block_get_start(block) <=(ulint)((byte*)buf - (byte*)block));
-
- /* In the debug version, advance buf to point at the storage which
- was given to the caller in the allocation*/
-
- buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
-
- /* Check that the field lengths agree */
- ut_ad(n == (ulint)mem_field_header_get_len(buf));
-#endif
-
- return(buf);
-}
-
-/*********************************************************************
-Frees the topmost element in a memory heap. The size of the element must be
-given. */
-UNIV_INLINE
-void
-mem_heap_free_top(
-/*==============*/
- mem_heap_t* heap, /* in: memory heap */
- ulint n) /* in: size of the topmost element */
-{
- mem_block_t* block;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- /* Subtract the free field of block */
- mem_block_set_free(block, mem_block_get_free(block)
- - MEM_SPACE_NEEDED(n));
- UNIV_MEM_ASSERT_W((byte*) block + mem_block_get_free(block), n);
-#ifdef UNIV_MEM_DEBUG
-
- ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
- /* In the debug version check the consistency, and erase field */
- mem_field_erase((byte*)block + mem_block_get_free(block), n);
-#endif
-
- /* If free == start, we may free the block if it is not the first
- one */
-
- if ((heap != block) && (mem_block_get_free(block)
- == mem_block_get_start(block))) {
- mem_heap_block_free(heap, block);
- } else {
- /* Avoid a bogus UNIV_MEM_ASSERT_W() warning in a
- subsequent invocation of mem_heap_free_top().
- Originally, this was UNIV_MEM_FREE(), to catch writes
- to freed memory. */
- UNIV_MEM_ALLOC((byte*) block + mem_block_get_free(block), n);
- }
-}
-
-/*********************************************************************
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-arguments. */
-UNIV_INLINE
-mem_heap_t*
-mem_heap_create_func(
-/*=================*/
- /* out, own: memory heap, NULL if
- did not succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps)*/
- ulint n, /* in: desired start block size,
- this means that a single user buffer
- of size n will fit in the block,
- 0 creates a default size block;
- if init_block is not NULL, n tells
- its size in bytes */
- void* init_block, /* in: if very fast creation is
- wanted, the caller can reserve some
- memory from its stack, for example,
- and pass it as the initial block
- to the heap: then no OS call of malloc
- is needed at the creation. CAUTION:
- the caller must make sure the initial
- block is not unintentionally erased
- (if allocated in the stack), before
- the memory heap is explicitly freed. */
- ulint type, /* in: heap type */
- const char* file_name, /* in: file name where created */
- ulint line) /* in: line where created */
-{
- mem_block_t* block;
-
- if (n > 0) {
- block = mem_heap_create_block(NULL, n, init_block, type,
- file_name, line);
- } else {
- block = mem_heap_create_block(NULL, MEM_BLOCK_START_SIZE,
- init_block, type,
- file_name, line);
- }
-
- if (block == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_INIT(block->base);
-
- /* Add the created block itself as the first block in the list */
- UT_LIST_ADD_FIRST(list, block->base, block);
-
-#ifdef UNIV_MEM_DEBUG
-
- mem_hash_insert(block, file_name, line);
-
-#endif
-
- return(block);
-}
-
-/*********************************************************************
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
-UNIV_INLINE
-void
-mem_heap_free_func(
-/*===============*/
- mem_heap_t* heap, /* in, own: heap to be freed */
- const char* file_name __attribute__((unused)),
- /* in: file name where freed */
- ulint line __attribute__((unused)))
-{
- mem_block_t* block;
- mem_block_t* prev_block;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
-#ifdef UNIV_MEM_DEBUG
-
- /* In the debug version remove the heap from the hash table of heaps
- and check its consistency */
-
- mem_hash_remove(heap, file_name, line);
-
-#endif
-
- if (heap->free_block) {
- mem_heap_free_block_free(heap);
- }
-
- while (block != NULL) {
- /* Store the contents of info before freeing current block
- (it is erased in freeing) */
-
- prev_block = UT_LIST_GET_PREV(list, block);
-
- mem_heap_block_free(heap, block);
-
- block = prev_block;
- }
-}
-
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
- /* out, own: free storage */
- ulint n, /* in: desired number of bytes */
- const char* file_name, /* in: file name where created */
- ulint line /* in: line where created */
- )
-{
- mem_heap_t* heap;
- void* buf;
-
- heap = mem_heap_create_func(n, NULL, MEM_HEAP_DYNAMIC, file_name,
- line);
-
- /* Note that as we created the first block in the heap big enough
- for the buffer requested by the caller, the buffer will be in the
- first block and thus we can calculate the pointer to the heap from
- the pointer to the buffer when we free the memory buffer. */
-
- buf = mem_heap_alloc(heap, n);
-
- ut_a((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE
- - MEM_FIELD_HEADER_SIZE);
- return(buf);
-}
-
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function. Frees a single
-buffer of storage from the dynamic memory of the C compiler. Similar to the
-free of C. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
- void* ptr, /* in, own: buffer to be freed */
- const char* file_name, /* in: file name where freed */
- ulint line /* in: line where freed */
- )
-{
- mem_heap_t* heap;
-
- heap = (mem_heap_t*)((byte*)ptr - MEM_BLOCK_HEADER_SIZE
- - MEM_FIELD_HEADER_SIZE);
- mem_heap_free_func(heap, file_name, line);
-}
-
-/*********************************************************************
-Returns the space in bytes occupied by a memory heap. */
-UNIV_INLINE
-ulint
-mem_heap_get_size(
-/*==============*/
- mem_heap_t* heap) /* in: heap */
-{
- mem_block_t* block;
- ulint size = 0;
-
- ut_ad(mem_heap_check(heap));
-
- block = heap;
-
- while (block != NULL) {
-
- size += mem_block_get_len(block);
- block = UT_LIST_GET_NEXT(list, block);
- }
-
- if (heap->free_block) {
- size += UNIV_PAGE_SIZE;
- }
-
- return(size);
-}
-
-/**************************************************************************
-Duplicates a NUL-terminated string. */
-UNIV_INLINE
-char*
-mem_strdup(
-/*=======*/
- /* out, own: a copy of the string,
- must be deallocated with mem_free */
- const char* str) /* in: string to be copied */
-{
- ulint len = strlen(str) + 1;
- return(memcpy(mem_alloc(len), str, len));
-}
-
-/**************************************************************************
-Makes a NUL-terminated copy of a nonterminated string. */
-UNIV_INLINE
-char*
-mem_strdupl(
-/*========*/
- /* out, own: a copy of the string,
- must be deallocated with mem_free */
- const char* str, /* in: string to be copied */
- ulint len) /* in: length of str, in bytes */
-{
- char* s = mem_alloc(len + 1);
- s[len] = 0;
- return(memcpy(s, str, len));
-}
-
-/**************************************************************************
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap. */
-UNIV_INLINE
-char*
-mem_heap_strdupl(
-/*=============*/
- /* out, own: a copy of the string */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* str, /* in: string to be copied */
- ulint len) /* in: length of str, in bytes */
-{
- char* s = mem_heap_alloc(heap, len + 1);
- s[len] = 0;
- return(memcpy(s, str, len));
-}
diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h
deleted file mode 100644
index bf659ca9a72..00000000000
--- a/storage/innobase/include/mem0pool.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/******************************************************
-The lowest-level memory management
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef mem0pool_h
-#define mem0pool_h
-
-#include "univ.i"
-#include "os0file.h"
-#include "ut0lst.h"
-
-typedef struct mem_area_struct mem_area_t;
-typedef struct mem_pool_struct mem_pool_t;
-
-/* The common memory pool */
-extern mem_pool_t* mem_comm_pool;
-
-/* Memory area header */
-
-struct mem_area_struct{
- ulint size_and_free; /* memory area size is obtained by
- anding with ~MEM_AREA_FREE; area in
- a free list if ANDing with
- MEM_AREA_FREE results in nonzero */
- UT_LIST_NODE_T(mem_area_t)
- free_list; /* free list node */
-};
-
-/* Each memory area takes this many extra bytes for control information */
-#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_struct),\
- UNIV_MEM_ALIGNMENT))
-
-/************************************************************************
-Creates a memory pool. */
-
-mem_pool_t*
-mem_pool_create(
-/*============*/
- /* out: memory pool */
- ulint size); /* in: pool size in bytes */
-/************************************************************************
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*! */
-
-void*
-mem_area_alloc(
-/*===========*/
- /* out, own: allocated memory buffer */
- ulint size, /* in: allocated size in bytes; for optimum
- space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE */
- mem_pool_t* pool); /* in: memory pool */
-/************************************************************************
-Frees memory to a pool. */
-
-void
-mem_area_free(
-/*==========*/
- void* ptr, /* in, own: pointer to allocated memory
- buffer */
- mem_pool_t* pool); /* in: memory pool */
-/************************************************************************
-Returns the amount of reserved memory. */
-
-ulint
-mem_pool_get_reserved(
-/*==================*/
- /* out: reserved mmeory in bytes */
- mem_pool_t* pool); /* in: memory pool */
-/************************************************************************
-Reserves the mem pool mutex. */
-
-void
-mem_pool_mutex_enter(void);
-/*======================*/
-/************************************************************************
-Releases the mem pool mutex. */
-
-void
-mem_pool_mutex_exit(void);
-/*=====================*/
-/************************************************************************
-Validates a memory pool. */
-
-ibool
-mem_pool_validate(
-/*==============*/
- /* out: TRUE if ok */
- mem_pool_t* pool); /* in: memory pool */
-/************************************************************************
-Prints info of a memory pool. */
-
-void
-mem_pool_print_info(
-/*================*/
- FILE* outfile,/* in: output file to write to */
- mem_pool_t* pool); /* in: memory pool */
-
-
-#ifndef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mem0pool.ic b/storage/innobase/include/mem0pool.ic
deleted file mode 100644
index 4e8c08733ed..00000000000
--- a/storage/innobase/include/mem0pool.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/************************************************************************
-The lowest-level memory management
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
deleted file mode 100644
index 6a3920aa8a1..00000000000
--- a/storage/innobase/include/mtr0log.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/******************************************************
-Mini-transaction logging routines
-
-(c) 1995 Innobase Oy
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0log_h
-#define mtr0log_h
-
-#include "univ.i"
-#include "mtr0mtr.h"
-#include "dict0types.h"
-
-/************************************************************
-Writes 1 - 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-
-void
-mlog_write_ulint(
-/*=============*/
- byte* ptr, /* in: pointer where to write */
- ulint val, /* in: value to write */
- byte type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-
-void
-mlog_write_dulint(
-/*==============*/
- byte* ptr, /* in: pointer where to write */
- dulint val, /* in: value to write */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Writes a string to a file page buffered in the buffer pool. Writes the
-corresponding log record to the mini-transaction log. */
-
-void
-mlog_write_string(
-/*==============*/
- byte* ptr, /* in: pointer where to write */
- const byte* str, /* in: string to write */
- ulint len, /* in: string length */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Writes initial part of a log record consisting of one-byte item
-type and four-byte space and page numbers. */
-
-void
-mlog_write_initial_log_record(
-/*==========================*/
- byte* ptr, /* in: pointer to (inside) a buffer frame
- holding the file page where modification
- is made */
- byte type, /* in: log item type: MLOG_1BYTE, ... */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Writes a log record about an .ibd file create/delete/rename. */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
- /* out: new value of log_ptr */
- ulint type, /* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id,/* in: space id, if applicable */
- ulint page_no,/* in: page number (not relevant currently) */
- byte* log_ptr,/* in: pointer to mtr log which has been opened */
- mtr_t* mtr); /* in: mtr */
-/************************************************************
-Catenates 1 - 4 bytes to the mtr log. */
-UNIV_INLINE
-void
-mlog_catenate_ulint(
-/*================*/
- mtr_t* mtr, /* in: mtr */
- ulint val, /* in: value to write */
- ulint type); /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-/************************************************************
-Catenates n bytes to the mtr log. */
-
-void
-mlog_catenate_string(
-/*=================*/
- mtr_t* mtr, /* in: mtr */
- const byte* str, /* in: string to write */
- ulint len); /* in: string length */
-/************************************************************
-Catenates a compressed ulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ulint_compressed(
-/*===========================*/
- mtr_t* mtr, /* in: mtr */
- ulint val); /* in: value to write */
-/************************************************************
-Catenates a compressed dulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_dulint_compressed(
-/*============================*/
- mtr_t* mtr, /* in: mtr */
- dulint val); /* in: value to write */
-/************************************************************
-Opens a buffer to mlog. It must be closed with mlog_close. */
-UNIV_INLINE
-byte*
-mlog_open(
-/*======*/
- /* out: buffer, NULL if log mode MTR_LOG_NONE */
- mtr_t* mtr, /* in: mtr */
- ulint size); /* in: buffer size in bytes; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-/************************************************************
-Closes a buffer opened to mlog. */
-UNIV_INLINE
-void
-mlog_close(
-/*=======*/
- mtr_t* mtr, /* in: mtr */
- byte* ptr); /* in: buffer space from ptr up was not used */
-/************************************************************
-Writes the initial part of a log record (3..11 bytes).
-If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly! */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_fast(
-/*===============================*/
- /* out: new value of log_ptr */
- byte* ptr, /* in: pointer to (inside) a buffer frame holding the
- file page where modification is made */
- byte type, /* in: log item type: MLOG_1BYTE, ... */
- byte* log_ptr,/* in: pointer to mtr log which has been opened */
- mtr_t* mtr); /* in: mtr */
-/************************************************************
-Parses an initial log record written by mlog_write_initial_log_record. */
-
-byte*
-mlog_parse_initial_log_record(
-/*==========================*/
- /* out: parsed record end, NULL if not a complete
- record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* type, /* out: log record type: MLOG_1BYTE, ... */
- ulint* space, /* out: space id */
- ulint* page_no);/* out: page number */
-/************************************************************
-Parses a log record written by mlog_write_ulint or mlog_write_dulint. */
-
-byte*
-mlog_parse_nbytes(
-/*==============*/
- /* out: parsed record end, NULL if not a complete
- record */
- ulint type, /* in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* page); /* in: page where to apply the log record, or NULL */
-/************************************************************
-Parses a log record written by mlog_write_string. */
-
-byte*
-mlog_parse_string(
-/*==============*/
- /* out: parsed record end, NULL if not a complete
- record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* page); /* in: page where to apply the log record, or NULL */
-
-
-/************************************************************
-Opens a buffer for mlog, writes the initial log record and,
-if needed, the field lengths of an index. Reserves space
-for further log entries. The log entry must be closed with
-mtr_close(). */
-
-byte*
-mlog_open_and_write_index(
-/*======================*/
- /* out: buffer, NULL if log mode
- MTR_LOG_NONE */
- mtr_t* mtr, /* in: mtr */
- byte* rec, /* in: index record or page */
- dict_index_t* index, /* in: record descriptor */
- byte type, /* in: log item type */
- ulint size); /* in: requested buffer size in bytes
- (if 0, calls mlog_close() and returns NULL) */
-
-/************************************************************
-Parses a log record written by mlog_open_and_write_index. */
-
-byte*
-mlog_parse_index(
-/*=============*/
- /* out: parsed record end,
- NULL if not a complete record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- /* out: new value of log_ptr */
- ibool comp, /* in: TRUE=compact record format */
- dict_index_t** index); /* out, own: dummy index */
-
-/* Insert, update, and maybe other functions may use this value to define an
-extra mlog buffer size for variable size data */
-#define MLOG_BUF_MARGIN 256
-
-#ifndef UNIV_NONINL
-#include "mtr0log.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
deleted file mode 100644
index 5b1d1ed34d9..00000000000
--- a/storage/innobase/include/mtr0log.ic
+++ /dev/null
@@ -1,227 +0,0 @@
-/******************************************************
-Mini-transaction logging routines
-
-(c) 1995 Innobase Oy
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#include "ut0lst.h"
-#include "buf0buf.h"
-
-/************************************************************
-Opens a buffer to mlog. It must be closed with mlog_close. */
-UNIV_INLINE
-byte*
-mlog_open(
-/*======*/
- /* out: buffer, NULL if log mode MTR_LOG_NONE */
- mtr_t* mtr, /* in: mtr */
- ulint size) /* in: buffer size in bytes; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-{
- dyn_array_t* mlog;
-
- mtr->modifications = TRUE;
-
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
- return(NULL);
- }
-
- mlog = &(mtr->log);
-
- return(dyn_array_open(mlog, size));
-}
-
-/************************************************************
-Closes a buffer opened to mlog. */
-UNIV_INLINE
-void
-mlog_close(
-/*=======*/
- mtr_t* mtr, /* in: mtr */
- byte* ptr) /* in: buffer space from ptr up was not used */
-{
- dyn_array_t* mlog;
-
- ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
-
- mlog = &(mtr->log);
-
- dyn_array_close(mlog, ptr);
-}
-
-/************************************************************
-Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
-UNIV_INLINE
-void
-mlog_catenate_ulint(
-/*================*/
- mtr_t* mtr, /* in: mtr */
- ulint val, /* in: value to write */
- ulint type) /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-{
- dyn_array_t* mlog;
- byte* ptr;
-
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
- return;
- }
-
- mlog = &(mtr->log);
-
-#if MLOG_1BYTE != 1
-# error "MLOG_1BYTE != 1"
-#endif
-#if MLOG_2BYTES != 2
-# error "MLOG_2BYTES != 2"
-#endif
-#if MLOG_4BYTES != 4
-# error "MLOG_4BYTES != 4"
-#endif
-#if MLOG_8BYTES != 8
-# error "MLOG_8BYTES != 8"
-#endif
- ptr = dyn_array_push(mlog, type);
-
- if (type == MLOG_4BYTES) {
- mach_write_to_4(ptr, val);
- } else if (type == MLOG_2BYTES) {
- mach_write_to_2(ptr, val);
- } else {
- ut_ad(type == MLOG_1BYTE);
- mach_write_to_1(ptr, val);
- }
-}
-
-/************************************************************
-Catenates a compressed ulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ulint_compressed(
-/*===========================*/
- mtr_t* mtr, /* in: mtr */
- ulint val) /* in: value to write */
-{
- byte* log_ptr;
-
- log_ptr = mlog_open(mtr, 10);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr += mach_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-/************************************************************
-Catenates a compressed dulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_dulint_compressed(
-/*============================*/
- mtr_t* mtr, /* in: mtr */
- dulint val) /* in: value to write */
-{
- byte* log_ptr;
-
- log_ptr = mlog_open(mtr, 15);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr += mach_dulint_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-/************************************************************
-Writes the initial part of a log record (3..11 bytes).
-If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly! */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_fast(
-/*===============================*/
- /* out: new value of log_ptr */
- byte* ptr, /* in: pointer to (inside) a buffer frame holding the
- file page where modification is made */
- byte type, /* in: log item type: MLOG_1BYTE, ... */
- byte* log_ptr,/* in: pointer to mtr log which has been opened */
- mtr_t* mtr) /* in: mtr */
-{
- buf_block_t* block;
- ulint space;
- ulint offset;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(type <= MLOG_BIGGEST_TYPE);
- ut_ad(ptr && log_ptr);
-
- block = buf_block_align(ptr);
-
- space = buf_block_get_space(block);
- offset = buf_block_get_page_no(block);
-
- mach_write_to_1(log_ptr, type);
- log_ptr++;
- log_ptr += mach_write_compressed(log_ptr, space);
- log_ptr += mach_write_compressed(log_ptr, offset);
-
- mtr->n_log_recs++;
-
-#ifdef UNIV_LOG_DEBUG
- /* fprintf(stderr,
- "Adding to mtr log record type %lu space %lu page no %lu\n",
- type, space, offset); */
-#endif
-
-#ifdef UNIV_DEBUG
- /* We now assume that all x-latched pages have been modified! */
-
- if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
-
- mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
- }
-#endif
- return(log_ptr);
-}
-
-/************************************************************
-Writes a log record about an .ibd file create/delete/rename. */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
- /* out: new value of log_ptr */
- ulint type, /* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id,/* in: space id, if applicable */
- ulint page_no,/* in: page number (not relevant currently) */
- byte* log_ptr,/* in: pointer to mtr log which has been opened */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(log_ptr);
-
- mach_write_to_1(log_ptr, type);
- log_ptr++;
-
- /* We write dummy space id and page number */
- log_ptr += mach_write_compressed(log_ptr, space_id);
- log_ptr += mach_write_compressed(log_ptr, page_no);
-
- mtr->n_log_recs++;
-
- return(log_ptr);
-}
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
deleted file mode 100644
index 2a160d27e0c..00000000000
--- a/storage/innobase/include/mtr0mtr.h
+++ /dev/null
@@ -1,347 +0,0 @@
-/******************************************************
-Mini-transaction buffer
-
-(c) 1995 Innobase Oy
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0mtr_h
-#define mtr0mtr_h
-
-#include "univ.i"
-#include "mem0mem.h"
-#include "dyn0dyn.h"
-#include "buf0types.h"
-#include "sync0rw.h"
-#include "ut0byte.h"
-#include "mtr0types.h"
-#include "page0types.h"
-
-/* Logging modes for a mini-transaction */
-#define MTR_LOG_ALL 21 /* default mode: log all operations
- modifying disk-based data */
-#define MTR_LOG_NONE 22 /* log no operations */
-/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying
- file space page allocation data
- (operations in fsp0fsp.* ) */
-#define MTR_LOG_SHORT_INSERTS 24 /* inserts are logged in a shorter
- form */
-
-/* Types for the mlock objects to store in the mtr memo; NOTE that the
-first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-#define MTR_MEMO_PAGE_S_FIX RW_S_LATCH
-#define MTR_MEMO_PAGE_X_FIX RW_X_LATCH
-#define MTR_MEMO_BUF_FIX RW_NO_LATCH
-#define MTR_MEMO_MODIFY 54
-#define MTR_MEMO_S_LOCK 55
-#define MTR_MEMO_X_LOCK 56
-
-/* Log item types: we have made them to be of the type 'byte'
-for the compiler to warn if val and type parameters are switched
-in a call to mlog_write_ulint. NOTE! For 1 - 8 bytes, the
-flag value must give the length also! */
-#define MLOG_SINGLE_REC_FLAG 128 /* if the mtr contains only
- one log record for one page,
- i.e., write_initial_log_record
- has been called only once,
- this flag is ORed to the type
- of that first log record */
-#define MLOG_1BYTE (1) /* one byte is written */
-#define MLOG_2BYTES (2) /* 2 bytes ... */
-#define MLOG_4BYTES (4) /* 4 bytes ... */
-#define MLOG_8BYTES (8) /* 8 bytes ... */
-#define MLOG_REC_INSERT ((byte)9) /* record insert */
-#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /* mark clustered index record
- deleted */
-#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /* mark secondary index record
- deleted */
-#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /* update of a record,
- preserves record field sizes */
-#define MLOG_REC_DELETE ((byte)14) /* delete a record from a
- page */
-#define MLOG_LIST_END_DELETE ((byte)15) /* delete record list end on
- index page */
-#define MLOG_LIST_START_DELETE ((byte)16) /* delete record list start on
- index page */
-#define MLOG_LIST_END_COPY_CREATED ((byte)17) /* copy record list end to a
- new created index page */
-#define MLOG_PAGE_REORGANIZE ((byte)18) /* reorganize an index page */
-#define MLOG_PAGE_CREATE ((byte)19) /* create an index page */
-#define MLOG_UNDO_INSERT ((byte)20) /* insert entry in an undo
- log */
-#define MLOG_UNDO_ERASE_END ((byte)21) /* erase an undo log
- page end */
-#define MLOG_UNDO_INIT ((byte)22) /* initialize a page in an
- undo log */
-#define MLOG_UNDO_HDR_DISCARD ((byte)23) /* discard an update undo log
- header */
-#define MLOG_UNDO_HDR_REUSE ((byte)24) /* reuse an insert undo log
- header */
-#define MLOG_UNDO_HDR_CREATE ((byte)25) /* create an undo log header */
-#define MLOG_REC_MIN_MARK ((byte)26) /* mark an index record as the
- predefined minimum record */
-#define MLOG_IBUF_BITMAP_INIT ((byte)27) /* initialize an ibuf bitmap
- page */
-/*#define MLOG_FULL_PAGE ((byte)28) full contents of a page */
-#define MLOG_INIT_FILE_PAGE ((byte)29) /* this means that a file page
- is taken into use and the prior
- contents of the page should be
- ignored: in recovery we must
- not trust the lsn values stored
- to the file page */
-#define MLOG_WRITE_STRING ((byte)30) /* write a string to a page */
-#define MLOG_MULTI_REC_END ((byte)31) /* if a single mtr writes
- log records for several pages,
- this log record ends the
- sequence of these records */
-#define MLOG_DUMMY_RECORD ((byte)32) /* dummy log record used to
- pad a log block full */
-#define MLOG_FILE_CREATE ((byte)33) /* log record about an .ibd
- file creation */
-#define MLOG_FILE_RENAME ((byte)34) /* log record about an .ibd
- file rename */
-#define MLOG_FILE_DELETE ((byte)35) /* log record about an .ibd
- file deletion */
-#define MLOG_COMP_REC_MIN_MARK ((byte)36) /* mark a compact index record
- as the predefined minimum
- record */
-#define MLOG_COMP_PAGE_CREATE ((byte)37) /* create a compact
- index page */
-#define MLOG_COMP_REC_INSERT ((byte)38) /* compact record insert */
-#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
- /* mark compact clustered index
- record deleted */
-#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index
- record deleted; this log
- record type is redundant, as
- MLOG_REC_SEC_DELETE_MARK is
- independent of the record
- format. */
-#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record,
- preserves record field sizes */
-#define MLOG_COMP_REC_DELETE ((byte)42) /* delete a compact record
- from a page */
-#define MLOG_COMP_LIST_END_DELETE ((byte)43) /* delete compact record list
- end on index page */
-#define MLOG_COMP_LIST_START_DELETE ((byte)44) /* delete compact record list
- start on index page */
-#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
- /* copy compact record list end
- to a new created index page */
-#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */
-
-#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in
- asserts) */
-
-/*******************************************************************
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller. */
-UNIV_INLINE
-mtr_t*
-mtr_start(
-/*======*/
- /* out: mtr buffer which also acts as
- the mtr handle */
- mtr_t* mtr); /* in: memory buffer for the mtr buffer */
-/*******************************************************************
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller. */
-
-mtr_t*
-mtr_start_noninline(
-/*================*/
- /* out: mtr buffer which also acts as
- the mtr handle */
- mtr_t* mtr); /* in: memory buffer for the mtr buffer */
-/*******************************************************************
-Commits a mini-transaction. */
-
-void
-mtr_commit(
-/*=======*/
- mtr_t* mtr); /* in: mini-transaction */
-/**************************************************************
-Sets and returns a savepoint in mtr. */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
- /* out: savepoint */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************
-Releases the latches stored in an mtr memo down to a savepoint.
-NOTE! The mtr must not have made changes to buffer pages after the
-savepoint, as these can be handled only by mtr_commit. */
-
-void
-mtr_rollback_to_savepoint(
-/*======================*/
- mtr_t* mtr, /* in: mtr */
- ulint savepoint); /* in: savepoint */
-/**************************************************************
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
-void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /* in: mtr */
- ulint savepoint, /* in: savepoint */
- rw_lock_t* lock); /* in: latch to release */
-/*******************************************************************
-Gets the logging mode of a mini-transaction. */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
- /* out: logging mode: MTR_LOG_NONE, ... */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
-Changes the logging mode of a mini-transaction. */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
- /* out: old mode */
- mtr_t* mtr, /* in: mtr */
- ulint mode); /* in: logging mode: MTR_LOG_NONE, ... */
-/************************************************************
-Reads 1 - 4 bytes from a file page buffered in the buffer pool. */
-
-ulint
-mtr_read_ulint(
-/*===========*/
- /* out: value read */
- byte* ptr, /* in: pointer from where to read */
- ulint type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /* in: mini-transaction handle */
-/************************************************************
-Reads 8 bytes from a file page buffered in the buffer pool. */
-
-dulint
-mtr_read_dulint(
-/*============*/
- /* out: value read */
- byte* ptr, /* in: pointer from where to read */
- mtr_t* mtr); /* in: mini-transaction handle */
-/*************************************************************************
-This macro locks an rw-lock in s-mode. */
-#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\
- (MTR))
-/*************************************************************************
-This macro locks an rw-lock in x-mode. */
-#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\
- (MTR))
-/*************************************************************************
-NOTE! Use the macro above!
-Locks a lock in s-mode. */
-UNIV_INLINE
-void
-mtr_s_lock_func(
-/*============*/
- rw_lock_t* lock, /* in: rw-lock */
- const char* file, /* in: file name */
- ulint line, /* in: line number */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
-NOTE! Use the macro above!
-Locks a lock in x-mode. */
-UNIV_INLINE
-void
-mtr_x_lock_func(
-/*============*/
- rw_lock_t* lock, /* in: rw-lock */
- const char* file, /* in: file name */
- ulint line, /* in: line number */
- mtr_t* mtr); /* in: mtr */
-
-/*******************************************************
-Releases an object in the memo stack. */
-
-void
-mtr_memo_release(
-/*=============*/
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object */
- ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */
-#ifdef UNIV_DEBUG
-/**************************************************************
-Checks if memo contains the given item. */
-UNIV_INLINE
-ibool
-mtr_memo_contains(
-/*==============*/
- /* out: TRUE if contains */
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object to search */
- ulint type); /* in: type of object */
-/*************************************************************
-Prints info of an mtr handle. */
-
-void
-mtr_print(
-/*======*/
- mtr_t* mtr); /* in: mtr */
-#endif /* UNIV_DEBUG */
-/*######################################################################*/
-
-#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */
-
-/*******************************************************************
-Returns the log object of a mini-transaction buffer. */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
- /* out: log */
- mtr_t* mtr); /* in: mini-transaction */
-/*******************************************************
-Pushes an object to an mtr memo stack. */
-UNIV_INLINE
-void
-mtr_memo_push(
-/*==========*/
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object */
- ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */
-
-
-/* Type definition of a mini-transaction memo stack slot. */
-typedef struct mtr_memo_slot_struct mtr_memo_slot_t;
-struct mtr_memo_slot_struct{
- ulint type; /* type of the stored object (MTR_MEMO_S_LOCK, ...) */
- void* object; /* pointer to the object */
-};
-
-/* Mini-transaction handle and buffer */
-struct mtr_struct{
- ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
- dyn_array_t memo; /* memo stack for locks etc. */
- dyn_array_t log; /* mini-transaction log */
- ibool modifications;
- /* TRUE if the mtr made modifications to
- buffer pool pages */
- ulint n_log_recs;
- /* count of how many page initial log records
- have been written to the mtr log */
- ulint log_mode; /* specifies which operations should be
- logged; default value MTR_LOG_ALL */
- dulint start_lsn;/* start lsn of the possible log entry for
- this mtr */
- dulint end_lsn;/* end lsn of the possible log entry for
- this mtr */
- ulint magic_n;
-};
-
-#define MTR_MAGIC_N 54551
-
-#define MTR_ACTIVE 12231
-#define MTR_COMMITTING 56456
-#define MTR_COMMITTED 34676
-
-#ifndef UNIV_NONINL
-#include "mtr0mtr.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
deleted file mode 100644
index 81eec3bfc92..00000000000
--- a/storage/innobase/include/mtr0mtr.ic
+++ /dev/null
@@ -1,251 +0,0 @@
-/******************************************************
-Mini-transaction buffer
-
-(c) 1995 Innobase Oy
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "mach0data.h"
-
-/*******************************************************************
-Starts a mini-transaction and creates a mini-transaction handle
-and a buffer in the memory buffer given by the caller. */
-UNIV_INLINE
-mtr_t*
-mtr_start(
-/*======*/
- /* out: mtr buffer which also acts as
- the mtr handle */
- mtr_t* mtr) /* in: memory buffer for the mtr buffer */
-{
- dyn_array_create(&(mtr->memo));
- dyn_array_create(&(mtr->log));
-
- mtr->log_mode = MTR_LOG_ALL;
- mtr->modifications = FALSE;
- mtr->n_log_recs = 0;
-
-#ifdef UNIV_DEBUG
- mtr->state = MTR_ACTIVE;
- mtr->magic_n = MTR_MAGIC_N;
-#endif
- return(mtr);
-}
-
-/*******************************************************
-Pushes an object to an mtr memo stack. */
-UNIV_INLINE
-void
-mtr_memo_push(
-/*==========*/
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object */
- ulint type) /* in: object type: MTR_MEMO_S_LOCK, ... */
-{
- dyn_array_t* memo;
- mtr_memo_slot_t* slot;
-
- ut_ad(object);
- ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
- ut_ad(type <= MTR_MEMO_X_LOCK);
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- memo = &(mtr->memo);
-
- slot = dyn_array_push(memo, sizeof(mtr_memo_slot_t));
-
- slot->object = object;
- slot->type = type;
-}
-
-/**************************************************************
-Sets and returns a savepoint in mtr. */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
- /* out: savepoint */
- mtr_t* mtr) /* in: mtr */
-{
- dyn_array_t* memo;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- memo = &(mtr->memo);
-
- return(dyn_array_get_data_size(memo));
-}
-
-/**************************************************************
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
-void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /* in: mtr */
- ulint savepoint, /* in: savepoint */
- rw_lock_t* lock) /* in: latch to release */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- ut_ad(dyn_array_get_data_size(memo) > savepoint);
-
- slot = dyn_array_get_element(memo, savepoint);
-
- ut_ad(slot->object == lock);
- ut_ad(slot->type == MTR_MEMO_S_LOCK);
-
- rw_lock_s_unlock(lock);
-
- slot->object = NULL;
-}
-
-#ifdef UNIV_DEBUG
-/**************************************************************
-Checks if memo contains the given item. */
-UNIV_INLINE
-ibool
-mtr_memo_contains(
-/*==============*/
- /* out: TRUE if contains */
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object to search */
- ulint type) /* in: type of object */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
-
- while (offset > 0) {
- offset -= sizeof(mtr_memo_slot_t);
-
- slot = dyn_array_get_element(memo, offset);
-
- if ((object == slot->object) && (type == slot->type)) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************
-Returns the log object of a mini-transaction buffer. */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
- /* out: log */
- mtr_t* mtr) /* in: mini-transaction */
-{
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- return(&(mtr->log));
-}
-
-/*******************************************************************
-Gets the logging mode of a mini-transaction. */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
- /* out: logging mode: MTR_LOG_NONE, ... */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mtr);
- ut_ad(mtr->log_mode >= MTR_LOG_ALL);
- ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS);
-
- return(mtr->log_mode);
-}
-
-/*******************************************************************
-Changes the logging mode of a mini-transaction. */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
- /* out: old mode */
- mtr_t* mtr, /* in: mtr */
- ulint mode) /* in: logging mode: MTR_LOG_NONE, ... */
-{
- ulint old_mode;
-
- ut_ad(mtr);
- ut_ad(mode >= MTR_LOG_ALL);
- ut_ad(mode <= MTR_LOG_SHORT_INSERTS);
-
- old_mode = mtr->log_mode;
-
- if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) {
- /* Do nothing */
- } else {
- mtr->log_mode = mode;
- }
-
- ut_ad(old_mode >= MTR_LOG_ALL);
- ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS);
-
- return(old_mode);
-}
-
-/*************************************************************************
-Locks a lock in s-mode. */
-UNIV_INLINE
-void
-mtr_s_lock_func(
-/*============*/
- rw_lock_t* lock, /* in: rw-lock */
- const char* file, /* in: file name */
- ulint line, /* in: line number */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mtr);
- ut_ad(lock);
-
- rw_lock_s_lock_func(lock, 0, file, line);
-
- mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
-}
-
-/*************************************************************************
-Locks a lock in x-mode. */
-UNIV_INLINE
-void
-mtr_x_lock_func(
-/*============*/
- rw_lock_t* lock, /* in: rw-lock */
- const char* file, /* in: file name */
- ulint line, /* in: line number */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mtr);
- ut_ad(lock);
-
- rw_lock_x_lock_func(lock, 0, file, line);
-
- mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
-}
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
deleted file mode 100644
index e3b6ec9a84f..00000000000
--- a/storage/innobase/include/mtr0types.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/******************************************************
-Mini-transaction buffer global types
-
-(c) 1995 Innobase Oy
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0types_h
-#define mtr0types_h
-
-typedef struct mtr_struct mtr_t;
-
-#endif
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
deleted file mode 100644
index 70c07ea6d1a..00000000000
--- a/storage/innobase/include/os0file.h
+++ /dev/null
@@ -1,731 +0,0 @@
-/******************************************************
-The interface to the operating system file io
-
-(c) 1995 Innobase Oy
-
-Created 10/21/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0file_h
-#define os0file_h
-
-#include "univ.i"
-
-#ifndef __WIN__
-#include <dirent.h>
-#include <sys/stat.h>
-#include <time.h>
-#endif
-
-typedef struct fil_node_struct fil_node_t;
-
-#ifdef UNIV_DO_FLUSH
-extern ibool os_do_not_call_flush_at_each_write;
-#endif /* UNIV_DO_FLUSH */
-extern ibool os_has_said_disk_full;
-extern ibool os_aio_print_debug;
-
-extern ulint os_file_n_pending_preads;
-extern ulint os_file_n_pending_pwrites;
-
-extern ulint os_n_pending_reads;
-extern ulint os_n_pending_writes;
-
-#ifdef __WIN__
-
-/* We always define WIN_ASYNC_IO, and check at run-time whether
- the OS actually supports it: Win 95 does not, NT does. */
-#define WIN_ASYNC_IO
-
-#define UNIV_NON_BUFFERED_IO
-
-#endif
-
-#ifdef __WIN__
-#define os_file_t HANDLE
-#else
-typedef int os_file_t;
-#endif
-
-extern ulint os_innodb_umask;
-
-/* If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use simulated aio we build below with threads */
-
-extern ibool os_aio_use_native_aio;
-
-#define OS_FILE_SECTOR_SIZE 512
-
-/* The next value should be smaller or equal to the smallest sector size used
-on any disk. A log block is required to be a portion of disk which is written
-so that if the start and the end of a block get written to disk, then the
-whole block gets written. This should be true even in most cases of a crash:
-if this fails for a log block, then it is equivalent to a media failure in the
-log. */
-
-#define OS_FILE_LOG_BLOCK_SIZE 512
-
-/* Options for file_create */
-#define OS_FILE_OPEN 51
-#define OS_FILE_CREATE 52
-#define OS_FILE_OVERWRITE 53
-#define OS_FILE_OPEN_RAW 54
-#define OS_FILE_CREATE_PATH 55
-#define OS_FILE_OPEN_RETRY 56 /* for os_file_create() on
- the first ibdata file */
-
-#define OS_FILE_READ_ONLY 333
-#define OS_FILE_READ_WRITE 444
-#define OS_FILE_READ_ALLOW_DELETE 555 /* for ibbackup */
-
-/* Options for the purpose argument of os_file_create() */
-#define OS_FILE_AIO 61
-#define OS_FILE_NORMAL 62
-
-/* Types for file create */
-#define OS_DATA_FILE 100
-#define OS_LOG_FILE 101
-
-/* Error codes from os_file_get_last_error */
-#define OS_FILE_NOT_FOUND 71
-#define OS_FILE_DISK_FULL 72
-#define OS_FILE_ALREADY_EXISTS 73
-#define OS_FILE_PATH_ERROR 74
-#define OS_FILE_AIO_RESOURCES_RESERVED 75 /* wait for OS aio resources
- to become available again */
-#define OS_FILE_SHARING_VIOLATION 76
-#define OS_FILE_ERROR_NOT_SPECIFIED 77
-
-/* Types for aio operations */
-#define OS_FILE_READ 10
-#define OS_FILE_WRITE 11
-
-#define OS_FILE_LOG 256 /* This can be ORed to type */
-
-#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /* Win NT does not allow more
- than 64 */
-
-/* Modes for aio operations */
-#define OS_AIO_NORMAL 21 /* Normal asynchronous i/o not for ibuf
- pages or ibuf bitmap pages */
-#define OS_AIO_IBUF 22 /* Asynchronous i/o for ibuf pages or ibuf
- bitmap pages */
-#define OS_AIO_LOG 23 /* Asynchronous i/o for the log */
-#define OS_AIO_SYNC 24 /* Asynchronous i/o where the calling thread
- will itself wait for the i/o to complete,
- doing also the job of the i/o-handler thread;
- can be used for any pages, ibuf or non-ibuf.
- This is used to save CPU time, as we can do
- with fewer thread switches. Plain synchronous
- i/o is not as good, because it must serialize
- the file seek and read or write, causing a
- bottleneck for parallelism. */
-
-#define OS_AIO_SIMULATED_WAKE_LATER 512 /* This can be ORed to mode
- in the call of os_aio(...),
- if the caller wants to post several i/o
- requests in a batch, and only after that
- wake the i/o-handler thread; this has
- effect only in simulated aio */
-#define OS_WIN31 1
-#define OS_WIN95 2
-#define OS_WINNT 3
-#define OS_WIN2000 4
-
-extern ulint os_n_file_reads;
-extern ulint os_n_file_writes;
-extern ulint os_n_fsyncs;
-
-/* File types for directory entry data type */
-
-enum os_file_type_enum{
- OS_FILE_TYPE_UNKNOWN = 0,
- OS_FILE_TYPE_FILE, /* regular file */
- OS_FILE_TYPE_DIR, /* directory */
- OS_FILE_TYPE_LINK /* symbolic link */
-};
-typedef enum os_file_type_enum os_file_type_t;
-
-/* Maximum path string length in bytes when referring to tables in the
-'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
-of this size from the thread stack; that is why this should not be made much
-bigger than 4000 bytes */
-#define OS_FILE_MAX_PATH 4000
-
-/* Struct used in fetching information of a file in a directory */
-struct os_file_stat_struct{
- char name[OS_FILE_MAX_PATH]; /* path to a file */
- os_file_type_t type; /* file type */
- ib_longlong size; /* file size */
- time_t ctime; /* creation time */
- time_t mtime; /* modification time */
- time_t atime; /* access time */
-};
-typedef struct os_file_stat_struct os_file_stat_t;
-
-#ifdef __WIN__
-typedef HANDLE os_file_dir_t; /* directory stream */
-#else
-typedef DIR* os_file_dir_t; /* directory stream */
-#endif
-
-/***************************************************************************
-Gets the operating system version. Currently works only on Windows. */
-
-ulint
-os_get_os_version(void);
-/*===================*/
- /* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
-/********************************************************************
-Creates the seek mutexes used in positioned reads and writes. */
-
-void
-os_io_init_simple(void);
-/*===================*/
-/***************************************************************************
-Creates a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the MySQL temporary directory.
-On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag. */
-
-FILE*
-os_file_create_tmpfile(void);
-/*========================*/
- /* out: temporary file handle, or NULL on error */
-/***************************************************************************
-The os_file_opendir() function opens a directory stream corresponding to the
-directory named by the dirname argument. The directory stream is positioned
-at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing. */
-
-os_file_dir_t
-os_file_opendir(
-/*============*/
- /* out: directory stream, NULL if
- error */
- const char* dirname, /* in: directory name; it must not
- contain a trailing '\' or '/' */
- ibool error_is_fatal);/* in: TRUE if we should treat an
- error as a fatal error; if we try to
- open symlinks then we do not wish a
- fatal error if it happens not to be
- a directory */
-/***************************************************************************
-Closes a directory stream. */
-
-int
-os_file_closedir(
-/*=============*/
- /* out: 0 if success, -1 if failure */
- os_file_dir_t dir); /* in: directory stream */
-/***************************************************************************
-This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory. */
-
-int
-os_file_readdir_next_file(
-/*======================*/
- /* out: 0 if ok, -1 if error, 1 if at the end
- of the directory */
- const char* dirname,/* in: directory name or path */
- os_file_dir_t dir, /* in: directory stream */
- os_file_stat_t* info); /* in/out: buffer where the info is returned */
-/*********************************************************************
-This function attempts to create a directory named pathname. The new directory
-gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
-directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists argument is true. */
-
-ibool
-os_file_create_directory(
-/*=====================*/
- /* out: TRUE if call succeeds,
- FALSE on error */
- const char* pathname, /* in: directory name as
- null-terminated string */
- ibool fail_if_exists);/* in: if TRUE, pre-existing directory
- is treated as an error. */
-/********************************************************************
-A simple function to open or create a file. */
-
-os_file_t
-os_file_create_simple(
-/*==================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
- opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error), or
- OS_FILE_CREATE_PATH if new file
- (if exists, error) and subdirectories along
- its path are created (if needed)*/
- ulint access_type,/* in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success);/* out: TRUE if succeed, FALSE if error */
-/********************************************************************
-A simple function to open or create a file. */
-
-os_file_t
-os_file_create_simple_no_error_handling(
-/*====================================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error) */
- ulint access_type,/* in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success);/* out: TRUE if succeed, FALSE if error */
-/********************************************************************
-Opens an existing file or creates a new one. */
-
-os_file_t
-os_file_create(
-/*===========*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error),
- OS_FILE_OVERWRITE if a new file is created
- or an old overwritten;
- OS_FILE_OPEN_RAW, if a raw device or disk
- partition should be opened */
- ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success);/* out: TRUE if succeed, FALSE if error */
-/***************************************************************************
-Deletes a file. The file has to be closed before calling this. */
-
-ibool
-os_file_delete(
-/*===========*/
- /* out: TRUE if success */
- const char* name); /* in: file path as a null-terminated string */
-
-/***************************************************************************
-Deletes a file if it exists. The file has to be closed before calling this. */
-
-ibool
-os_file_delete_if_exists(
-/*=====================*/
- /* out: TRUE if success */
- const char* name); /* in: file path as a null-terminated string */
-/***************************************************************************
-Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function. */
-
-ibool
-os_file_rename(
-/*===========*/
- /* out: TRUE if success */
- const char* oldpath, /* in: old file path as a
- null-terminated string */
- const char* newpath); /* in: new file path */
-/***************************************************************************
-Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error. */
-
-ibool
-os_file_close(
-/*==========*/
- /* out: TRUE if success */
- os_file_t file); /* in, own: handle to a file */
-/***************************************************************************
-Closes a file handle. */
-
-ibool
-os_file_close_no_error_handling(
-/*============================*/
- /* out: TRUE if success */
- os_file_t file); /* in, own: handle to a file */
-/***************************************************************************
-Gets a file size. */
-
-ibool
-os_file_get_size(
-/*=============*/
- /* out: TRUE if success */
- os_file_t file, /* in: handle to a file */
- ulint* size, /* out: least significant 32 bits of file
- size */
- ulint* size_high);/* out: most significant 32 bits of size */
-/***************************************************************************
-Gets file size as a 64-bit integer ib_longlong. */
-
-ib_longlong
-os_file_get_size_as_iblonglong(
-/*===========================*/
- /* out: size in bytes, -1 if error */
- os_file_t file); /* in: handle to a file */
-/***************************************************************************
-Write the specified number of zeros to a newly created file. */
-
-ibool
-os_file_set_size(
-/*=============*/
- /* out: TRUE if success */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- os_file_t file, /* in: handle to a file */
- ulint size, /* in: least significant 32 bits of file
- size */
- ulint size_high);/* in: most significant 32 bits of size */
-/***************************************************************************
-Truncates a file at its current position. */
-
-ibool
-os_file_set_eof(
-/*============*/
- /* out: TRUE if success */
- FILE* file); /* in: file to be truncated */
-/***************************************************************************
-Flushes the write buffers of a given file to the disk. */
-
-ibool
-os_file_flush(
-/*==========*/
- /* out: TRUE if success */
- os_file_t file); /* in, own: handle to a file */
-/***************************************************************************
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned. */
-
-ulint
-os_file_get_last_error(
-/*===================*/
- /* out: error number, or OS error
- number + 100 */
- ibool report_all_errors); /* in: TRUE if we want an error message
- printed of all errors */
-/***********************************************************************
-Requests a synchronous read operation. */
-
-ibool
-os_file_read(
-/*=========*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/* in: most significant 32 bits of
- offset */
- ulint n); /* in: number of bytes to read */
-/***********************************************************************
-Rewind file to its start, read at most size - 1 bytes from it to str, and
-NUL-terminate str. All errors are silently ignored. This function is
-mostly meant to be used with temporary files. */
-
-void
-os_file_read_string(
-/*================*/
- FILE* file, /* in: file to read from */
- char* str, /* in: buffer where to read */
- ulint size); /* in: size of buffer */
-/***********************************************************************
-Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE. */
-
-ibool
-os_file_read_no_error_handling(
-/*===========================*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
- offset where to read */
- ulint offset_high,/* in: most significant 32 bits of
- offset */
- ulint n); /* in: number of bytes to read */
-
-/***********************************************************************
-Requests a synchronous write operation. */
-
-ibool
-os_file_write(
-/*==========*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- os_file_t file, /* in: handle to a file */
- const void* buf, /* in: buffer from which to write */
- ulint offset, /* in: least significant 32 bits of file
- offset where to write */
- ulint offset_high,/* in: most significant 32 bits of
- offset */
- ulint n); /* in: number of bytes to write */
-/***********************************************************************
-Check the existence and type of the given file. */
-
-ibool
-os_file_status(
-/*===========*/
- /* out: TRUE if call succeeded */
- const char* path, /* in: pathname of the file */
- ibool* exists, /* out: TRUE if file exists */
- os_file_type_t* type); /* out: type of the file (if it exists) */
-/********************************************************************
-The function os_file_dirname returns a directory component of a
-null-terminated pathname string. In the usual case, dirname returns
-the string up to, but not including, the final '/', and basename
-is the component following the final '/'. Trailing '/' characters
-are not counted as part of the pathname.
-
-If path does not contain a slash, dirname returns the string ".".
-
-Concatenating the string returned by dirname, a "/", and the basename
-yields a complete pathname.
-
-The return value is a copy of the directory component of the pathname.
-The copy is allocated from heap. It is the caller's responsibility
-to free it after it is no longer needed.
-
-The following list of examples (taken from SUSv2) shows the strings
-returned by dirname and basename for different paths:
-
- path dirname basename
- "/usr/lib" "/usr" "lib"
- "/usr/" "/" "usr"
- "usr" "." "usr"
- "/" "/" "/"
- "." "." "."
- ".." "." ".."
-*/
-
-char*
-os_file_dirname(
-/*============*/
- /* out, own: directory component of the
- pathname */
- const char* path); /* in: pathname */
-/********************************************************************
-Creates all missing subdirectories along the given path. */
-
-ibool
-os_file_create_subdirs_if_needed(
-/*=============================*/
- /* out: TRUE if call succeeded
- FALSE otherwise */
- const char* path); /* in: path name */
-/****************************************************************************
-Initializes the asynchronous io system. Creates n_read_threads segments for
-read, n_write_threads segments for writes, one segment for the ibuf i/o, and
-one segment for log IO. Returns the number of segments created. When async
-IO is not used, and 4 threads should be created to process requests put
-in the segments. */
-
-ulint
-os_aio_init(
-/*========*/
- ulint ios_per_array, /* in: maximum number of pending aio operations
- allowed per array */
- ulint n_read_threads, /* in: number of read threads */
- ulint n_write_threads, /* in: number of write threads */
- ulint n_slots_sync); /* in: number of slots in the sync aio array */
-/***********************************************************************
-Requests an asynchronous i/o operation. */
-
-ibool
-os_aio(
-/*===*/
- /* out: TRUE if request was queued
- successfully, FALSE if fail */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
- ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
- to OS_AIO_SIMULATED_WAKE_LATER: the
- last flag advises this function not to wake
- i/o-handler threads, but the caller will
- do the waking explicitly later, in this
- way the caller can post several requests in
- a batch; NOTE that the batch must not be
- so big that it exhausts the slots in aio
- arrays! NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read or from which
- to write */
- ulint offset, /* in: least significant 32 bits of file
- offset where to read or write */
- ulint offset_high, /* in: most significant 32 bits of
- offset */
- ulint n, /* in: number of bytes to read or write */
- fil_node_t* message1,/* in: messages for the aio handler (these
- can be used to identify a completed aio
- operation); if mode is OS_AIO_SYNC, these
- are ignored */
- void* message2);
-/****************************************************************************
-Wakes up all async i/o threads so that they know to exit themselves in
-shutdown. */
-
-void
-os_aio_wake_all_threads_at_shutdown(void);
-/*=====================================*/
-/****************************************************************************
-Waits until there are no pending writes in os_aio_write_array. There can
-be other, synchronous, pending writes. */
-
-void
-os_aio_wait_until_no_pending_writes(void);
-/*=====================================*/
-/**************************************************************************
-Wakes up simulated aio i/o-handler threads if they have something to do. */
-
-void
-os_aio_simulated_wake_handler_threads(void);
-/*=======================================*/
-/**************************************************************************
-This function can be called if one wants to post a batch of reads and
-prefers an i/o-handler thread to handle them all at once later. You must
-call os_aio_simulated_wake_handler_threads later to ensure the threads
-are not left sleeping! */
-
-void
-os_aio_simulated_put_read_threads_to_sleep(void);
-/*============================================*/
-
-#ifdef WIN_ASYNC_IO
-/**************************************************************************
-This function is only used in Windows asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait for
-completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing! */
-
-ibool
-os_aio_windows_handle(
-/*==================*/
- /* out: TRUE if the aio operation succeeded */
- ulint segment, /* in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads; if
- this is ULINT_UNDEFINED, then it means that
- sync aio is used, and this parameter is
- ignored */
- ulint pos, /* this parameter is used only in sync aio:
- wait for the aio slot at this position */
- fil_node_t**message1, /* out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type); /* out: OS_FILE_WRITE or ..._READ */
-#endif
-
-/* Currently we do not use Posix async i/o */
-#ifdef POSIX_ASYNC_IO
-/**************************************************************************
-This function is only used in Posix asynchronous i/o. Waits for an aio
-operation to complete. */
-
-ibool
-os_aio_posix_handle(
-/*================*/
- /* out: TRUE if the aio operation succeeded */
- ulint array_no, /* in: array number 0 - 3 */
- fil_node_t**message1, /* out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2);
-#endif
-/**************************************************************************
-Does simulated aio. This function should be called by an i/o-handler
-thread. */
-
-ibool
-os_aio_simulated_handle(
-/*====================*/
- /* out: TRUE if the aio operation succeeded */
- ulint segment, /* in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads */
- fil_node_t**message1, /* out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type); /* out: OS_FILE_WRITE or ..._READ */
-/**************************************************************************
-Validates the consistency of the aio system. */
-
-ibool
-os_aio_validate(void);
-/*=================*/
- /* out: TRUE if ok */
-/**************************************************************************
-Prints info of the aio arrays. */
-
-void
-os_aio_print(
-/*=========*/
- FILE* file); /* in: file where to print */
-/**************************************************************************
-Refreshes the statistics used to print per-second averages. */
-
-void
-os_aio_refresh_stats(void);
-/*======================*/
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Checks that all slots in the system have been freed, that is, there are
-no pending io operations. */
-
-ibool
-os_aio_all_slots_free(void);
-/*=======================*/
-#endif /* UNIV_DEBUG */
-
-/***********************************************************************
-This function returns information about the specified file */
-ibool
-os_file_get_status(
-/*===============*/
- /* out: TRUE if stat
- information found */
- const char* path, /* in: pathname of the file */
- os_file_stat_t* stat_info); /* information of a file in a
- directory */
-
-#endif
diff --git a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
deleted file mode 100644
index f54e08de7ee..00000000000
--- a/storage/innobase/include/os0proc.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/******************************************************
-The interface to the operating system
-process control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0proc_h
-#define os0proc_h
-
-#include "univ.i"
-
-#ifdef UNIV_LINUX
-#include <sys/ipc.h>
-#include <sys/shm.h>
-#endif
-
-typedef void* os_process_t;
-typedef unsigned long int os_process_id_t;
-
-/* The cell type in os_awe_allocate_mem page info */
-#if defined(__WIN2000__) && defined(ULONG_PTR)
-typedef ULONG_PTR os_awe_t;
-#else
-typedef ulint os_awe_t;
-#endif
-
-/* Physical page size when Windows AWE is used. This is the normal
-page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
-pages. */
-#define OS_AWE_X86_PAGE_SIZE 4096
-
-extern ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-extern ulint os_large_page_size;
-
-/********************************************************************
-Windows AWE support. Tries to enable the "lock pages in memory" privilege for
-the current process so that the current process can allocate memory-locked
-virtual address space to act as the window where AWE maps physical memory. */
-
-ibool
-os_awe_enable_lock_pages_in_mem(void);
-/*=================================*/
- /* out: TRUE if success, FALSE if error;
- prints error info to stderr if no success */
-/********************************************************************
-Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
-processor. */
-
-ibool
-os_awe_allocate_physical_mem(
-/*=========================*/
- /* out: TRUE if success */
- os_awe_t** page_info, /* out, own: array of opaque data containing
- the info for allocated physical memory pages;
- each allocated 4 kB physical memory page has
- one slot of type os_awe_t in the array */
- ulint n_megabytes); /* in: number of megabytes to allocate */
-/********************************************************************
-Allocates a window in the virtual address space where we can map then
-pages of physical memory. */
-
-byte*
-os_awe_allocate_virtual_mem_window(
-/*===============================*/
- /* out, own: allocated memory, or NULL if did not
- succeed */
- ulint size); /* in: virtual memory allocation size in bytes, must
- be < 2 GB */
-/********************************************************************
-With this function you can map parts of physical memory allocated with
-the ..._allocate_physical_mem to the virtual address space allocated with
-the previous function. Intel implements this so that the process page
-tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
-showed that this takes < 1 microsecond, much better than the estimated 80 us
-for copying a 16 kB page memory to memory. But, the operation will at least
-partially invalidate the translation lookaside buffer (TLB) of all
-processors. Under a real-world load the performance hit may be bigger. */
-
-ibool
-os_awe_map_physical_mem_to_window(
-/*==============================*/
- /* out: TRUE if success; the function
- calls exit(1) in case of an error */
- byte* ptr, /* in: a page-aligned pointer to
- somewhere in the virtual address
- space window; we map the physical mem
- pages here */
- ulint n_mem_pages, /* in: number of 4 kB mem pages to
- map */
- os_awe_t* page_info); /* in: array of page infos for those
- pages; each page has one slot in the
- array */
-/********************************************************************
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'. */
-
-ulint
-os_proc_get_number(void);
-/*====================*/
-/********************************************************************
-Allocates non-cacheable memory. */
-
-void*
-os_mem_alloc_nocache(
-/*=================*/
- /* out: allocated memory */
- ulint n); /* in: number of bytes */
-/********************************************************************
-Allocates large pages memory. */
-
-void*
-os_mem_alloc_large(
-/*===============*/
- /* out: allocated memory */
- ulint n, /* in: number of bytes */
- ibool set_to_zero, /* in: TRUE if allocated memory
- should be set to zero if
- UNIV_SET_MEM_TO_ZERO is defined */
- ibool assert_on_error);/* in: if TRUE, we crash mysqld if
- the memory cannot be allocated */
-/********************************************************************
-Frees large pages memory. */
-
-void
-os_mem_free_large(
-/*==============*/
-void *ptr); /* in: number of bytes */
-/********************************************************************
-Sets the priority boost for threads released from waiting within the current
-process. */
-
-void
-os_process_set_priority_boost(
-/*==========================*/
- ibool do_boost); /* in: TRUE if priority boost should be done,
- FALSE if not */
-
-#ifndef UNIV_NONINL
-#include "os0proc.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/os0proc.ic b/storage/innobase/include/os0proc.ic
deleted file mode 100644
index 651ba1f17e3..00000000000
--- a/storage/innobase/include/os0proc.ic
+++ /dev/null
@@ -1,10 +0,0 @@
-/******************************************************
-The interface to the operating system
-process control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
deleted file mode 100644
index 26d2786e33b..00000000000
--- a/storage/innobase/include/os0sync.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/******************************************************
-The interface to the operating system
-synchronization primitives.
-
-(c) 1995 Innobase Oy
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-#ifndef os0sync_h
-#define os0sync_h
-
-#include "univ.i"
-#include "ut0lst.h"
-
-#ifdef HAVE_SOLARIS_ATOMIC
-#include <atomic.h>
-#endif
-
-#ifdef __WIN__
-
-#define os_fast_mutex_t CRITICAL_SECTION
-
-typedef HANDLE os_native_event_t;
-
-typedef struct os_event_struct os_event_struct_t;
-typedef os_event_struct_t* os_event_t;
-
-struct os_event_struct {
- os_native_event_t handle;
- /* Windows event */
- UT_LIST_NODE_T(os_event_struct_t) os_event_list;
- /* list of all created events */
-};
-#else
-typedef pthread_mutex_t os_fast_mutex_t;
-
-typedef struct os_event_struct os_event_struct_t;
-typedef os_event_struct_t* os_event_t;
-
-struct os_event_struct {
- os_fast_mutex_t os_mutex; /* this mutex protects the next
- fields */
- ibool is_set; /* this is TRUE when the event is
- in the signaled state, i.e., a thread
- does not stop if it tries to wait for
- this event */
- ib_longlong signal_count; /* this is incremented each time
- the event becomes signaled */
- pthread_cond_t cond_var; /* condition variable is used in
- waiting for the event */
- UT_LIST_NODE_T(os_event_struct_t) os_event_list;
- /* list of all created events */
-};
-#endif
-
-typedef struct os_mutex_struct os_mutex_str_t;
-typedef os_mutex_str_t* os_mutex_t;
-
-#define OS_SYNC_INFINITE_TIME ((ulint)(-1))
-
-#define OS_SYNC_TIME_EXCEEDED 1
-
-/* Mutex protecting counts and the event and OS 'slow' mutex lists */
-extern os_mutex_t os_sync_mutex;
-
-/* This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-extern ulint os_thread_count;
-
-extern ulint os_event_count;
-extern ulint os_mutex_count;
-extern ulint os_fast_mutex_count;
-
-/*************************************************************
-Initializes global event and OS 'slow' mutex lists. */
-
-void
-os_sync_init(void);
-/*==============*/
-/*************************************************************
-Frees created events and OS 'slow' mutexes. */
-
-void
-os_sync_free(void);
-/*==============*/
-/*************************************************************
-Creates an event semaphore, i.e., a semaphore which may just have two states:
-signaled and nonsignaled. The created event is manual reset: it must be reset
-explicitly by calling sync_os_reset_event. */
-
-os_event_t
-os_event_create(
-/*============*/
- /* out: the event handle */
- const char* name); /* in: the name of the event, if NULL
- the event is created without a name */
-#ifdef __WIN__
-/*************************************************************
-Creates an auto-reset event semaphore, i.e., an event which is automatically
-reset when a single thread is released. Works only in Windows. */
-
-os_event_t
-os_event_create_auto(
-/*=================*/
- /* out: the event handle */
- const char* name); /* in: the name of the event, if NULL
- the event is created without a name */
-#endif
-/**************************************************************
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. */
-
-void
-os_event_set(
-/*=========*/
- os_event_t event); /* in: event to set */
-/**************************************************************
-Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
-
-ib_longlong
-os_event_reset(
-/*===========*/
- os_event_t event); /* in: event to reset */
-/**************************************************************
-Frees an event object. */
-
-void
-os_event_free(
-/*==========*/
- os_event_t event); /* in: event to free */
-
-/**************************************************************
-Waits for an event object until it is in the signaled state. If
-srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
-waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state).
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-
-#define os_event_wait(event) os_event_wait_low((event), 0)
-
-void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /* in: event to wait */
- ib_longlong reset_sig_count);/* in: zero or the value
- returned by previous call of
- os_event_reset(). */
-
-/**************************************************************
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite. */
-
-ulint
-os_event_wait_time(
-/*===============*/
- /* out: 0 if success,
- OS_SYNC_TIME_EXCEEDED if timeout
- was exceeded */
- os_event_t event, /* in: event to wait */
- ulint time); /* in: timeout in microseconds, or
- OS_SYNC_INFINITE_TIME */
-#ifdef __WIN__
-/**************************************************************
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled. */
-
-ulint
-os_event_wait_multiple(
-/*===================*/
- /* out: index of the event
- which was signaled */
- ulint n, /* in: number of events in the
- array */
- os_native_event_t* native_event_array);
- /* in: pointer to an array of event
- handles */
-#endif
-/*************************************************************
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */
-
-os_mutex_t
-os_mutex_create(
-/*============*/
- /* out: the mutex handle */
- const char* name); /* in: the name of the mutex, if NULL
- the mutex is created without a name */
-/**************************************************************
-Acquires ownership of a mutex semaphore. */
-
-void
-os_mutex_enter(
-/*===========*/
- os_mutex_t mutex); /* in: mutex to acquire */
-/**************************************************************
-Releases ownership of a mutex. */
-
-void
-os_mutex_exit(
-/*==========*/
- os_mutex_t mutex); /* in: mutex to release */
-/**************************************************************
-Frees an mutex object. */
-
-void
-os_mutex_free(
-/*==========*/
- os_mutex_t mutex); /* in: mutex to free */
-/**************************************************************
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock! */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
- /* out: 0 if success, != 0 if
- was reserved by another
- thread */
- os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */
-/**************************************************************
-Releases ownership of a fast mutex. */
-
-void
-os_fast_mutex_unlock(
-/*=================*/
- os_fast_mutex_t* fast_mutex); /* in: mutex to release */
-/*************************************************************
-Initializes an operating system fast mutex semaphore. */
-
-void
-os_fast_mutex_init(
-/*===============*/
- os_fast_mutex_t* fast_mutex); /* in: fast mutex */
-/**************************************************************
-Acquires ownership of a fast mutex. */
-
-void
-os_fast_mutex_lock(
-/*===============*/
- os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */
-/**************************************************************
-Frees an mutex object. */
-
-void
-os_fast_mutex_free(
-/*===============*/
- os_fast_mutex_t* fast_mutex); /* in: mutex to free */
-
-#ifdef UNIV_SYNC_ATOMIC
-/**************************************************************
-Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins. */
-UNIV_INLINE
-ibool
-os_compare_and_swap(
-/*================*/
- /* out: true if swapped */
- volatile lint* ptr, /* in: pointer to target */
- lint oldVal, /* in: value to compare to */
- lint newVal); /* in: value to swap in */
-
-/**************************************************************
-Atomic increment for InnoDB. Currently requires GCC atomic builtins. */
-UNIV_INLINE
-lint
-os_atomic_increment(
-/*================*/
- /* out: resulting value */
- volatile lint* ptr, /* in: pointer to target */
- lint amount); /* in: amount of increment */
-
-/**************************************************************
-Memory barrier operations for InnoDB.
-Currently requires GCC atomic builtins. */
-UNIV_INLINE
-void
-os_memory_barrier_load();
-
-UNIV_INLINE
-void
-os_memory_barrier_store();
-
-UNIV_INLINE
-void
-os_memory_barrier();
-
-#endif /* UNIV_SYNC_ATOMIC */
-
-#ifndef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic
deleted file mode 100644
index d1307134172..00000000000
--- a/storage/innobase/include/os0sync.ic
+++ /dev/null
@@ -1,152 +0,0 @@
-/******************************************************
-The interface to the operating system synchronization primitives.
-
-(c) 1995 Innobase Oy
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifdef __WIN__
-#include <winbase.h>
-#endif
-
-/**************************************************************
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock! */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
- /* out: 0 if success, != 0 if
- was reserved by another
- thread */
- os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */
-{
-#ifdef __WIN__
- EnterCriticalSection(fast_mutex);
-
- return(0);
-#else
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
- /* Since the hot backup version is standalone, MySQL does not redefine
- pthread_mutex_trylock for HP-UX-10.20, and consequently we must invert
- the return value here */
-
- return((ulint) (1 - pthread_mutex_trylock(fast_mutex)));
-#else
- /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
- so that it returns 0 on success. In the operating system
- libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and
- returns 1 on success (but MySQL remaps that to 0), while Linux,
- FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */
-
- return((ulint) pthread_mutex_trylock(fast_mutex));
-#endif
-#endif
-}
-
-#ifdef UNIV_SYNC_ATOMIC
-/**************************************************************
-Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins
-or Solaris atomic_* functions. */
-UNIV_INLINE
-ibool
-os_compare_and_swap(
-/*================*/
- /* out: true if swapped */
- volatile lint* ptr, /* in: pointer to target */
- lint oldVal, /* in: value to compare to */
- lint newVal) /* in: value to swap in */
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
- return (__sync_bool_compare_and_swap(ptr, oldVal, newVal));
-#elif HAVE_SOLARIS_ATOMIC
- lint retVal = (lint)atomic_cas_ulong((volatile ulong_t *)ptr,
- oldVal, newVal);
- return (retVal == oldVal);
-#elif WIN_ATOMICS32
- lint retVal = (lint)InterlockedCompareExchange(ptr, newVal, oldVal);
- return (retVal == oldVal);
-#elif WIN_ATOMICS64
- lint retVal = (lint)InterlockedCompareExchange64(ptr, newVal, oldVal);
- return (retVal == oldVal);
-#else
-#error "Need support for atomic ops"
-#endif
-}
-
-/**************************************************************
-Memory barrier for load */
-UNIV_INLINE
-void
-os_memory_barrier_load()
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
- __sync_synchronize();
-#elif HAVE_SOLARIS_ATOMIC
- membar_consumer();
-#elif WIN_ATOMICS32
- MemoryBarrier();
-#elif WIN_ATOMICS64
- MemoryBarrier();
-#endif
-}
-
-/**************************************************************
-Memory barrier for store */
-UNIV_INLINE
-void
-os_memory_barrier_store()
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
- __sync_synchronize();
-#elif HAVE_SOLARIS_ATOMIC
- membar_producer();
-#elif WIN_ATOMICS32
- MemoryBarrier();
-#elif WIN_ATOMICS64
- MemoryBarrier();
-#endif
-}
-
-/**************************************************************
-Memory barrier */
-UNIV_INLINE
-void
-os_memory_barrier()
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
- __sync_synchronize();
-#elif HAVE_SOLARIS_ATOMIC
- membar_enter();
-#elif WIN_ATOMICS32
- MemoryBarrier();
-#elif WIN_ATOMICS64
- MemoryBarrier();
-#endif
-}
-
-
-/**************************************************************
-Atomic increment for InnoDB. Currently requires GCC atomic builtins. */
-UNIV_INLINE
-lint
-os_atomic_increment(
-/*================*/
- /* out: resulting value */
- volatile lint* ptr, /* in: pointer to target */
- lint amount) /* in: amount of increment */
-{
-#ifdef HAVE_GCC_ATOMIC_BUILTINS
- return (__sync_add_and_fetch(ptr, amount));
-#elif HAVE_SOLARIS_ATOMIC
- return ((lint)atomic_add_long_nv((volatile ulong_t *)ptr, amount));
-#elif WIN_ATOMICS32
- return ((lint)InterlockedExchangeAdd(ptr, amount) + amount);
-#elif WIN_ATOMICS64
- return ((lint)InterlockedExchangeAdd64(ptr, amount) + amount);
-#else
-#error "Need support for atomic ops"
-#endif
-}
-#endif /* UNIV_SYNC_ATOMIC */
diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h
deleted file mode 100644
index 3cf05feb3a9..00000000000
--- a/storage/innobase/include/os0thread.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/******************************************************
-The interface to the operating system
-process and thread control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0thread_h
-#define os0thread_h
-
-#include "univ.i"
-
-/* Maximum number of threads which can be created in the program;
-this is also the size of the wait slot array for MySQL threads which
-can wait inside InnoDB */
-
-#define OS_THREAD_MAX_N srv_max_n_threads
-
-
-/* Possible fixed priorities for threads */
-#define OS_THREAD_PRIORITY_NONE 100
-#define OS_THREAD_PRIORITY_BACKGROUND 1
-#define OS_THREAD_PRIORITY_NORMAL 2
-#define OS_THREAD_PRIORITY_ABOVE_NORMAL 3
-
-#ifdef __WIN__
-typedef void* os_thread_t;
-typedef ulint os_thread_id_t; /* In Windows the thread id
- is an unsigned long int */
-#else
-typedef pthread_t os_thread_t;
-typedef os_thread_t os_thread_id_t; /* In Unix we use the thread
- handle itself as the id of
- the thread */
-#endif
-
-/* Define a function pointer type to use in a typecast */
-typedef void* (*os_posix_f_t) (void*);
-
-/*******************************************************************
-Compares two thread ids for equality. */
-
-ibool
-os_thread_eq(
-/*=========*/
- /* out: TRUE if equal */
- os_thread_id_t a, /* in: OS thread or thread id */
- os_thread_id_t b); /* in: OS thread or thread id */
-/********************************************************************
-Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though! */
-
-ulint
-os_thread_pf(
-/*=========*/
- /* out: unsigned long int */
- os_thread_id_t a); /* in: thread or thread id */
-/********************************************************************
-Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and returns a ulint.
-NOTE: We count the number of threads in os_thread_exit(). A created
-thread should always use that to exit and not use return() to exit. */
-
-os_thread_t
-os_thread_create(
-/*=============*/
- /* out: handle to the thread */
-#ifndef __WIN__
- os_posix_f_t start_f,
-#else
- ulint (*start_f)(void*), /* in: pointer to function
- from which to start */
-#endif
- void* arg, /* in: argument to start
- function */
- os_thread_id_t* thread_id); /* out: id of the created
- thread, or NULL */
-int
-os_thread_join(
-/*===========*/
- os_thread_id_t thread_id); /* in: id of the thread to join */
-/*********************************************************************
-Exits the current thread. */
-
-void
-os_thread_exit(
-/*===========*/
- void* exit_value); /* in: exit value; in Windows this void*
- is cast as a DWORD */
-/*********************************************************************
-Returns the thread identifier of current thread. */
-
-os_thread_id_t
-os_thread_get_curr_id(void);
-/*========================*/
-/*********************************************************************
-Returns handle to the current thread. */
-
-os_thread_t
-os_thread_get_curr(void);
-/*====================*/
-/*********************************************************************
-Advises the os to give up remainder of the thread's time slice. */
-
-void
-os_thread_yield(void);
-/*=================*/
-/*********************************************************************
-The thread sleeps at least the time given in microseconds. */
-
-void
-os_thread_sleep(
-/*============*/
- ulint tm); /* in: time in microseconds */
-/**********************************************************************
-Gets a thread priority. */
-
-ulint
-os_thread_get_priority(
-/*===================*/
- /* out: priority */
- os_thread_t handle);/* in: OS handle to the thread */
-/**********************************************************************
-Sets a thread priority. */
-
-void
-os_thread_set_priority(
-/*===================*/
- os_thread_t handle, /* in: OS handle to the thread */
- ulint pri); /* in: priority: one of OS_PRIORITY_... */
-/**********************************************************************
-Gets the last operating system error code for the calling thread. */
-
-ulint
-os_thread_get_last_error(void);
-/*==========================*/
-
-#ifndef UNIV_NONINL
-#include "os0thread.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/os0thread.ic b/storage/innobase/include/os0thread.ic
deleted file mode 100644
index a75aa3abb34..00000000000
--- a/storage/innobase/include/os0thread.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-/******************************************************
-The interface to the operating system
-process and thread control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
deleted file mode 100644
index 04f731414a3..00000000000
--- a/storage/innobase/include/page0cur.h
+++ /dev/null
@@ -1,286 +0,0 @@
-/************************************************************************
-The page cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef page0cur_h
-#define page0cur_h
-
-#include "univ.i"
-
-#include "page0types.h"
-#include "page0page.h"
-#include "rem0rec.h"
-#include "data0data.h"
-#include "mtr0mtr.h"
-
-
-#define PAGE_CUR_ADAPT
-
-/* Page cursor search modes; the values must be in this order! */
-
-#define PAGE_CUR_UNSUPP 0
-#define PAGE_CUR_G 1
-#define PAGE_CUR_GE 2
-#define PAGE_CUR_L 3
-#define PAGE_CUR_LE 4
-/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in
- "column LIKE 'abc%' ORDER BY column DESC";
- we have to find strings which are <= 'abc' or
- which extend it */
-#ifdef UNIV_SEARCH_DEBUG
-# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */
-#endif /* UNIV_SEARCH_DEBUG */
-
-#ifdef PAGE_CUR_ADAPT
-# ifdef UNIV_SEARCH_PERF_STAT
-extern ulint page_cur_short_succ;
-# endif /* UNIV_SEARCH_PERF_STAT */
-#endif /* PAGE_CUR_ADAPT */
-
-/*************************************************************
-Gets pointer to the page frame where the cursor is positioned. */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
- /* out: page */
- page_cur_t* cur); /* in: page cursor */
-/*************************************************************
-Gets the record where the cursor is positioned. */
-UNIV_INLINE
-rec_t*
-page_cur_get_rec(
-/*=============*/
- /* out: record */
- page_cur_t* cur); /* in: page cursor */
-/*************************************************************
-Sets the cursor object to point before the first user record
-on the page. */
-UNIV_INLINE
-void
-page_cur_set_before_first(
-/*======================*/
- page_t* page, /* in: index page */
- page_cur_t* cur); /* in: cursor */
-/*************************************************************
-Sets the cursor object to point after the last user record on
-the page. */
-UNIV_INLINE
-void
-page_cur_set_after_last(
-/*====================*/
- page_t* page, /* in: index page */
- page_cur_t* cur); /* in: cursor */
-/*************************************************************
-Returns TRUE if the cursor is before first user record on page. */
-UNIV_INLINE
-ibool
-page_cur_is_before_first(
-/*=====================*/
- /* out: TRUE if at start */
- const page_cur_t* cur); /* in: cursor */
-/*************************************************************
-Returns TRUE if the cursor is after last user record. */
-UNIV_INLINE
-ibool
-page_cur_is_after_last(
-/*===================*/
- /* out: TRUE if at end */
- const page_cur_t* cur); /* in: cursor */
-/**************************************************************
-Positions the cursor on the given record. */
-UNIV_INLINE
-void
-page_cur_position(
-/*==============*/
- rec_t* rec, /* in: record on a page */
- page_cur_t* cur); /* in: page cursor */
-/**************************************************************
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
- page_cur_t* cur); /* in: page cursor */
-/**************************************************************
-Moves the cursor to the next record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_next(
-/*==================*/
- page_cur_t* cur); /* in: cursor; must not be after last */
-/**************************************************************
-Moves the cursor to the previous record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_prev(
-/*==================*/
- page_cur_t* cur); /* in: cursor; must not before first */
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
-UNIV_INLINE
-rec_t*
-page_cur_tuple_insert(
-/*==================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mini-transaction handle */
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
-UNIV_INLINE
-rec_t*
-page_cur_rec_insert(
-/*================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- rec_t* rec, /* in: record to insert */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets,/* in: rec_get_offsets(rec, index) */
- mtr_t* mtr); /* in: mini-transaction handle */
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The record to be
-inserted can be in a data tuple or as a physical record. The other parameter
-must then be NULL. The cursor stays at the same position. */
-
-rec_t*
-page_cur_insert_rec_low(
-/*====================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
- dict_index_t* index, /* in: record descriptor */
- rec_t* rec, /* in: pointer to a physical record or NULL */
- ulint* offsets,/* in: rec_get_offsets(rec, index) or NULL */
- mtr_t* mtr); /* in: mini-transaction handle */
-/*****************************************************************
-Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied. */
-
-void
-page_copy_rec_list_end_to_created_page(
-/*===================================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: first record to copy */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************
-Deletes a record at the page cursor. The cursor is moved to the
-next record after the deleted one. */
-
-void
-page_cur_delete_rec(
-/*================*/
- page_cur_t* cursor, /* in: a page cursor */
- dict_index_t* index, /* in: record descriptor */
- const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr); /* in: mini-transaction handle */
-/********************************************************************
-Searches the right position for a page cursor. */
-UNIV_INLINE
-ulint
-page_cur_search(
-/*============*/
- /* out: number of matched fields on the left */
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- page_cur_t* cursor);/* out: page cursor */
-/********************************************************************
-Searches the right position for a page cursor. */
-
-void
-page_cur_search_with_match(
-/*=======================*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- ulint* iup_matched_fields,
- /* in/out: already matched fields in upper
- limit record */
- ulint* iup_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- ulint* ilow_matched_fields,
- /* in/out: already matched fields in lower
- limit record */
- ulint* ilow_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- page_cur_t* cursor); /* out: page cursor */
-/***************************************************************
-Positions a page cursor on a randomly chosen user record on a page. If there
-are no user records, sets the cursor on the infimum record. */
-
-void
-page_cur_open_on_rnd_user_rec(
-/*==========================*/
- page_t* page, /* in: page */
- page_cur_t* cursor);/* in/out: page cursor */
-/***************************************************************
-Parses a log record of a record insert on a page. */
-
-byte*
-page_cur_parse_insert_rec(
-/*======================*/
- /* out: end of log record or NULL */
- ibool is_short,/* in: TRUE if short inserts */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/**************************************************************
-Parses a log record of copying a record list end to a new created page. */
-
-byte*
-page_parse_copy_rec_list_to_created_page(
-/*=====================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses log record of a record delete on a page. */
-
-byte*
-page_cur_parse_delete_rec(
-/*======================*/
- /* out: pointer to record end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-
-/* Index page cursor */
-
-struct page_cur_struct{
- byte* rec; /* pointer to a record on page */
-};
-
-#ifndef UNIV_NONINL
-#include "page0cur.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
deleted file mode 100644
index b747874abc2..00000000000
--- a/storage/innobase/include/page0cur.ic
+++ /dev/null
@@ -1,210 +0,0 @@
-/************************************************************************
-The page cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "page0page.h"
-
-
-/*************************************************************
-Gets pointer to the page frame where the cursor is positioned. */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
- /* out: page */
- page_cur_t* cur) /* in: page cursor */
-{
- ut_ad(cur);
-
- return(buf_frame_align(cur->rec));
-}
-
-/*************************************************************
-Gets the record where the cursor is positioned. */
-UNIV_INLINE
-rec_t*
-page_cur_get_rec(
-/*=============*/
- /* out: record */
- page_cur_t* cur) /* in: page cursor */
-{
- ut_ad(cur);
-
- return(cur->rec);
-}
-
-/*************************************************************
-Sets the cursor object to point before the first user record
-on the page. */
-UNIV_INLINE
-void
-page_cur_set_before_first(
-/*======================*/
- page_t* page, /* in: index page */
- page_cur_t* cur) /* in: cursor */
-{
- cur->rec = page_get_infimum_rec(page);
-}
-
-/*************************************************************
-Sets the cursor object to point after the last user record on
-the page. */
-UNIV_INLINE
-void
-page_cur_set_after_last(
-/*====================*/
- page_t* page, /* in: index page */
- page_cur_t* cur) /* in: cursor */
-{
- cur->rec = page_get_supremum_rec(page);
-}
-
-/*************************************************************
-Returns TRUE if the cursor is before first user record on page. */
-UNIV_INLINE
-ibool
-page_cur_is_before_first(
-/*=====================*/
- /* out: TRUE if at start */
- const page_cur_t* cur) /* in: cursor */
-{
- return(page_rec_is_infimum(cur->rec));
-}
-
-/*************************************************************
-Returns TRUE if the cursor is after last user record. */
-UNIV_INLINE
-ibool
-page_cur_is_after_last(
-/*===================*/
- /* out: TRUE if at end */
- const page_cur_t* cur) /* in: cursor */
-{
- return(page_rec_is_supremum(cur->rec));
-}
-
-/**************************************************************
-Positions the cursor on the given record. */
-UNIV_INLINE
-void
-page_cur_position(
-/*==============*/
- rec_t* rec, /* in: record on a page */
- page_cur_t* cur) /* in: page cursor */
-{
- ut_ad(rec && cur);
-
- cur->rec = rec;
-}
-
-/**************************************************************
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
- page_cur_t* cur) /* in: page cursor */
-{
- ut_ad(cur);
-
- cur->rec = NULL;
-}
-
-/**************************************************************
-Moves the cursor to the next record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_next(
-/*==================*/
- page_cur_t* cur) /* in: cursor; must not be after last */
-{
- ut_ad(!page_cur_is_after_last(cur));
-
- cur->rec = page_rec_get_next(cur->rec);
-}
-
-/**************************************************************
-Moves the cursor to the previous record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_prev(
-/*==================*/
- page_cur_t* cur) /* in: page cursor, not before first */
-{
- ut_ad(!page_cur_is_before_first(cur));
-
- cur->rec = page_rec_get_prev(cur->rec);
-}
-
-/********************************************************************
-Searches the right position for a page cursor. */
-UNIV_INLINE
-ulint
-page_cur_search(
-/*============*/
- /* out: number of matched fields on the left */
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- page_cur_t* cursor) /* out: page cursor */
-{
- ulint low_matched_fields = 0;
- ulint low_matched_bytes = 0;
- ulint up_matched_fields = 0;
- ulint up_matched_bytes = 0;
-
- ut_ad(dtuple_check_typed(tuple));
-
- page_cur_search_with_match(page, index, tuple, mode,
- &up_matched_fields,
- &up_matched_bytes,
- &low_matched_fields,
- &low_matched_bytes,
- cursor);
- return(low_matched_fields);
-}
-
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
-UNIV_INLINE
-rec_t*
-page_cur_tuple_insert(
-/*==================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- return(page_cur_insert_rec_low(cursor, tuple, index, NULL, NULL, mtr));
-}
-
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same position. */
-UNIV_INLINE
-rec_t*
-page_cur_rec_insert(
-/*================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- rec_t* rec, /* in: record to insert */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets,/* in: rec_get_offsets(rec, index) */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- return(page_cur_insert_rec_low(cursor, NULL, index, rec,
- offsets, mtr));
-}
-
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
deleted file mode 100644
index 273007c2778..00000000000
--- a/storage/innobase/include/page0page.h
+++ /dev/null
@@ -1,829 +0,0 @@
-/******************************************************
-Index page routines
-
-(c) 1994-1996 Innobase Oy
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef page0page_h
-#define page0page_h
-
-#include "univ.i"
-
-#include "page0types.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-#include "data0data.h"
-#include "dict0dict.h"
-#include "rem0rec.h"
-#include "fsp0fsp.h"
-#include "mtr0mtr.h"
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE
-#endif
-
-/* PAGE HEADER
- ===========
-
-Index page header starts at the first offset left free by the FIL-module */
-
-typedef byte page_header_t;
-
-#define PAGE_HEADER FSEG_PAGE_DATA /* index page header starts at this
- offset */
-/*-----------------------------*/
-#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */
-#define PAGE_HEAP_TOP 2 /* pointer to record heap top */
-#define PAGE_N_HEAP 4 /* number of records in the heap,
- bit 15=flag: new-style compact page format */
-#define PAGE_FREE 6 /* pointer to start of page free record list */
-#define PAGE_GARBAGE 8 /* number of bytes in deleted records */
-#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or
- NULL if this info has been reset by a delete,
- for example */
-#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... */
-#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same
- direction */
-#define PAGE_N_RECS 16 /* number of user records on the page */
-#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified
- a record on the page; a dulint; defined only
- in secondary indexes; specifically, not in an
- ibuf tree; NOTE: this may be modified only
- when the thread has an x-latch to the page,
- and ALSO an x-latch to btr_search_latch
- if there is a hash index to the page! */
-#define PAGE_HEADER_PRIV_END 26 /* end of private data structure of the page
- header which are set in a page create */
-/*----*/
-#define PAGE_LEVEL 26 /* level of the node in an index tree; the
- leaf level is the level 0 */
-#define PAGE_INDEX_ID 28 /* index id where the page belongs */
-#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in
- a B-tree: defined only on the root page of a
- B-tree, but not in the root of an ibuf tree */
-#define PAGE_BTR_IBUF_FREE_LIST PAGE_BTR_SEG_LEAF
-#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF
- /* in the place of PAGE_BTR_SEG_LEAF and _TOP
- there is a free list base node if the page is
- the root page of an ibuf tree, and at the same
- place is the free list node if the page is in
- a free list */
-#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE)
- /* file segment header for the non-leaf pages
- in a B-tree: defined only on the root page of
- a B-tree, but not in the root of an ibuf
- tree */
-/*----*/
-#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
- /* start of data on the page */
-
-#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES)
- /* offset of the page infimum record on an
- old-style page */
-#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8)
- /* offset of the page supremum record on an
- old-style page */
-#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9)
- /* offset of the page supremum record end on
- an old-style page */
-#define PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES)
- /* offset of the page infimum record on a
- new-style compact page */
-#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8)
- /* offset of the page supremum record on a
- new-style compact page */
-#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8)
- /* offset of the page supremum record end on
- a new-style compact page */
-/*-----------------------------*/
-
-/* Directions of cursor movement */
-#define PAGE_LEFT 1
-#define PAGE_RIGHT 2
-#define PAGE_SAME_REC 3
-#define PAGE_SAME_PAGE 4
-#define PAGE_NO_DIRECTION 5
-
-/* PAGE DIRECTORY
- ==============
-*/
-
-typedef byte page_dir_slot_t;
-typedef page_dir_slot_t page_dir_t;
-
-/* Offset of the directory start down from the page end. We call the
-slot with the highest file address directory start, as it points to
-the first record in the list of records. */
-#define PAGE_DIR FIL_PAGE_DATA_END
-
-/* We define a slot in the page directory as two bytes */
-#define PAGE_DIR_SLOT_SIZE 2
-
-/* The offset of the physically lower end of the directory, counted from
-page end, when the page is empty */
-#define PAGE_EMPTY_DIR_START (PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE)
-
-/* The maximum and minimum number of records owned by a directory slot. The
-number may drop below the minimum in the first and the last slot in the
-directory. */
-#define PAGE_DIR_SLOT_MAX_N_OWNED 8
-#define PAGE_DIR_SLOT_MIN_N_OWNED 4
-
-/****************************************************************
-Gets the start of a page. */
-UNIV_INLINE
-page_t*
-page_align(
-/*=======*/
- /* out: start of the page */
- void* ptr) /* in: pointer to page frame */
- __attribute__((const));
-/****************************************************************
-Gets the offset within a page. */
-UNIV_INLINE
-ulint
-page_offset(
-/*========*/
- /* out: offset from the start of the page */
- const void* ptr) /* in: pointer to page frame */
- __attribute__((const));
-/*****************************************************************
-Returns the max trx id field value. */
-UNIV_INLINE
-dulint
-page_get_max_trx_id(
-/*================*/
- page_t* page); /* in: page */
-/*****************************************************************
-Sets the max trx id field value. */
-
-void
-page_set_max_trx_id(
-/*================*/
- page_t* page, /* in: page */
- dulint trx_id);/* in: transaction id */
-/*****************************************************************
-Sets the max trx id field value if trx_id is bigger than the previous
-value. */
-UNIV_INLINE
-void
-page_update_max_trx_id(
-/*===================*/
- page_t* page, /* in: page */
- dulint trx_id); /* in: transaction id */
-/*****************************************************************
-Reads the given header field. */
-UNIV_INLINE
-ulint
-page_header_get_field(
-/*==================*/
- page_t* page, /* in: page */
- ulint field); /* in: PAGE_N_DIR_SLOTS, ... */
-/*****************************************************************
-Sets the given header field. */
-UNIV_INLINE
-void
-page_header_set_field(
-/*==================*/
- page_t* page, /* in: page */
- ulint field, /* in: PAGE_N_DIR_SLOTS, ... */
- ulint val); /* in: value */
-/*****************************************************************
-Returns the pointer stored in the given header field. */
-UNIV_INLINE
-byte*
-page_header_get_ptr(
-/*================*/
- /* out: pointer or NULL */
- page_t* page, /* in: page */
- ulint field); /* in: PAGE_FREE, ... */
-/*****************************************************************
-Sets the pointer stored in the given header field. */
-UNIV_INLINE
-void
-page_header_set_ptr(
-/*================*/
- page_t* page, /* in: page */
- ulint field, /* in: PAGE_FREE, ... */
- byte* ptr); /* in: pointer or NULL*/
-/*****************************************************************
-Resets the last insert info field in the page header. Writes to mlog
-about this operation. */
-UNIV_INLINE
-void
-page_header_reset_last_insert(
-/*==========================*/
- page_t* page, /* in: page */
- mtr_t* mtr); /* in: mtr */
-/****************************************************************
-Gets the first record on the page. */
-UNIV_INLINE
-rec_t*
-page_get_infimum_rec(
-/*=================*/
- /* out: the first record in record list */
- page_t* page); /* in: page which must have record(s) */
-/****************************************************************
-Gets the last record on the page. */
-UNIV_INLINE
-rec_t*
-page_get_supremum_rec(
-/*==================*/
- /* out: the last record in record list */
- page_t* page); /* in: page which must have record(s) */
-/****************************************************************
-Returns the middle record of record list. If there are an even number
-of records in the list, returns the first record of upper half-list. */
-
-rec_t*
-page_get_middle_rec(
-/*================*/
- /* out: middle record */
- page_t* page); /* in: page */
-/*****************************************************************
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order. */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record on a page; may also
- be page infimum or supremum, in which case
- matched-parameter values below are not
- affected */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields, /* in/out: number of already completely
- matched fields; when function returns
- contains the value for current comparison */
- ulint* matched_bytes); /* in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns contains the
- value for current comparison */
-/*****************************************************************
-Gets the number of user records on page (the infimum and supremum records
-are not user records). */
-UNIV_INLINE
-ulint
-page_get_n_recs(
-/*============*/
- /* out: number of user records */
- page_t* page); /* in: index page */
-/*******************************************************************
-Returns the number of records before the given record in chain.
-The number includes infimum and supremum records. */
-
-ulint
-page_rec_get_n_recs_before(
-/*=======================*/
- /* out: number of records */
- rec_t* rec); /* in: the physical record */
-/*****************************************************************
-Gets the number of records in the heap. */
-UNIV_INLINE
-ulint
-page_dir_get_n_heap(
-/*================*/
- /* out: number of user records */
- page_t* page); /* in: index page */
-/*****************************************************************
-Sets the number of records in the heap. */
-UNIV_INLINE
-void
-page_dir_set_n_heap(
-/*================*/
- page_t* page, /* in: index page */
- ulint n_heap);/* in: number of records */
-/*****************************************************************
-Gets the number of dir slots in directory. */
-UNIV_INLINE
-ulint
-page_dir_get_n_slots(
-/*=================*/
- /* out: number of slots */
- page_t* page); /* in: index page */
-/*****************************************************************
-Sets the number of dir slots in directory. */
-UNIV_INLINE
-void
-page_dir_set_n_slots(
-/*=================*/
- /* out: number of slots */
- page_t* page, /* in: index page */
- ulint n_slots);/* in: number of slots */
-/*****************************************************************
-Gets pointer to nth directory slot. */
-UNIV_INLINE
-page_dir_slot_t*
-page_dir_get_nth_slot(
-/*==================*/
- /* out: pointer to dir slot */
- page_t* page, /* in: index page */
- ulint n); /* in: position */
-/******************************************************************
-Used to check the consistency of a record on a page. */
-UNIV_INLINE
-ibool
-page_rec_check(
-/*===========*/
- /* out: TRUE if succeed */
- rec_t* rec); /* in: record */
-/*******************************************************************
-Gets the record pointed to by a directory slot. */
-UNIV_INLINE
-rec_t*
-page_dir_slot_get_rec(
-/*==================*/
- /* out: pointer to record */
- page_dir_slot_t* slot); /* in: directory slot */
-/*******************************************************************
-This is used to set the record offset in a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_rec(
-/*==================*/
- page_dir_slot_t* slot, /* in: directory slot */
- rec_t* rec); /* in: record on the page */
-/*******************************************************************
-Gets the number of records owned by a directory slot. */
-UNIV_INLINE
-ulint
-page_dir_slot_get_n_owned(
-/*======================*/
- /* out: number of records */
- page_dir_slot_t* slot); /* in: page directory slot */
-/*******************************************************************
-This is used to set the owned records field of a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_n_owned(
-/*======================*/
- page_dir_slot_t* slot, /* in: directory slot */
- ulint n); /* in: number of records owned
- by the slot */
-/****************************************************************
-Calculates the space reserved for directory slots of a given
-number of records. The exact value is a fraction number
-n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is
-rounded upwards to an integer. */
-UNIV_INLINE
-ulint
-page_dir_calc_reserved_space(
-/*=========================*/
- ulint n_recs); /* in: number of records */
-/*******************************************************************
-Looks for the directory slot which owns the given record. */
-
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
- /* out: the directory slot number */
- rec_t* rec); /* in: the physical record */
-/****************************************************************
-Determine whether the page is in new-style compact format. */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
- /* out: nonzero if the page is in compact
- format, zero if it is in old-style format */
- page_t* page); /* in: index page */
-/****************************************************************
-TRUE if the record is on a page in compact format. */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
- /* out: nonzero if in compact format */
- const rec_t* rec); /* in: record */
-/****************************************************************
-Gets the pointer to the next record on the page. */
-UNIV_INLINE
-rec_t*
-page_rec_get_next(
-/*==============*/
- /* out: pointer to next record */
- rec_t* rec); /* in: pointer to record, must not be page
- supremum */
-/****************************************************************
-Sets the pointer to the next record on the page. */
-UNIV_INLINE
-void
-page_rec_set_next(
-/*==============*/
- rec_t* rec, /* in: pointer to record, must not be
- page supremum */
- rec_t* next); /* in: pointer to next record, must not
- be page infimum */
-/****************************************************************
-Gets the pointer to the previous record. */
-UNIV_INLINE
-rec_t*
-page_rec_get_prev(
-/*==============*/
- /* out: pointer to previous record */
- rec_t* rec); /* in: pointer to record,
- must not be page infimum */
-/****************************************************************
-TRUE if the record is a user record on the page. */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
- /* out: TRUE if a user record */
- ulint offset);/* in: record offset on page */
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
- /* out: TRUE if the supremum record */
- ulint offset);/* in: record offset on page */
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
-/*====================*/
- /* out: TRUE if the infimum record */
- ulint offset);/* in: record offset on page */
-
-/****************************************************************
-TRUE if the record is a user record on the page. */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
- /* out: TRUE if a user record */
- const rec_t* rec); /* in: record */
-/****************************************************************
-TRUE if the record is the supremum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
- /* out: TRUE if the supremum record */
- const rec_t* rec); /* in: record */
-/****************************************************************
-TRUE if the record is the infimum record on a page. */
-UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
- /* out: TRUE if the infimum record */
- const rec_t* rec); /* in: record */
-/*******************************************************************
-Looks for the record which owns the given record. */
-UNIV_INLINE
-rec_t*
-page_rec_find_owner_rec(
-/*====================*/
- /* out: the owner record */
- rec_t* rec); /* in: the physical record */
-/***************************************************************************
-This is a low-level operation which is used in a database index creation
-to update the page number of a created B-tree to a data dictionary
-record. */
-
-void
-page_rec_write_index_page_no(
-/*=========================*/
- rec_t* rec, /* in: record to update */
- ulint i, /* in: index of the field to update */
- ulint page_no,/* in: value to write */
- mtr_t* mtr); /* in: mtr */
-/****************************************************************
-Returns the maximum combined size of records which can be inserted on top
-of record heap. */
-UNIV_INLINE
-ulint
-page_get_max_insert_size(
-/*=====================*/
- /* out: maximum combined size for inserted records */
- page_t* page, /* in: index page */
- ulint n_recs); /* in: number of records */
-/****************************************************************
-Returns the maximum combined size of records which can be inserted on top
-of record heap if page is first reorganized. */
-UNIV_INLINE
-ulint
-page_get_max_insert_size_after_reorganize(
-/*======================================*/
- /* out: maximum combined size for inserted records */
- page_t* page, /* in: index page */
- ulint n_recs);/* in: number of records */
-/*****************************************************************
-Calculates free space if a page is emptied. */
-UNIV_INLINE
-ulint
-page_get_free_space_of_empty(
-/*=========================*/
- /* out: free space */
- ulint comp) /* in: nonzero=compact page format */
- __attribute__((const));
-/*****************************************************************
-Calculates free space if a page is emptied. */
-
-ulint
-page_get_free_space_of_empty_noninline(
-/*===================================*/
- /* out: free space */
- ulint comp) /* in: nonzero=compact page format */
- __attribute__((const));
-/****************************************************************
-Returns the sum of the sizes of the records in the record list
-excluding the infimum and supremum records. */
-UNIV_INLINE
-ulint
-page_get_data_size(
-/*===============*/
- /* out: data in bytes */
- page_t* page); /* in: index page */
-/****************************************************************
-Allocates a block of memory from an index page. */
-
-byte*
-page_mem_alloc(
-/*===========*/
- /* out: pointer to start of allocated
- buffer, or NULL if allocation fails */
- page_t* page, /* in: index page */
- ulint need, /* in: number of bytes needed */
- dict_index_t* index, /* in: record descriptor */
- ulint* heap_no);/* out: this contains the heap number
- of the allocated record
- if allocation succeeds */
-/****************************************************************
-Puts a record to free list. */
-UNIV_INLINE
-void
-page_mem_free(
-/*==========*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: pointer to the (origin of) record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-The index page creation function. */
-
-page_t*
-page_create(
-/*========*/
- /* out: pointer to the page */
- buf_frame_t* frame, /* in: a buffer frame where the page is
- created */
- mtr_t* mtr, /* in: mini-transaction handle */
- ulint comp); /* in: nonzero=compact page format */
-/*****************************************************************
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page. */
-
-void
-page_copy_rec_list_end_no_locks(
-/*============================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Copies records from page to new_page, from the given record onward,
-including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page. */
-
-void
-page_copy_rec_list_end(
-/*===================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Copies records from page to new_page, up to the given record, NOT
-including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page. */
-
-void
-page_copy_rec_list_start(
-/*=====================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Deletes records from a page from a given record onward, including that record.
-The infimum and supremum records are not deleted. */
-
-void
-page_delete_rec_list_end(
-/*=====================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- ulint n_recs, /* in: number of records to delete,
- or ULINT_UNDEFINED if not known */
- ulint size, /* in: the sum of the sizes of the
- records in the end of the chain to
- delete, or ULINT_UNDEFINED if not known */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Deletes records from page, up to the given record, NOT including
-that record. Infimum and supremum records are not deleted. */
-
-void
-page_delete_rec_list_start(
-/*=======================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Moves record list end to another page. Moved records include
-split_rec. */
-
-void
-page_move_rec_list_end(
-/*===================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record to move */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
-Moves record list start to another page. Moved records do not include
-split_rec. */
-
-void
-page_move_rec_list_start(
-/*=====================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record not to move */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
-Splits a directory slot which owns too many records. */
-
-void
-page_dir_split_slot(
-/*================*/
- page_t* page, /* in: the index page in question */
- ulint slot_no); /* in: the directory slot */
-/*****************************************************************
-Tries to balance the given directory slot with too few records
-with the upper neighbor, so that there are at least the minimum number
-of records owned by the slot; this may result in the merging of
-two slots. */
-
-void
-page_dir_balance_slot(
-/*==================*/
- page_t* page, /* in: index page */
- ulint slot_no); /* in: the directory slot */
-/**************************************************************
-Parses a log record of a record list end or start deletion. */
-
-byte*
-page_parse_delete_rec_list(
-/*=======================*/
- /* out: end of log record or NULL */
- byte type, /* in: MLOG_LIST_END_DELETE,
- MLOG_LIST_START_DELETE,
- MLOG_COMP_LIST_END_DELETE or
- MLOG_COMP_LIST_START_DELETE */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses a redo log record of creating a page. */
-
-byte*
-page_parse_create(
-/*==============*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- ulint comp, /* in: nonzero=compact page format */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/****************************************************************
-Prints record contents including the data relevant only in
-the index page context. */
-
-void
-page_rec_print(
-/*===========*/
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: record descriptor */
-/*******************************************************************
-This is used to print the contents of the directory for
-debugging purposes. */
-
-void
-page_dir_print(
-/*===========*/
- page_t* page, /* in: index page */
- ulint pr_n); /* in: print n first and n last entries */
-/*******************************************************************
-This is used to print the contents of the page record list for
-debugging purposes. */
-
-void
-page_print_list(
-/*============*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: dictionary index of the page */
- ulint pr_n); /* in: print n first and n last entries */
-/*******************************************************************
-Prints the info in a page header. */
-
-void
-page_header_print(
-/*==============*/
- page_t* page);
-/*******************************************************************
-This is used to print the contents of the page for
-debugging purposes. */
-
-void
-page_print(
-/*=======*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: dictionary index of the page */
- ulint dn, /* in: print dn first and last entries
- in directory */
- ulint rn); /* in: print rn first and last records
- in directory */
-/*******************************************************************
-The following is used to validate a record on a page. This function
-differs from rec_validate as it can also check the n_owned field and
-the heap_no field. */
-
-ibool
-page_rec_validate(
-/*==============*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Checks that the first directory slot points to the infimum record and
-the last to the supremum. This function is intended to track if the
-bug fixed in 4.0.14 has caused corruption to users' databases. */
-
-void
-page_check_dir(
-/*===========*/
- page_t* page); /* in: index page */
-/*******************************************************************
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage. */
-
-ibool
-page_simple_validate(
-/*=================*/
- /* out: TRUE if ok */
- page_t* page); /* in: index page */
-/*******************************************************************
-This function checks the consistency of an index page. */
-
-ibool
-page_validate(
-/*==========*/
- /* out: TRUE if ok */
- page_t* page, /* in: index page */
- dict_index_t* index); /* in: data dictionary index containing
- the page record type definition */
-/*******************************************************************
-Looks in the page record list for a record with the given heap number. */
-
-rec_t*
-page_find_rec_with_heap_no(
-/*=======================*/
- /* out: record, NULL if not found */
- page_t* page, /* in: index page */
- ulint heap_no);/* in: heap number */
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE UNIV_INLINE_ORIGINAL
-#endif
-
-#ifndef UNIV_NONINL
-#include "page0page.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
deleted file mode 100644
index d9e67f3eeeb..00000000000
--- a/storage/innobase/include/page0page.ic
+++ /dev/null
@@ -1,851 +0,0 @@
-/******************************************************
-Index page routines
-
-(c) 1994-1996 Innobase Oy
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#include "rem0cmp.h"
-#include "mtr0log.h"
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE
-#endif
-
-/****************************************************************
-Returns the start of the page frame that contains the given pointer,
-obtained by aligning the pointer down to UNIV_PAGE_SIZE. */
-UNIV_INLINE
-page_t*
-page_align(
-/*=======*/
-			/* out: start of the page */
-	void*	ptr)	/* in: pointer to page frame */
-{
-	page_t*	frame_start;
-
-	frame_start = (page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE);
-
-	return(frame_start);
-}
-/****************************************************************
-Returns the offset of a pointer within its page frame, as computed by
-ut_align_offset() for an alignment of UNIV_PAGE_SIZE. */
-UNIV_INLINE
-ulint
-page_offset(
-/*========*/
-				/* out: offset from the start of the page */
-	const void*	ptr)	/* in: pointer to page frame */
-{
-	ulint	offs_in_page;
-
-	offs_in_page = ut_align_offset(ptr, UNIV_PAGE_SIZE);
-
-	return(offs_in_page);
-}
-/*****************************************************************
-Reads the PAGE_MAX_TRX_ID field stored in the page header. */
-UNIV_INLINE
-dulint
-page_get_max_trx_id(
-/*================*/
-			/* out: value of the max trx id field */
-	page_t*	page)	/* in: page */
-{
-	page_t*	field_ptr;
-
-	ut_ad(page);
-
-	field_ptr = page + PAGE_HEADER + PAGE_MAX_TRX_ID;
-
-	return(mach_read_from_8(field_ptr));
-}
-
-/*****************************************************************
-Raises the max trx id field of the page to trx_id. The field is left
-untouched unless trx_id is strictly bigger than the stored value. */
-UNIV_INLINE
-void
-page_update_max_trx_id(
-/*===================*/
-	page_t*	page,	/* in: page */
-	dulint	trx_id)	/* in: transaction id */
-{
-	ut_ad(page);
-
-	if (ut_dulint_cmp(page_get_max_trx_id(page), trx_id) >= 0) {
-		/* The stored value is already at least trx_id */
-
-		return;
-	}
-
-	page_set_max_trx_id(page, trx_id);
-}
-
-/*****************************************************************
-Reads a page header field with mach_read_from_2(). The field is
-identified by its offset within the page header. */
-UNIV_INLINE
-ulint
-page_header_get_field(
-/*==================*/
-			/* out: value of the field */
-	page_t*	page,	/* in: page */
-	ulint	field)	/* in: PAGE_LEVEL, ... */
-{
-	page_t*	field_ptr;
-
-	ut_ad(page);
-	ut_ad(field <= PAGE_INDEX_ID);
-
-	field_ptr = page + PAGE_HEADER + field;
-
-	return(mach_read_from_2(field_ptr));
-}
-
-/*****************************************************************
-Writes a page header field with mach_write_to_2(). In debug builds the
-value is checked to be below UNIV_PAGE_SIZE; for PAGE_N_HEAP only the
-low 15 bits are checked, because bit 0x8000 of that field is a flag. */
-UNIV_INLINE
-void
-page_header_set_field(
-/*==================*/
-	page_t*	page,	/* in: page */
-	ulint	field,	/* in: PAGE_LEVEL, ... */
-	ulint	val)	/* in: value */
-{
-	page_t*	field_ptr;
-
-	ut_ad(page);
-	ut_ad(field <= PAGE_N_RECS);
-
-	if (field == PAGE_N_HEAP) {
-		ut_ad((val & 0x7fff) < UNIV_PAGE_SIZE);
-	} else {
-		ut_ad(val < UNIV_PAGE_SIZE);
-	}
-
-	field_ptr = page + PAGE_HEADER + field;
-
-	mach_write_to_2(field_ptr, val);
-}
-
-/*****************************************************************
-Converts the page offset stored in a header field to a pointer into
-the page. A stored offset of 0 is reported as NULL. */
-UNIV_INLINE
-byte*
-page_header_get_ptr(
-/*================*/
-			/* out: pointer or NULL */
-	page_t*	page,	/* in: page */
-	ulint	field)	/* in: PAGE_FREE, ... */
-{
-	ulint	stored_offs;
-
-	ut_ad(page);
-	ut_ad(field == PAGE_FREE
-	      || field == PAGE_LAST_INSERT
-	      || field == PAGE_HEAP_TOP);
-
-	stored_offs = page_header_get_field(page, field);
-
-	/* PAGE_HEAP_TOP is never expected to hold 0 */
-	ut_ad((field != PAGE_HEAP_TOP) || stored_offs);
-
-	return(stored_offs ? page + stored_offs : NULL);
-}
-
-/*****************************************************************
-Stores a pointer into the page in a header field, as an offset from
-the start of the page. NULL is stored as offset 0. */
-UNIV_INLINE
-void
-page_header_set_ptr(
-/*================*/
-	page_t*	page,	/* in: page */
-	ulint	field,	/* in: PAGE_FREE, ... */
-	byte*	ptr)	/* in: pointer or NULL*/
-{
-	ulint	new_offs;
-
-	ut_ad(page);
-	ut_ad(field == PAGE_FREE
-	      || field == PAGE_LAST_INSERT
-	      || field == PAGE_HEAP_TOP);
-
-	new_offs = ptr ? (ulint) (ptr - page) : 0;
-
-	/* PAGE_HEAP_TOP must never be set to 0 */
-	ut_ad((field != PAGE_HEAP_TOP) || new_offs);
-
-	page_header_set_field(page, field, new_offs);
-}
-
-/*****************************************************************
-Sets the PAGE_LAST_INSERT field of the page header to 0. The write
-goes through mlog_write_ulint(), which is given the mtr. */
-UNIV_INLINE
-void
-page_header_reset_last_insert(
-/*==========================*/
-	page_t*	page,	/* in: page */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	page_t*	last_insert_field;
-
-	ut_ad(page && mtr);
-
-	last_insert_field = page + PAGE_HEADER + PAGE_LAST_INSERT;
-
-	mlog_write_ulint(last_insert_field, 0, MLOG_2BYTES, mtr);
-}
-
-/****************************************************************
-Tells whether the page uses the new-style compact format, by testing
-bit 0x8000 of the PAGE_N_HEAP header field. */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
-			/* out: nonzero if the page is in compact
-			format, zero if it is in old-style format */
-	page_t*	page)	/* in: index page */
-{
-	ulint	n_heap_field;
-
-	n_heap_field = page_header_get_field(page, PAGE_N_HEAP);
-
-	/* The compact format is hinted as the expected case */
-	return(UNIV_EXPECT(n_heap_field & 0x8000, 0x8000));
-}
-
-/****************************************************************
-Tells whether the page containing the record is in compact format. */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
-				/* out: nonzero if in compact format */
-	const rec_t*	rec)	/* in: record */
-{
-	page_t*	page;
-
-	/* page_align() takes a non-const pointer, hence the cast */
-	page = page_align((rec_t*) rec);
-
-	return(page_is_comp(page));
-}
-
-/****************************************************************
-Returns the infimum record of the page. It lives at a fixed offset
-that depends on whether the page is in compact format. */
-UNIV_INLINE
-rec_t*
-page_get_infimum_rec(
-/*=================*/
-			/* out: the first record in record list */
-	page_t*	page)	/* in: page which must have record(s) */
-{
-	ut_ad(page);
-
-	return(page + (page_is_comp(page)
-		       ? PAGE_NEW_INFIMUM
-		       : PAGE_OLD_INFIMUM));
-}
-
-/****************************************************************
-Returns the supremum record of the page. It lives at a fixed offset
-that depends on whether the page is in compact format. */
-UNIV_INLINE
-rec_t*
-page_get_supremum_rec(
-/*==================*/
-			/* out: the last record in record list */
-	page_t*	page)	/* in: page which must have record(s) */
-{
-	ut_ad(page);
-
-	return(page + (page_is_comp(page)
-		       ? PAGE_NEW_SUPREMUM
-		       : PAGE_OLD_SUPREMUM));
-}
-
-/****************************************************************
-Tells whether a record offset denotes a user record, that is, whether
-it differs from the infimum and supremum offsets of both the compact
-and the old-style page format. */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
-			/* out: TRUE if a user record */
-	ulint	offset)	/* in: record offset on page */
-{
-	ibool	is_boundary;
-
-	/* Compile-time checks on the relative order of the infimum and
-	supremum offsets in the two page formats */
-#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM
-# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM"
-#endif
-#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM
-# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM"
-#endif
-#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END
-# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END"
-#endif
-#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END
-# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END"
-#endif
-
-	ut_ad(offset >= PAGE_NEW_INFIMUM);
-	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
-	is_boundary = (offset == PAGE_NEW_SUPREMUM
-		       || offset == PAGE_NEW_INFIMUM
-		       || offset == PAGE_OLD_INFIMUM
-		       || offset == PAGE_OLD_SUPREMUM);
-
-	/* User records are hinted as the expected case */
-	return(UNIV_LIKELY(!is_boundary));
-}
-
-/****************************************************************
-Tells whether a record offset is the supremum offset of either the
-compact or the old-style page format. */
-UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
-			/* out: TRUE if the supremum record */
-	ulint	offset)	/* in: record offset on page */
-{
-	ibool	is_supremum;
-
-	ut_ad(offset >= PAGE_NEW_INFIMUM);
-	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
-	is_supremum = UNIV_UNLIKELY(offset == PAGE_NEW_SUPREMUM)
-		|| UNIV_UNLIKELY(offset == PAGE_OLD_SUPREMUM);
-
-	return(is_supremum);
-}
-
-/****************************************************************
-Tells whether a record offset is the infimum offset of either the
-compact or the old-style page format. */
-UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
-/*====================*/
-			/* out: TRUE if the infimum record */
-	ulint	offset)	/* in: record offset on page */
-{
-	ibool	is_infimum;
-
-	ut_ad(offset >= PAGE_NEW_INFIMUM);
-	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
-	is_infimum = UNIV_UNLIKELY(offset == PAGE_NEW_INFIMUM)
-		|| UNIV_UNLIKELY(offset == PAGE_OLD_INFIMUM);
-
-	return(is_infimum);
-}
-
-/****************************************************************
-Tells whether a record is a user record, judging by its offset within
-the page. */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
-				/* out: TRUE if a user record */
-	const rec_t*	rec)	/* in: record */
-{
-	ulint	rec_offs;
-
-	rec_offs = page_offset(rec);
-
-	return(page_rec_is_user_rec_low(rec_offs));
-}
-
-/****************************************************************
-Tells whether a record is the page supremum, judging by its offset
-within the page. */
-UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
-				/* out: TRUE if the supremum record */
-	const rec_t*	rec)	/* in: record */
-{
-	ulint	rec_offs;
-
-	rec_offs = page_offset(rec);
-
-	return(page_rec_is_supremum_low(rec_offs));
-}
-
-/****************************************************************
-Tells whether a record is the page infimum, judging by its offset
-within the page. */
-UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
-				/* out: TRUE if the infimum record */
-	const rec_t*	rec)	/* in: record */
-{
-	ulint	rec_offs;
-
-	rec_offs = page_offset(rec);
-
-	return(page_rec_is_infimum_low(rec_offs));
-}
-
-/*****************************************************************
-Compares a data tuple to a physical record that resides on an index
-page. Unlike cmp_dtuple_rec_with_match, this function also accepts the
-page infimum and supremum records in the parameter rec: the infimum is
-treated as negative infinity and the supremum as positive infinity, so
-the tuple compares greater than the former and less than the latter. */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
-				/* out: 1, 0, -1, if dtuple is greater, equal,
-				less than rec, respectively, when only the
-				common first fields are compared */
-	dtuple_t*	dtuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: physical record on a page; may also
-				be page infimum or supremum, in which case
-				matched-parameter values below are not
-				affected */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint*		matched_fields, /* in/out: number of already completely
-				matched fields; when function returns
-				contains the value for current comparison */
-	ulint*		matched_bytes) /* in/out: number of already matched
-				bytes within the first field not completely
-				matched; when function returns contains the
-				value for current comparison */
-{
-	ulint	rec_offset;
-	int	cmp;
-
-	ut_ad(dtuple_check_typed(dtuple));
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
-
-	rec_offset = page_offset(rec);
-
-	if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_INFIMUM)
-	    || UNIV_UNLIKELY(rec_offset == PAGE_OLD_INFIMUM)) {
-		/* Every tuple is greater than the infimum */
-		cmp = 1;
-	} else if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_SUPREMUM)
-		   || UNIV_UNLIKELY(rec_offset == PAGE_OLD_SUPREMUM)) {
-		/* Every tuple is less than the supremum */
-		cmp = -1;
-	} else {
-		cmp = cmp_dtuple_rec_with_match(dtuple, rec, offsets,
-						matched_fields,
-						matched_bytes);
-	}
-
-	return(cmp);
-}
-
-/*****************************************************************
-Reads the PAGE_N_RECS header field: the number of user records on the
-page. The infimum and supremum records are not user records. */
-UNIV_INLINE
-ulint
-page_get_n_recs(
-/*============*/
-			/* out: number of user records */
-	page_t*	page)	/* in: index page */
-{
-	ulint	n_recs;
-
-	n_recs = page_header_get_field(page, PAGE_N_RECS);
-
-	return(n_recs);
-}
-
-/*****************************************************************
-Reads the PAGE_N_DIR_SLOTS header field: the number of slots in the
-page directory. */
-UNIV_INLINE
-ulint
-page_dir_get_n_slots(
-/*=================*/
-			/* out: number of slots */
-	page_t*	page)	/* in: index page */
-{
-	ulint	n_slots;
-
-	n_slots = page_header_get_field(page, PAGE_N_DIR_SLOTS);
-
-	return(n_slots);
-}
-/*****************************************************************
-Writes the PAGE_N_DIR_SLOTS header field: the number of slots in the
-page directory. */
-UNIV_INLINE
-void
-page_dir_set_n_slots(
-/*=================*/
-	page_t*	page,	/* in: index page */
-	ulint	n_slots)/* in: number of slots */
-{
-	const ulint	field = PAGE_N_DIR_SLOTS;
-
-	page_header_set_field(page, field, n_slots);
-}
-
-/*****************************************************************
-Gets the number of records in the heap. This is the PAGE_N_HEAP header
-field with bit 0x8000, the compact format flag, masked off. */
-UNIV_INLINE
-ulint
-page_dir_get_n_heap(
-/*================*/
-			/* out: number of records in the heap */
-	page_t*	page)	/* in: index page */
-{
-	ulint	n_heap_field;
-
-	n_heap_field = page_header_get_field(page, PAGE_N_HEAP);
-
-	return(n_heap_field & 0x7fff);
-}
-
-/*****************************************************************
-Sets the number of records in the heap. Bit 0x8000 of the PAGE_N_HEAP
-header field, the compact format flag, keeps its current value. */
-UNIV_INLINE
-void
-page_dir_set_n_heap(
-/*================*/
-	page_t*	page,	/* in: index page */
-	ulint	n_heap)	/* in: number of records */
-{
-	ulint	comp_flag;
-
-	/* The count must fit in the low 15 bits */
-	ut_ad(n_heap < 0x8000);
-
-	comp_flag = 0x8000 & page_header_get_field(page, PAGE_N_HEAP);
-
-	page_header_set_field(page, PAGE_N_HEAP, n_heap | comp_flag);
-}
-
-/*****************************************************************
-Gets a pointer to the nth slot of the page directory. Slot addresses
-decrease as n grows: slot n starts (n + 1) * PAGE_DIR_SLOT_SIZE bytes
-below the offset UNIV_PAGE_SIZE - PAGE_DIR. */
-UNIV_INLINE
-page_dir_slot_t*
-page_dir_get_nth_slot(
-/*==================*/
-			/* out: pointer to dir slot */
-	page_t*	page,	/* in: index page */
-	ulint	n)	/* in: position */
-{
-	page_t*	dir_end;
-
-	ut_ad(page_dir_get_n_slots(page) > n);
-
-	dir_end = page + UNIV_PAGE_SIZE - PAGE_DIR;
-
-	return(dir_end - (n + 1) * PAGE_DIR_SLOT_SIZE);
-}
-
-/******************************************************************
-Checks that a record pointer is non-NULL and lies between the offset
-PAGE_DATA and the heap top of its page. A failed check trips a ut_a
-assertion, so the function only ever returns TRUE. */
-UNIV_INLINE
-ibool
-page_rec_check(
-/*===========*/
-			/* out: TRUE if succeed */
-	rec_t*	rec)	/* in: record */
-{
-	page_t*	page;
-	byte*	heap_top;
-
-	ut_a(rec);
-
-	page = buf_frame_align(rec);
-	heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
-	ut_a(rec <= heap_top);
-	ut_a(rec >= page + PAGE_DATA);
-
-	return(TRUE);
-}
-
-/*******************************************************************
-Gets the record a directory slot points to. The slot holds the offset
-of the record within the page frame that contains the slot. */
-UNIV_INLINE
-rec_t*
-page_dir_slot_get_rec(
-/*==================*/
-					/* out: pointer to record */
-	page_dir_slot_t*	slot)	/* in: directory slot */
-{
-	page_t*	page;
-
-	page = buf_frame_align(slot);
-
-	return(page + mach_read_from_2(slot));
-}
-
-/*******************************************************************
-Makes a directory slot point to a record, by storing the offset of
-the record within its page in the slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_rec(
-/*==================*/
-	page_dir_slot_t*	slot,	/* in: directory slot */
-	rec_t*			rec)	/* in: record on the page */
-{
-	ulint	rec_offs;
-
-	ut_ad(page_rec_check(rec));
-
-	rec_offs = page_offset(rec);
-
-	mach_write_to_2(slot, rec_offs);
-}
-
-/*******************************************************************
-Gets the number of records owned by a directory slot. The count is
-read from the record that the slot points to. */
-UNIV_INLINE
-ulint
-page_dir_slot_get_n_owned(
-/*======================*/
-					/* out: number of records */
-	page_dir_slot_t*	slot)	/* in: page directory slot */
-{
-	rec_t*	owner_rec;
-	ulint	comp;
-
-	owner_rec = page_dir_slot_get_rec(slot);
-	comp = page_rec_is_comp(owner_rec);
-
-	return(rec_get_n_owned(owner_rec, comp));
-}
-
-/*******************************************************************
-Sets the number of records owned by a directory slot. The count is
-written to the record that the slot points to. */
-UNIV_INLINE
-void
-page_dir_slot_set_n_owned(
-/*======================*/
-	page_dir_slot_t*	slot,	/* in: directory slot */
-	ulint			n)	/* in: number of records owned
-					by the slot */
-{
-	rec_t*	owner_rec;
-	ulint	comp;
-
-	owner_rec = page_dir_slot_get_rec(slot);
-	comp = page_rec_is_comp(owner_rec);
-
-	rec_set_n_owned(owner_rec, comp, n);
-}
-
-/****************************************************************
-Calculates the space reserved for the directory slots of a given
-number of records. The exact amount is the fraction
-n_recs * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, which is
-rounded upwards to an integer. */
-UNIV_INLINE
-ulint
-page_dir_calc_reserved_space(
-/*=========================*/
-				/* out: reserved space in bytes */
-	ulint	n_recs)		/* in: number of records */
-{
-	ulint	numerator;
-
-	/* Adding the divisor minus one makes the division round up */
-	numerator = PAGE_DIR_SLOT_SIZE * n_recs
-		+ PAGE_DIR_SLOT_MIN_N_OWNED - 1;
-
-	return(numerator / PAGE_DIR_SLOT_MIN_N_OWNED);
-}
-
-/****************************************************************
-Gets the pointer to the next record on the page, following the next
-record offset stored in the record. A stored offset of 0 means there
-is no next record. An offset of UNIV_PAGE_SIZE or more is treated as
-corruption: diagnostics and the page are printed and ut_error is
-raised. */
-UNIV_INLINE
-rec_t*
-page_rec_get_next(
-/*==============*/
-			/* out: pointer to next record, or NULL if the
-			stored next record offset is 0 */
-	rec_t*	rec)	/* in: pointer to record */
-{
-	page_t*	page;
-	ulint	next_offs;
-
-	ut_ad(page_rec_check(rec));
-
-	page = page_align(rec);
-	next_offs = rec_get_next_offs(rec, page_is_comp(page));
-
-	if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE)) {
-		/* The offset points outside the page */
-		fprintf(stderr,
-			"InnoDB: Next record offset is nonsensical %lu"
-			" in record at offset %lu\n"
-			"InnoDB: rec address %p, first buffer frame %p\n"
-			"InnoDB: buffer pool high end %p, buf fix count %lu\n",
-			(ulong)next_offs, (ulong)(rec - page),
-			(void*) rec, (void*) buf_pool->frame_zero,
-			(void*) buf_pool->high_end,
-			(ulong) buf_block_align(rec)->buf_fix_count);
-		buf_page_print(page);
-
-		ut_error;
-	}
-
-	return(UNIV_UNLIKELY(next_offs == 0) ? NULL : page + next_offs);
-}
-
-/****************************************************************
-Sets the next record pointer of a record. The pointer is stored as
-the offset of the next record within the page; NULL is stored as
-offset 0. */
-UNIV_INLINE
-void
-page_rec_set_next(
-/*==============*/
-	rec_t*	rec,	/* in: pointer to record, must not be page supremum */
-	rec_t*	next)	/* in: pointer to next record, must not be page
-			infimum */
-{
-	page_t*	page;
-	ulint	next_offs = 0;
-
-	ut_ad(page_rec_check(rec));
-	ut_ad(!page_rec_is_supremum(rec));
-
-	page = page_align(rec);
-
-	if (next != NULL) {
-		ut_ad(!page_rec_is_infimum(next));
-		/* Both records must reside on the same page */
-		ut_ad(page == page_align(next));
-
-		next_offs = (ulint) (next - page);
-	}
-
-	rec_set_next_offs(rec, page_is_comp(page), next_offs);
-}
-
-/****************************************************************
-Gets the pointer to the previous record. Records are linked forward
-only, so the search starts from the record pointed to by the
-directory slot preceding the owner slot of rec, and walks the record
-list until rec is reached. */
-UNIV_INLINE
-rec_t*
-page_rec_get_prev(
-/*==============*/
-			/* out: pointer to previous record */
-	rec_t*	rec)	/* in: pointer to record, must not be page
-			infimum */
-{
-	page_dir_slot_t*	slot;
-	ulint			slot_no;
-	rec_t*			rec2;
-	rec_t*			prev_rec = NULL;
-	page_t*			page;
-
-	ut_ad(page_rec_check(rec));
-
-	page = page_align(rec);
-
-	ut_ad(!page_rec_is_infimum(rec));
-
-	slot_no = page_dir_find_owner_slot(rec);
-
-	/* The walk below needs a preceding slot to start from */
-	ut_a(slot_no != 0);
-
-	slot = page_dir_get_nth_slot(page, slot_no - 1);
-
-	rec2 = page_dir_slot_get_rec(slot);
-
-	while (rec != rec2) {
-		/* page_rec_get_next() returns NULL when the stored next
-		record offset is 0. If the list ends before rec is found,
-		the page is corrupt: fail the assertion here instead of
-		passing NULL back to page_rec_get_next(). */
-		ut_a(rec2);
-
-		prev_rec = rec2;
-		rec2 = page_rec_get_next(rec2);
-	}
-
-	/* Fails if rec itself is the record the preceding slot points to */
-	ut_a(prev_rec);
-
-	return(prev_rec);
-}
-
-/*******************************************************************
-Looks for the record which owns the given record: the first record,
-starting from rec itself and following the next record pointers,
-whose n_owned field is nonzero. */
-UNIV_INLINE
-rec_t*
-page_rec_find_owner_rec(
-/*====================*/
-			/* out: the owner record */
-	rec_t*	rec)	/* in: the physical record */
-{
-	ulint	comp;
-
-	ut_ad(page_rec_check(rec));
-
-	/* The page format is the same for every record visited, so it
-	is determined once, from the starting record */
-	comp = page_rec_is_comp(rec) ? TRUE : FALSE;
-
-	while (rec_get_n_owned(rec, comp) == 0) {
-		rec = page_rec_get_next(rec);
-	}
-
-	return(rec);
-}
-
-/****************************************************************
-Returns the sum of the sizes of the records in the record list,
-excluding the infimum and supremum records. It is computed as the heap
-top offset, minus the end offset of the supremum record, minus the
-value of the PAGE_GARBAGE header field. */
-UNIV_INLINE
-ulint
-page_get_data_size(
-/*===============*/
-			/* out: data in bytes */
-	page_t*	page)	/* in: index page */
-{
-	ulint	heap_top;
-	ulint	supremum_end;
-	ulint	garbage;
-	ulint	data_size;
-
-	heap_top = page_header_get_field(page, PAGE_HEAP_TOP);
-
-	supremum_end = page_is_comp(page)
-		? PAGE_NEW_SUPREMUM_END
-		: PAGE_OLD_SUPREMUM_END;
-
-	garbage = page_header_get_field(page, PAGE_GARBAGE);
-
-	data_size = heap_top - supremum_end - garbage;
-
-	ut_ad(data_size < UNIV_PAGE_SIZE);
-
-	return(data_size);
-}
-
-/*****************************************************************
-Calculates the free space of a page that holds no user records: the
-page size minus the end offset of the supremum record, PAGE_DIR and
-two directory slots. */
-UNIV_INLINE
-ulint
-page_get_free_space_of_empty(
-/*=========================*/
-			/* out: free space */
-	ulint	comp)	/* in: nonzero=compact page layout */
-{
-	ulint	supremum_end;
-
-	/* The compact layout is hinted as the expected case */
-	if (UNIV_LIKELY(comp)) {
-		supremum_end = PAGE_NEW_SUPREMUM_END;
-	} else {
-		supremum_end = PAGE_OLD_SUPREMUM_END;
-	}
-
-	return((ulint)(UNIV_PAGE_SIZE
-		       - supremum_end
-		       - PAGE_DIR
-		       - 2 * PAGE_DIR_SLOT_SIZE));
-}
-
-/****************************************************************
-Each user record on a page, and also the deleted user records in the
-heap, takes its size plus the fraction of the dir cell size /
-PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
-value of page_get_free_space_of_empty, the insert is impossible,
-otherwise it is allowed. This function returns the maximum combined
-size of records which can be inserted on top of the record heap. */
-UNIV_INLINE
-ulint
-page_get_max_insert_size(
-/*=====================*/
-			/* out: maximum combined size for inserted records */
-	page_t*	page,	/* in: index page */
-	ulint	n_recs)	/* in: number of records */
-{
-	ulint	comp;
-	ulint	supremum_end;
-	ulint	dir_space;
-	ulint	occupied;
-	ulint	free_space;
-
-	comp = page_is_comp(page);
-
-	supremum_end = comp
-		? PAGE_NEW_SUPREMUM_END
-		: PAGE_OLD_SUPREMUM_END;
-
-	/* The 'n_recs +' part reserves directory space for the new
-	inserted records; the '- 2' excludes page infimum and supremum
-	records */
-	dir_space = page_dir_calc_reserved_space(
-		n_recs + page_dir_get_n_heap(page) - 2);
-
-	occupied = page_header_get_field(page, PAGE_HEAP_TOP)
-		- supremum_end
-		+ dir_space;
-
-	free_space = page_get_free_space_of_empty(comp);
-
-	return(occupied > free_space ? 0 : free_space - occupied);
-}
-
-/****************************************************************
-Returns the maximum combined size of records which can be inserted on
-top of the record heap if a page is first reorganized. The occupied
-space is the data size of the page plus the directory space reserved
-for the current and the new records. */
-UNIV_INLINE
-ulint
-page_get_max_insert_size_after_reorganize(
-/*======================================*/
-			/* out: maximum combined size for inserted records */
-	page_t*	page,	/* in: index page */
-	ulint	n_recs)	/* in: number of records */
-{
-	ulint	data_size;
-	ulint	dir_space;
-	ulint	occupied;
-	ulint	free_space;
-
-	data_size = page_get_data_size(page);
-	dir_space = page_dir_calc_reserved_space(
-		n_recs + page_get_n_recs(page));
-
-	occupied = data_size + dir_space;
-
-	free_space = page_get_free_space_of_empty(page_is_comp(page));
-
-	return(occupied > free_space ? 0 : free_space - occupied);
-}
-
-/****************************************************************
-Puts a record to the free list of the page. The record becomes the
-new head of the list, and its size is added to the PAGE_GARBAGE
-header field. */
-UNIV_INLINE
-void
-page_mem_free(
-/*==========*/
-	page_t*		page,	/* in: index page */
-	rec_t*		rec,	/* in: pointer to the (origin of) record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	rec_t*	old_free_head;
-	ulint	old_garbage;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
-
-	/* Link the record in front of the current free list head */
-	old_free_head = page_header_get_ptr(page, PAGE_FREE);
-
-	page_rec_set_next(rec, old_free_head);
-	page_header_set_ptr(page, PAGE_FREE, rec);
-
-#if 0	/* It's better not to destroy the user's data. */
-
-	/* Clear the data bytes of the deleted record in order to improve
-	the compression ratio of the page and to make it easier to read
-	page dumps in corruption reports.  The extra bytes of the record
-	cannot be cleared, because page_mem_alloc() needs them in order
-	to determine the size of the deleted record. */
-	memset(rec, 0, rec_offs_data_size(offsets));
-#endif
-
-	old_garbage = page_header_get_field(page, PAGE_GARBAGE);
-
-	page_header_set_field(page, PAGE_GARBAGE,
-			      old_garbage + rec_offs_size(offsets));
-}
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE UNIV_INLINE_ORIGINAL
-#endif
diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
deleted file mode 100644
index 1fbeeb0f60f..00000000000
--- a/storage/innobase/include/page0types.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/******************************************************
-Index page routines
-
-(c) 1994-1996 Innobase Oy
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef page0types_h
-#define page0types_h
-
-#include "univ.i"
-
-/* Type of the index page */
-/* The following define eliminates a name collision on HP-UX */
-#define page_t ib_page_t
-typedef byte page_t;
-typedef struct page_search_struct page_search_t;
-typedef struct page_cur_struct page_cur_t;
-
-
-#endif
diff --git a/storage/innobase/include/pars0grm.h b/storage/innobase/include/pars0grm.h
deleted file mode 100644
index 0062b8314ee..00000000000
--- a/storage/innobase/include/pars0grm.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* A Bison parser, made by GNU Bison 1.875d. */
-
-/* Skeleton parser for Yacc-like parsing with Bison,
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* As a special exception, when this file is copied by Bison into a
- Bison output file, you may use that output file without restriction.
- This special exception was added by the Free Software Foundation
- in version 1.24 of Bison. */
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- NEG = 350
- };
-#endif
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define NEG 350
-
-
-
-
-#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-extern YYSTYPE yylval;
-
-
-
diff --git a/storage/innobase/include/pars0opt.h b/storage/innobase/include/pars0opt.h
deleted file mode 100644
index ff92cc062d9..00000000000
--- a/storage/innobase/include/pars0opt.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/******************************************************
-Simple SQL optimizer
-
-(c) 1997 Innobase Oy
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0opt_h
-#define pars0opt_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "pars0sym.h"
-#include "dict0types.h"
-#include "row0sel.h"
-
-/***********************************************************************
-Optimizes a select. Decides which index to use for each table. The tables
-are accessed in the order in which they were written in the FROM part of
-the select statement. */
-
-void
-opt_search_plan(
-/*============*/
- sel_node_t* sel_node); /* in: parsed select node */
-/***********************************************************************
-Looks for occurrences of the columns of the table in the query subgraph and
-adds them to the list of columns if an occurrence of the same column does not
-already exist in the list. If the column is already in the list, puts a value
-indirection to point to the occurrence in the column list, except if the
-column occurrence we are looking at is in the column list, in which case
-nothing is done. */
-
-void
-opt_find_all_cols(
-/*==============*/
- ibool copy_val, /* in: if TRUE, new found columns are
- added as columns to copy */
- dict_index_t* index, /* in: index to use */
- sym_node_list_t* col_list, /* in: base node of a list where
- to add new found columns */
- plan_t* plan, /* in: plan or NULL */
- que_node_t* exp); /* in: expression or condition */
-/************************************************************************
-Prints info of a query plan. */
-
-void
-opt_print_query_plan(
-/*=================*/
- sel_node_t* sel_node); /* in: select node */
-
-#ifndef UNIV_NONINL
-#include "pars0opt.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/pars0opt.ic b/storage/innobase/include/pars0opt.ic
deleted file mode 100644
index 0bfa8526bee..00000000000
--- a/storage/innobase/include/pars0opt.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Simple SQL optimizer
-
-(c) 1997 Innobase Oy
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
deleted file mode 100644
index 1c6c550d313..00000000000
--- a/storage/innobase/include/pars0pars.h
+++ /dev/null
@@ -1,731 +0,0 @@
-/******************************************************
-SQL parser
-
-(c) 1996 Innobase Oy
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0pars_h
-#define pars0pars_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "pars0types.h"
-#include "row0types.h"
-#include "trx0types.h"
-#include "ut0vec.h"
-
-/* Type of the user functions. The first argument is always InnoDB-supplied
-and varies in type, while 'user_arg' is a user-supplied argument. The
-meaning of the return type also varies. See the individual use cases, e.g.
-the FETCH statement, for details on them. */
-typedef void* (*pars_user_func_cb_t)(void* arg, void* user_arg);
-
-extern int yydebug;
-
-/* If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-
-#ifdef UNIV_SQL_DEBUG
-extern ibool pars_print_lexed;
-#endif /* UNIV_SQL_DEBUG */
-
-/* Global variable used while parsing a single procedure or query : the code is
-NOT re-entrant */
-extern sym_tab_t* pars_sym_tab_global;
-
-extern pars_res_word_t pars_to_char_token;
-extern pars_res_word_t pars_to_number_token;
-extern pars_res_word_t pars_to_binary_token;
-extern pars_res_word_t pars_binary_to_number_token;
-extern pars_res_word_t pars_substr_token;
-extern pars_res_word_t pars_replstr_token;
-extern pars_res_word_t pars_concat_token;
-extern pars_res_word_t pars_length_token;
-extern pars_res_word_t pars_instr_token;
-extern pars_res_word_t pars_sysdate_token;
-extern pars_res_word_t pars_printf_token;
-extern pars_res_word_t pars_assert_token;
-extern pars_res_word_t pars_rnd_token;
-extern pars_res_word_t pars_rnd_str_token;
-extern pars_res_word_t pars_count_token;
-extern pars_res_word_t pars_sum_token;
-extern pars_res_word_t pars_distinct_token;
-extern pars_res_word_t pars_binary_token;
-extern pars_res_word_t pars_blob_token;
-extern pars_res_word_t pars_int_token;
-extern pars_res_word_t pars_char_token;
-extern pars_res_word_t pars_float_token;
-extern pars_res_word_t pars_update_token;
-extern pars_res_word_t pars_asc_token;
-extern pars_res_word_t pars_desc_token;
-extern pars_res_word_t pars_open_token;
-extern pars_res_word_t pars_close_token;
-extern pars_res_word_t pars_share_token;
-extern pars_res_word_t pars_unique_token;
-extern pars_res_word_t pars_clustered_token;
-
-extern ulint pars_star_denoter;
-
-/* Procedure parameter types */
-#define PARS_INPUT 0
-#define PARS_OUTPUT 1
-#define PARS_NOT_PARAM 2
-
-int
-yyparse(void);
-
-/*****************************************************************
-Parses an SQL string returning the query graph. */
-
-que_t*
-pars_sql(
-/*=====*/
- /* out, own: the query graph */
- pars_info_t* info, /* in: extra information, or NULL */
- const char* str); /* in: SQL string */
-/*****************************************************************
-Retrieves characters to the lexical analyzer. */
-
-void
-pars_get_lex_chars(
-/*===============*/
- char* buf, /* in/out: buffer where to copy */
- int* result, /* out: number of characters copied or EOF */
- int max_size); /* in: maximum number of characters which fit
- in the buffer */
-/*****************************************************************
-Called by yyparse on error. */
-
-void
-yyerror(
-/*====*/
- const char* s); /* in: error message string */
-/*************************************************************************
-Parses a variable declaration. */
-
-sym_node_t*
-pars_variable_declaration(
-/*======================*/
- /* out, own: symbol table node of type
- SYM_VAR */
- sym_node_t* node, /* in: symbol table node allocated for the
- id of the variable */
- pars_res_word_t* type); /* in: pointer to a type token */
-/*************************************************************************
-Parses a function expression. */
-
-func_node_t*
-pars_func(
-/*======*/
- /* out, own: function node in a query tree */
- que_node_t* res_word,/* in: function name reserved word */
- que_node_t* arg); /* in: first argument in the argument list */
-/*************************************************************************
-Parses an operator expression. */
-
-func_node_t*
-pars_op(
-/*====*/
- /* out, own: function node in a query tree */
- int func, /* in: operator token code */
- que_node_t* arg1, /* in: first argument */
- que_node_t* arg2); /* in: second argument or NULL for an unary
- operator */
-/*************************************************************************
-Parses an ORDER BY clause. Order by a single column only is supported. */
-
-order_node_t*
-pars_order_by(
-/*==========*/
- /* out, own: order-by node in a query tree */
- sym_node_t* column, /* in: column name */
- pars_res_word_t* asc); /* in: &pars_asc_token or &pars_desc_token */
-/*************************************************************************
-Parses a select list; creates a query graph node for the whole SELECT
-statement. */
-
-sel_node_t*
-pars_select_list(
-/*=============*/
- /* out, own: select node in a query
- tree */
- que_node_t* select_list, /* in: select list */
- sym_node_t* into_list); /* in: variables list or NULL */
-/*************************************************************************
-Parses a cursor declaration. */
-
-que_node_t*
-pars_cursor_declaration(
-/*====================*/
- /* out: sym_node */
- sym_node_t* sym_node, /* in: cursor id node in the symbol
- table */
- sel_node_t* select_node); /* in: select node */
-/*************************************************************************
-Parses a function declaration. */
-
-que_node_t*
-pars_function_declaration(
-/*======================*/
- /* out: sym_node */
- sym_node_t* sym_node); /* in: function id node in the symbol
- table */
-/*************************************************************************
-Parses a select statement. */
-
-sel_node_t*
-pars_select_statement(
-/*==================*/
- /* out, own: select node in a query
- tree */
- sel_node_t* select_node, /* in: select node already containing
- the select list */
- sym_node_t* table_list, /* in: table list */
- que_node_t* search_cond, /* in: search condition or NULL */
- pars_res_word_t* for_update, /* in: NULL or &pars_update_token */
- pars_res_word_t* consistent_read,/* in: NULL or
- &pars_consistent_token */
- order_node_t* order_by); /* in: NULL or an order-by node */
-/*************************************************************************
-Parses a column assignment in an update. */
-
-col_assign_node_t*
-pars_column_assignment(
-/*===================*/
- /* out: column assignment node */
- sym_node_t* column, /* in: column to assign */
- que_node_t* exp); /* in: value to assign */
-/*************************************************************************
-Parses a delete or update statement start. */
-
-upd_node_t*
-pars_update_statement_start(
-/*========================*/
- /* out, own: update node in a query
- tree */
- ibool is_delete, /* in: TRUE if delete */
- sym_node_t* table_sym, /* in: table name node */
- col_assign_node_t* col_assign_list);/* in: column assignment list, NULL
- if delete */
-/*************************************************************************
-Parses an update or delete statement. */
-
-upd_node_t*
-pars_update_statement(
-/*==================*/
- /* out, own: update node in a query
- tree */
- upd_node_t* node, /* in: update node */
- sym_node_t* cursor_sym, /* in: pointer to a cursor entry in
- the symbol table or NULL */
- que_node_t* search_cond); /* in: search condition or NULL */
-/*************************************************************************
-Parses an insert statement. */
-
-ins_node_t*
-pars_insert_statement(
-/*==================*/
- /* out, own: insert node in a query
- tree */
- sym_node_t* table_sym, /* in: table name node */
- que_node_t* values_list, /* in: value expression list or NULL */
- sel_node_t* select); /* in: select condition or NULL */
-/*************************************************************************
-Parses a procedure parameter declaration. */
-
-sym_node_t*
-pars_parameter_declaration(
-/*=======================*/
- /* out, own: symbol table node of type
- SYM_VAR */
- sym_node_t* node, /* in: symbol table node allocated for the
- id of the parameter */
- ulint param_type,
- /* in: PARS_INPUT or PARS_OUTPUT */
- pars_res_word_t* type); /* in: pointer to a type token */
-/*************************************************************************
-Parses an elsif element. */
-
-elsif_node_t*
-pars_elsif_element(
-/*===============*/
- /* out: elsif node */
- que_node_t* cond, /* in: if-condition */
- que_node_t* stat_list); /* in: statement list */
-/*************************************************************************
-Parses an if-statement. */
-
-if_node_t*
-pars_if_statement(
-/*==============*/
- /* out: if-statement node */
- que_node_t* cond, /* in: if-condition */
- que_node_t* stat_list, /* in: statement list */
- que_node_t* else_part); /* in: else-part statement list */
-/*************************************************************************
-Parses a for-loop-statement. */
-
-for_node_t*
-pars_for_statement(
-/*===============*/
- /* out: for-statement node */
- sym_node_t* loop_var, /* in: loop variable */
- que_node_t* loop_start_limit,/* in: loop start expression */
- que_node_t* loop_end_limit, /* in: loop end expression */
- que_node_t* stat_list); /* in: statement list */
-/*************************************************************************
-Parses a while-statement. */
-
-while_node_t*
-pars_while_statement(
-/*=================*/
- /* out: while-statement node */
- que_node_t* cond, /* in: while-condition */
- que_node_t* stat_list); /* in: statement list */
-/*************************************************************************
-Parses an exit statement. */
-
-exit_node_t*
-pars_exit_statement(void);
-/*=====================*/
- /* out: exit statement node */
-/*************************************************************************
-Parses a return-statement. */
-
-return_node_t*
-pars_return_statement(void);
-/*=======================*/
- /* out: return-statement node */
-/*************************************************************************
-Parses a procedure call. */
-
-func_node_t*
-pars_procedure_call(
-/*================*/
- /* out: function node */
- que_node_t* res_word,/* in: procedure name reserved word */
- que_node_t* args); /* in: argument list */
-/*************************************************************************
-Parses an assignment statement. */
-
-assign_node_t*
-pars_assignment_statement(
-/*======================*/
- /* out: assignment statement node */
- sym_node_t* var, /* in: variable to assign */
- que_node_t* val); /* in: value to assign */
-/*************************************************************************
-Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL. */
-
-fetch_node_t*
-pars_fetch_statement(
-/*=================*/
- /* out: fetch statement node */
- sym_node_t* cursor, /* in: cursor node */
- sym_node_t* into_list, /* in: variables to set, or NULL */
- sym_node_t* user_func); /* in: user function name, or NULL */
-/*************************************************************************
-Parses an open or close cursor statement. */
-
-open_node_t*
-pars_open_statement(
-/*================*/
- /* out: open or close statement node */
- ulint type, /* in: ROW_SEL_OPEN_CURSOR
- or ROW_SEL_CLOSE_CURSOR */
- sym_node_t* cursor); /* in: cursor node */
-/*************************************************************************
-Parses a row_printf-statement. */
-
-row_printf_node_t*
-pars_row_printf_statement(
-/*======================*/
- /* out: row_printf-statement node */
- sel_node_t* sel_node); /* in: select node */
-/*************************************************************************
-Parses a commit statement. */
-
-commit_node_t*
-pars_commit_statement(void);
-/*=======================*/
-/*************************************************************************
-Parses a rollback statement. */
-
-roll_node_t*
-pars_rollback_statement(void);
-/*=========================*/
-/*************************************************************************
-Parses a column definition at a table creation. */
-
-sym_node_t*
-pars_column_def(
-/*============*/
- /* out: column sym table
- node */
- sym_node_t* sym_node, /* in: column node in the
- symbol table */
- pars_res_word_t* type, /* in: data type */
- sym_node_t* len, /* in: length of column, or
- NULL */
- void* is_unsigned, /* in: if not NULL, column
- is of type UNSIGNED. */
- void* is_not_null); /* in: if not NULL, column
- is of type NOT NULL. */
-/*************************************************************************
-Parses a table creation operation. */
-
-tab_node_t*
-pars_create_table(
-/*==============*/
- /* out: table create subgraph */
- sym_node_t* table_sym, /* in: table name node in the symbol
- table */
- sym_node_t* column_defs, /* in: list of column names */
- void* not_fit_in_memory);/* in: a non-NULL pointer means that
- this is a table which in simulations
- should be simulated as not fitting
- in memory; thread is put to sleep
- to simulate disk accesses; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about non-NULL value if
- it has to reload the table definition
- from disk */
-/*************************************************************************
-Parses an index creation operation. */
-
-ind_node_t*
-pars_create_index(
-/*==============*/
- /* out: index create subgraph */
- pars_res_word_t* unique_def, /* in: not NULL if a unique index */
- pars_res_word_t* clustered_def, /* in: not NULL if a clustered index */
- sym_node_t* index_sym, /* in: index name node in the symbol
- table */
- sym_node_t* table_sym, /* in: table name node in the symbol
- table */
- sym_node_t* column_list); /* in: list of column names */
-/*************************************************************************
-Parses a procedure definition. */
-
-que_fork_t*
-pars_procedure_definition(
-/*======================*/
- /* out: query fork node */
- sym_node_t* sym_node, /* in: procedure id node in the symbol
- table */
- sym_node_t* param_list, /* in: parameter declaration list */
- que_node_t* stat_list); /* in: statement list */
-
-/*****************************************************************
-Parses a stored procedure call, when this is not within another stored
-procedure, that is, the client issues a procedure call directly.
-In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used. */
-
-que_fork_t*
-pars_stored_procedure_call(
-/*=======================*/
- /* out: query graph */
- sym_node_t* sym_node); /* in: stored procedure name */
-/**********************************************************************
-Completes a query graph by adding query thread and fork nodes
-above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE. */
-
-que_thr_t*
-pars_complete_graph_for_exec(
-/*=========================*/
- /* out: query thread node to run */
- que_node_t* node, /* in: root node for an incomplete
- query graph */
- trx_t* trx, /* in: transaction handle */
- mem_heap_t* heap); /* in: memory heap from which allocated */
-
-/********************************************************************
-Create parser info struct.*/
-
-pars_info_t*
-pars_info_create(void);
-/*==================*/
- /* out, own: info struct */
-
-/********************************************************************
-Free info struct and everything it contains.*/
-
-void
-pars_info_free(
-/*===========*/
- pars_info_t* info); /* in: info struct */
-
-/********************************************************************
-Add bound literal. */
-
-void
-pars_info_add_literal(
-/*==================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const void* address, /* in: address */
- ulint length, /* in: length of data */
- ulint type, /* in: type, e.g. DATA_FIXBINARY */
- ulint prtype); /* in: precise type, e.g.
- DATA_UNSIGNED */
-
-/********************************************************************
-Equivalent to pars_info_add_literal(info, name, str, strlen(str),
-DATA_VARCHAR, DATA_ENGLISH). */
-
-void
-pars_info_add_str_literal(
-/*======================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const char* str); /* in: string */
-
-/********************************************************************
-Equivalent to:
-
-char buf[4];
-mach_write_to_4(buf, val);
-pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-
-void
-pars_info_add_int4_literal(
-/*=======================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- lint val); /* in: value */
-
-/********************************************************************
-Equivalent to:
-
-char buf[8];
-mach_write_to_8(buf, val);
-pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-
-void
-pars_info_add_dulint_literal(
-/*=========================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- dulint val); /* in: value */
-/********************************************************************
-Add user function. */
-
-void
-pars_info_add_function(
-/*===================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: function name */
- pars_user_func_cb_t func, /* in: function address */
- void* arg); /* in: user-supplied argument */
-
-/********************************************************************
-Add bound id. */
-
-void
-pars_info_add_id(
-/*=============*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const char* id); /* in: id */
-
-/********************************************************************
-Get user function with the given name.*/
-
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
- /* out: user func, or NULL if not
- found */
- pars_info_t* info, /* in: info struct */
- const char* name); /* in: function name to find*/
-
-/********************************************************************
-Get bound literal with the given name.*/
-
-pars_bound_lit_t*
-pars_info_get_bound_lit(
-/*====================*/
- /* out: bound literal, or NULL if
- not found */
- pars_info_t* info, /* in: info struct */
- const char* name); /* in: bound literal name to find */
-
-/********************************************************************
-Get bound id with the given name.*/
-
-pars_bound_id_t*
-pars_info_get_bound_id(
-/*===================*/
- /* out: bound id, or NULL if not
- found */
- pars_info_t* info, /* in: info struct */
- const char* name); /* in: bound id name to find */
-
-
-/* Extra information supplied for pars_sql(). */
-struct pars_info_struct {
- mem_heap_t* heap; /* our own memory heap */
-
- ib_vector_t* funcs; /* user functions, or NULL
- (pars_user_func_t*) */
- ib_vector_t* bound_lits; /* bound literals, or NULL
- (pars_bound_lit_t*) */
- ib_vector_t* bound_ids; /* bound ids, or NULL
- (pars_bound_id_t*) */
-
- ibool graph_owns_us; /* if TRUE (which is the default),
- que_graph_free() will free us */
-};
-
-/* User-supplied function and argument. */
-struct pars_user_func_struct {
- const char* name; /* function name */
- pars_user_func_cb_t func; /* function address */
- void* arg; /* user-supplied argument */
-};
-
-/* Bound literal. */
-struct pars_bound_lit_struct {
- const char* name; /* name */
- const void* address; /* address */
- ulint length; /* length of data */
- ulint type; /* type, e.g. DATA_FIXBINARY */
- ulint prtype; /* precise type, e.g. DATA_UNSIGNED */
-};
-
-/* Bound id. */
-struct pars_bound_id_struct {
- const char* name; /* name */
- const char* id; /* id */
-};
-
-/* Struct used to denote a reserved word in a parsing tree */
-struct pars_res_word_struct{
- int code; /* the token code for the reserved word from
- pars0grm.h */
-};
-
-/* A predefined function or operator node in a parsing tree; this construct
-is also used for some non-functions like the assignment ':=' */
-struct func_node_struct{
- que_common_t common; /* type: QUE_NODE_FUNC */
- int func; /* token code of the function name */
- ulint class; /* class of the function */
- que_node_t* args; /* argument(s) of the function */
- UT_LIST_NODE_T(func_node_t) cond_list;
- /* list of comparison conditions; defined
- only for comparison operator nodes except,
- presently, for OPT_SCROLL_TYPE ones */
- UT_LIST_NODE_T(func_node_t) func_node_list;
- /* list of function nodes in a parsed
- query graph */
-};
-
-/* An order-by node in a select */
-struct order_node_struct{
- que_common_t common; /* type: QUE_NODE_ORDER */
- sym_node_t* column; /* order-by column */
- ibool asc; /* TRUE if ascending, FALSE if descending */
-};
-
-/* Procedure definition node */
-struct proc_node_struct{
- que_common_t common; /* type: QUE_NODE_PROC */
- sym_node_t* proc_id; /* procedure name symbol in the symbol
- table of this same procedure */
- sym_node_t* param_list; /* input and output parameters */
- que_node_t* stat_list; /* statement list */
- sym_tab_t* sym_tab; /* symbol table of this procedure */
-};
-
-/* elsif-element node */
-struct elsif_node_struct{
- que_common_t common; /* type: QUE_NODE_ELSIF */
- que_node_t* cond; /* if condition */
- que_node_t* stat_list; /* statement list */
-};
-
-/* if-statement node */
-struct if_node_struct{
- que_common_t common; /* type: QUE_NODE_IF */
- que_node_t* cond; /* if condition */
- que_node_t* stat_list; /* statement list */
- que_node_t* else_part; /* else-part statement list */
- elsif_node_t* elsif_list; /* elsif element list */
-};
-
-/* while-statement node */
-struct while_node_struct{
- que_common_t common; /* type: QUE_NODE_WHILE */
- que_node_t* cond; /* while condition */
- que_node_t* stat_list; /* statement list */
-};
-
-/* for-loop-statement node */
-struct for_node_struct{
- que_common_t common; /* type: QUE_NODE_FOR */
- sym_node_t* loop_var; /* loop variable: this is the
- dereferenced symbol from the
- variable declarations, not the
- symbol occurrence in the for loop
- definition */
- que_node_t* loop_start_limit;/* initial value of loop variable */
- que_node_t* loop_end_limit; /* end value of loop variable */
- lint loop_end_value; /* evaluated value for the end value:
- it is calculated only when the loop
- is entered, and will not change within
- the loop */
- que_node_t* stat_list; /* statement list */
-};
-
-/* exit statement node */
-struct exit_node_struct{
- que_common_t common; /* type: QUE_NODE_EXIT */
-};
-
-/* return-statement node */
-struct return_node_struct{
- que_common_t common; /* type: QUE_NODE_RETURN */
-};
-
-/* Assignment statement node */
-struct assign_node_struct{
- que_common_t common; /* type: QUE_NODE_ASSIGNMENT */
- sym_node_t* var; /* variable to set */
- que_node_t* val; /* value to assign */
-};
-
-/* Column assignment node */
-struct col_assign_node_struct{
- que_common_t common; /* type: QUE_NODE_COL_ASSIGN */
- sym_node_t* col; /* column to set */
- que_node_t* val; /* value to assign */
-};
-
-/* Classes of functions */
-#define PARS_FUNC_ARITH 1 /* +, -, *, / */
-#define PARS_FUNC_LOGICAL 2
-#define PARS_FUNC_CMP 3
-#define PARS_FUNC_PREDEFINED 4 /* TO_NUMBER, SUBSTR, ... */
-#define PARS_FUNC_AGGREGATE 5 /* COUNT, DISTINCT, SUM */
-#define PARS_FUNC_OTHER 6 /* these are not real functions,
- e.g., := */
-
-#ifndef UNIV_NONINL
-#include "pars0pars.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/pars0pars.ic b/storage/innobase/include/pars0pars.ic
deleted file mode 100644
index 155b6659ace..00000000000
--- a/storage/innobase/include/pars0pars.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-SQL parser
-
-(c) 1996 Innobase Oy
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h
deleted file mode 100644
index fc7df92ff60..00000000000
--- a/storage/innobase/include/pars0sym.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/******************************************************
-SQL parser symbol table
-
-(c) 1997 Innobase Oy
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0sym_h
-#define pars0sym_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "dict0types.h"
-#include "pars0types.h"
-#include "row0types.h"
-
-/**********************************************************************
-Creates a symbol table for a single stored procedure or query. */
-
-sym_tab_t*
-sym_tab_create(
-/*===========*/
- /* out, own: symbol table */
- mem_heap_t* heap); /* in: memory heap where to create */
-/**********************************************************************
-Frees the memory allocated dynamically AFTER parsing phase for variables
-etc. in the symbol table. Does not free the mem heap where the table was
-originally created. Frees also SQL explicit cursor definitions. */
-
-void
-sym_tab_free_private(
-/*=================*/
- sym_tab_t* sym_tab); /* in, own: symbol table */
-/**********************************************************************
-Adds an integer literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_int_lit(
-/*================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- ulint val); /* in: integer value */
-/**********************************************************************
-Adds a string literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_str_lit(
-/*================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- byte* str, /* in: string with no quotes around
- it */
- ulint len); /* in: string length */
-/**********************************************************************
-Add a bound literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_bound_lit(
-/*==================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- const char* name, /* in: name of bound literal */
- ulint* lit_type); /* out: type of literal (PARS_*_LIT) */
-/**********************************************************************
-Adds an SQL null literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_null_lit(
-/*=================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab); /* in: symbol table */
-/**********************************************************************
-Adds an identifier to a symbol table. */
-
-sym_node_t*
-sym_tab_add_id(
-/*===========*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- byte* name, /* in: identifier name */
- ulint len); /* in: identifier length */
-
-/**********************************************************************
-Add a bound identifier to a symbol table. */
-
-sym_node_t*
-sym_tab_add_bound_id(
-/*===========*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- const char* name); /* in: name of bound id */
-
-#define SYM_CLUST_FIELD_NO 0
-#define SYM_SEC_FIELD_NO 1
-
-struct sym_node_struct{
- que_common_t common; /* node type:
- QUE_NODE_SYMBOL */
- /* NOTE: if the data field in 'common.val' is not NULL and the symbol
- table node is not for a temporary column, the memory for the value has
- been allocated from dynamic memory and it should be freed when the
- symbol table is discarded */
-
- /* 'alias' and 'indirection' are almost the same, but not quite.
- 'alias' always points to the primary instance of the variable, while
- 'indirection' does the same only if we should use the primary
- instance's values for the node's data. This is usually the case, but
- when initializing a cursor (e.g., "DECLARE CURSOR c IS SELECT * FROM
- t WHERE id = x;"), we copy the values from the primary instance to
- the cursor's instance so that they are fixed for the duration of the
- cursor, and set 'indirection' to NULL. If we did not, the value of
- 'x' could change between fetches and things would break horribly.
-
- TODO: It would be cleaner to make 'indirection' a boolean field and
- always use 'alias' to refer to the primary node. */
-
- sym_node_t* indirection; /* pointer to
- another symbol table
- node which contains
- the value for this
- node, NULL otherwise */
- sym_node_t* alias; /* pointer to
- another symbol table
- node for which this
- node is an alias,
- NULL otherwise */
- UT_LIST_NODE_T(sym_node_t) col_var_list; /* list of table
- columns or a list of
- input variables for an
- explicit cursor */
- ibool copy_val; /* TRUE if a column
- and its value should
- be copied to dynamic
- memory when fetched */
- ulint field_nos[2]; /* if a column, in
- the position
- SYM_CLUST_FIELD_NO is
- the field number in the
- clustered index; in
- the position
- SYM_SEC_FIELD_NO
- the field number in the
- non-clustered index to
- use first; if not found
- from the index, then
- ULINT_UNDEFINED */
- ibool resolved; /* TRUE if the
- meaning of a variable
- or a column has been
- resolved; for literals
- this is always TRUE */
- ulint token_type; /* SYM_VAR, SYM_COLUMN,
- SYM_IMPLICIT_VAR,
- SYM_LIT, SYM_TABLE,
- SYM_CURSOR, ... */
- const char* name; /* name of an id */
- ulint name_len; /* id name length */
- dict_table_t* table; /* table definition
- if a table id or a
- column id */
- ulint col_no; /* column number if a
- column */
- sel_buf_t* prefetch_buf; /* NULL, or a buffer
- for cached column
- values for prefetched
- rows */
- sel_node_t* cursor_def; /* cursor definition
- select node if a
- named cursor */
- ulint param_type; /* PARS_INPUT,
- PARS_OUTPUT, or
- PARS_NOT_PARAM if not a
- procedure parameter */
- sym_tab_t* sym_table; /* back pointer to
- the symbol table */
- UT_LIST_NODE_T(sym_node_t) sym_list; /* list of symbol
- nodes */
-};
-
-struct sym_tab_struct{
- que_t* query_graph;
- /* query graph generated by the
- parser */
- const char* sql_string;
- /* SQL string to parse */
- size_t string_len;
- /* SQL string length */
- int next_char_pos;
- /* position of the next character in
- sql_string to give to the lexical
- analyzer */
- pars_info_t* info; /* extra information, or NULL */
- sym_node_list_t sym_list;
- /* list of symbol nodes in the symbol
- table */
- UT_LIST_BASE_NODE_T(func_node_t)
- func_node_list;
- /* list of function nodes in the
- parsed query graph */
- mem_heap_t* heap; /* memory heap from which we can
- allocate space */
-};
-
-/* Types of a symbol table entry */
-#define SYM_VAR 91 /* declared parameter or local
- variable of a procedure */
-#define SYM_IMPLICIT_VAR 92 /* storage for an intermediate result
- of a calculation */
-#define SYM_LIT 93 /* literal */
-#define SYM_TABLE 94 /* database table name */
-#define SYM_COLUMN 95 /* database table column name */
-#define SYM_CURSOR 96 /* named cursor */
-#define SYM_PROCEDURE_NAME 97 /* stored procedure name */
-#define SYM_INDEX 98 /* database index name */
-#define SYM_FUNCTION 99 /* user function name */
-
-#ifndef UNIV_NONINL
-#include "pars0sym.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/pars0sym.ic b/storage/innobase/include/pars0sym.ic
deleted file mode 100644
index 9508d423769..00000000000
--- a/storage/innobase/include/pars0sym.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-SQL parser symbol table
-
-(c) 1997 Innobase Oy
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/pars0types.h b/storage/innobase/include/pars0types.h
deleted file mode 100644
index bf7df89a883..00000000000
--- a/storage/innobase/include/pars0types.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/******************************************************
-SQL parser global types
-
-(c) 1997 Innobase Oy
-
-Created 1/11/1998 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0types_h
-#define pars0types_h
-
-typedef struct pars_info_struct pars_info_t;
-typedef struct pars_user_func_struct pars_user_func_t;
-typedef struct pars_bound_lit_struct pars_bound_lit_t;
-typedef struct pars_bound_id_struct pars_bound_id_t;
-typedef struct sym_node_struct sym_node_t;
-typedef struct sym_tab_struct sym_tab_t;
-typedef struct pars_res_word_struct pars_res_word_t;
-typedef struct func_node_struct func_node_t;
-typedef struct order_node_struct order_node_t;
-typedef struct proc_node_struct proc_node_t;
-typedef struct elsif_node_struct elsif_node_t;
-typedef struct if_node_struct if_node_t;
-typedef struct while_node_struct while_node_t;
-typedef struct for_node_struct for_node_t;
-typedef struct exit_node_struct exit_node_t;
-typedef struct return_node_struct return_node_t;
-typedef struct assign_node_struct assign_node_t;
-typedef struct col_assign_node_struct col_assign_node_t;
-
-typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t;
-
-#endif
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
deleted file mode 100644
index 8fbf5330c89..00000000000
--- a/storage/innobase/include/que0que.h
+++ /dev/null
@@ -1,510 +0,0 @@
-/******************************************************
-Query graph
-
-(c) 1996 Innobase Oy
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef que0que_h
-#define que0que_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0trx.h"
-#include "srv0srv.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "pars0types.h"
-
-/* If the following flag is set TRUE, the module will print trace info
-of SQL execution in the UNIV_SQL_DEBUG version */
-extern ibool que_trace_on;
-
-/***************************************************************************
-Adds a query graph to the session's list of graphs. */
-
-void
-que_graph_publish(
-/*==============*/
- que_t* graph, /* in: graph */
- sess_t* sess); /* in: session */
-/***************************************************************************
-Creates a query graph fork node. */
-
-que_fork_t*
-que_fork_create(
-/*============*/
- /* out, own: fork node */
- que_t* graph, /* in: graph, if NULL then this
- fork node is assumed to be the
- graph root */
- que_node_t* parent, /* in: parent node */
- ulint fork_type, /* in: fork type */
- mem_heap_t* heap); /* in: memory heap where created */
-/***************************************************************************
-Gets the first thr in a fork. */
-UNIV_INLINE
-que_thr_t*
-que_fork_get_first_thr(
-/*===================*/
- que_fork_t* fork); /* in: query fork */
-/***************************************************************************
-Gets the child node of the first thr in a fork. */
-UNIV_INLINE
-que_node_t*
-que_fork_get_child(
-/*===============*/
- que_fork_t* fork); /* in: query fork */
-/***************************************************************************
-Sets the parent of a graph node. */
-UNIV_INLINE
-void
-que_node_set_parent(
-/*================*/
- que_node_t* node, /* in: graph node */
- que_node_t* parent);/* in: parent */
-/***************************************************************************
-Creates a query graph thread node. */
-
-que_thr_t*
-que_thr_create(
-/*===========*/
- /* out, own: query thread node */
- que_fork_t* parent, /* in: parent node, i.e., a fork node */
- mem_heap_t* heap); /* in: memory heap where created */
-/**************************************************************************
-Checks if the query graph is in a state where it should be freed, and
-frees it in that case. If the session is in a state where it should be
-closed, also this is done. */
-
-ibool
-que_graph_try_free(
-/*===============*/
- /* out: TRUE if freed */
- que_t* graph); /* in: query graph */
-/**************************************************************************
-Frees a query graph, but not the heap where it was created. Does not free
-explicit cursor declarations, they are freed in que_graph_free. */
-
-void
-que_graph_free_recursive(
-/*=====================*/
- que_node_t* node); /* in: query graph node */
-/**************************************************************************
-Frees a query graph. */
-
-void
-que_graph_free(
-/*===========*/
- que_t* graph); /* in: query graph; we assume that the memory
- heap where this graph was created is private
- to this graph: if not, then use
- que_graph_free_recursive and free the heap
- afterwards! */
-/**************************************************************************
-Stops a query thread if graph or trx is in a state requiring it. The
-conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
-to be reserved. */
-
-ibool
-que_thr_stop(
-/*=========*/
- /* out: TRUE if stopped */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction. */
-
-void
-que_thr_move_to_run_state_for_mysql(
-/*================================*/
- que_thr_t* thr, /* in: an query thread */
- trx_t* trx); /* in: transaction */
-/**************************************************************************
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL
-select, when there is no error or lock wait. */
-
-void
-que_thr_stop_for_mysql_no_error(
-/*============================*/
- que_thr_t* thr, /* in: query thread */
- trx_t* trx); /* in: transaction */
-/**************************************************************************
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
-query thread is stopped and made inactive, except in the case where
-it was put to the lock wait state in lock0lock.c, but the lock has already
-been granted or the transaction chosen as a victim in deadlock resolution. */
-
-void
-que_thr_stop_for_mysql(
-/*===================*/
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Run a query thread. Handles lock waits. */
-
-void
-que_run_threads(
-/*============*/
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-After signal handling is finished, returns control to a query graph error
-handling routine. (Currently, just returns the control to the root of the
-graph so that the graph can communicate an error message to the client.) */
-
-void
-que_fork_error_handle(
-/*==================*/
- trx_t* trx, /* in: trx */
- que_t* fork); /* in: query graph which was run before signal
- handling started, NULL not allowed */
-/**************************************************************************
-Moves a suspended query thread to the QUE_THR_RUNNING state and releases
-a single worker thread to execute it. This function should be used to end
-the wait state of a query thread waiting for a lock or a stored procedure
-completion. */
-
-void
-que_thr_end_wait(
-/*=============*/
- que_thr_t* thr, /* in: query thread in the
- QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
- que_thr_t** next_thr); /* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/**************************************************************************
-Same as que_thr_end_wait, but no parameter next_thr available. */
-
-void
-que_thr_end_wait_no_next_thr(
-/*=========================*/
- que_thr_t* thr); /* in: query thread in the
- QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
-/**************************************************************************
-Starts execution of a command in a query fork. Picks a query thread which
-is not in the QUE_THR_RUNNING state and moves it to that state. If none
-can be chosen, a situation which may arise in parallelized fetches, NULL
-is returned. */
-
-que_thr_t*
-que_fork_start_command(
-/*===================*/
- /* out: a query thread of the graph moved to
- QUE_THR_RUNNING state, or NULL; the query
- thread should be executed by que_run_threads
- by the caller */
- que_fork_t* fork); /* in: a query fork */
-/***************************************************************************
-Gets the trx of a query thread. */
-UNIV_INLINE
-trx_t*
-thr_get_trx(
-/*========*/
- que_thr_t* thr); /* in: query thread */
-/***************************************************************************
-Gets the type of a graph node. */
-UNIV_INLINE
-ulint
-que_node_get_type(
-/*==============*/
- que_node_t* node); /* in: graph node */
-/***************************************************************************
-Gets pointer to the value data type field of a graph node. */
-UNIV_INLINE
-dtype_t*
-que_node_get_data_type(
-/*===================*/
- que_node_t* node); /* in: graph node */
-/***************************************************************************
-Gets pointer to the value dfield of a graph node. */
-UNIV_INLINE
-dfield_t*
-que_node_get_val(
-/*=============*/
- que_node_t* node); /* in: graph node */
-/***************************************************************************
-Gets the value buffer size of a graph node. */
-UNIV_INLINE
-ulint
-que_node_get_val_buf_size(
-/*======================*/
- /* out: val buffer size, not defined if
- val.data == NULL in node */
- que_node_t* node); /* in: graph node */
-/***************************************************************************
-Sets the value buffer size of a graph node. */
-UNIV_INLINE
-void
-que_node_set_val_buf_size(
-/*======================*/
- que_node_t* node, /* in: graph node */
- ulint size); /* in: size */
-/*************************************************************************
-Gets the next list node in a list of query graph nodes. */
-UNIV_INLINE
-que_node_t*
-que_node_get_next(
-/*==============*/
- que_node_t* node); /* in: node in a list */
-/*************************************************************************
-Gets the parent node of a query graph node. */
-UNIV_INLINE
-que_node_t*
-que_node_get_parent(
-/*================*/
- /* out: parent node or NULL */
- que_node_t* node); /* in: node */
-/********************************************************************
-Get the first containing loop node (e.g. while_node_t or for_node_t) for the
-given node, or NULL if the node is not within a loop. */
-
-que_node_t*
-que_node_get_containing_loop_node(
-/*==============================*/
- /* out: containing loop node, or NULL. */
- que_node_t* node); /* in: node */
-/*************************************************************************
-Catenates a query graph node to a list of them, possible empty list. */
-UNIV_INLINE
-que_node_t*
-que_node_list_add_last(
-/*===================*/
- /* out: one-way list of nodes */
- que_node_t* node_list, /* in: node list, or NULL */
- que_node_t* node); /* in: node */
-/*************************************************************************
-Gets a query graph node list length. */
-UNIV_INLINE
-ulint
-que_node_list_get_len(
-/*==================*/
- /* out: length, for NULL list 0 */
- que_node_t* node_list); /* in: node list, or NULL */
-/**************************************************************************
-Checks if graph, trx, or session is in a state where the query thread should
-be stopped. */
-UNIV_INLINE
-ibool
-que_thr_peek_stop(
-/*==============*/
- /* out: TRUE if should be stopped; NOTE that
- if the peek is made without reserving the
- kernel mutex, then another peek with the
- mutex reserved is necessary before deciding
- the actual stopping */
- que_thr_t* thr); /* in: query thread */
-/***************************************************************************
-Returns TRUE if the query graph is for a SELECT statement. */
-UNIV_INLINE
-ibool
-que_graph_is_select(
-/*================*/
- /* out: TRUE if a select */
- que_t* graph); /* in: graph */
-/**************************************************************************
-Prints info of an SQL query graph node. */
-
-void
-que_node_print_info(
-/*================*/
- que_node_t* node); /* in: query graph node */
-/*************************************************************************
-Evaluate the given SQL */
-
-ulint
-que_eval_sql(
-/*=========*/
- /* out: error code or DB_SUCCESS */
- pars_info_t* info, /* in: info struct, or NULL */
- const char* sql, /* in: SQL string */
- ibool reserve_dict_mutex,
- /* in: if TRUE, acquire/release
- dict_sys->mutex around call to pars_sql. */
- trx_t* trx); /* in: trx */
-
-/* Query graph query thread node: the fields are protected by the kernel
-mutex with the exceptions named below */
-
-struct que_thr_struct{
- que_common_t common; /* type: QUE_NODE_THR */
- ulint magic_n; /* magic number to catch memory
- corruption */
- que_node_t* child; /* graph child node */
- que_t* graph; /* graph where this node belongs */
- ibool is_active; /* TRUE if the thread has been set
- to the run state in
- que_thr_move_to_run_state, but not
- deactivated in
- que_thr_dec_reference_count */
- ulint state; /* state of the query thread */
- UT_LIST_NODE_T(que_thr_t)
- thrs; /* list of thread nodes of the fork
- node */
- UT_LIST_NODE_T(que_thr_t)
- trx_thrs; /* lists of threads in wait list of
- the trx */
- UT_LIST_NODE_T(que_thr_t)
- queue; /* list of runnable thread nodes in
- the server task queue */
- /*------------------------------*/
- /* The following fields are private to the OS thread executing the
- query thread, and are not protected by the kernel mutex: */
-
- que_node_t* run_node; /* pointer to the node where the
- subgraph down from this node is
- currently executed */
- que_node_t* prev_node; /* pointer to the node from which
- the control came */
- ulint resource; /* resource usage of the query thread
- thus far */
- ulint lock_state; /* lock state of thread (table or
- row) */
-};
-
-#define QUE_THR_MAGIC_N 8476583
-#define QUE_THR_MAGIC_FREED 123461526
-
-/* Query graph fork node: its fields are protected by the kernel mutex */
-struct que_fork_struct{
- que_common_t common; /* type: QUE_NODE_FORK */
- que_t* graph; /* query graph of this node */
- ulint fork_type; /* fork type */
- ulint n_active_thrs; /* if this is the root of a graph, the
- number query threads that have been
- started in que_thr_move_to_run_state
- but for which que_thr_dec_refer_count
- has not yet been called */
- trx_t* trx; /* transaction: this is set only in
- the root node */
- ulint state; /* state of the fork node */
- que_thr_t* caller; /* pointer to a possible calling query
- thread */
- UT_LIST_BASE_NODE_T(que_thr_t)
- thrs; /* list of query threads */
- /*------------------------------*/
- /* The fields in this section are defined only in the root node */
- sym_tab_t* sym_tab; /* symbol table of the query,
- generated by the parser, or NULL
- if the graph was created 'by hand' */
- pars_info_t* info; /* in: info struct, or NULL */
- /* The following cur_... fields are relevant only in a select graph */
-
- ulint cur_end; /* QUE_CUR_NOT_DEFINED, QUE_CUR_START,
- QUE_CUR_END */
- ulint cur_pos; /* if there are n rows in the result
- set, values 0 and n + 1 mean before
- first row, or after last row, depending
- on cur_end; values 1...n mean a row
- index */
- ibool cur_on_row; /* TRUE if cursor is on a row, i.e.,
- it is not before the first row or
- after the last row */
- dulint n_inserts; /* number of rows inserted */
- dulint n_updates; /* number of rows updated */
- dulint n_deletes; /* number of rows deleted */
- sel_node_t* last_sel_node; /* last executed select node, or NULL
- if none */
- UT_LIST_NODE_T(que_fork_t)
- graphs; /* list of query graphs of a session
- or a stored procedure */
- /*------------------------------*/
- mem_heap_t* heap; /* memory heap where the fork was
- created */
-
-};
-
-/* Query fork (or graph) types */
-#define QUE_FORK_SELECT_NON_SCROLL 1 /* forward-only cursor */
-#define QUE_FORK_SELECT_SCROLL 2 /* scrollable cursor */
-#define QUE_FORK_INSERT 3
-#define QUE_FORK_UPDATE 4
-#define QUE_FORK_ROLLBACK 5
- /* This is really the undo graph used in rollback,
- no signal-sending roll_node in this graph */
-#define QUE_FORK_PURGE 6
-#define QUE_FORK_EXECUTE 7
-#define QUE_FORK_PROCEDURE 8
-#define QUE_FORK_PROCEDURE_CALL 9
-#define QUE_FORK_MYSQL_INTERFACE 10
-#define QUE_FORK_RECOVERY 11
-
-/* Query fork (or graph) states */
-#define QUE_FORK_ACTIVE 1
-#define QUE_FORK_COMMAND_WAIT 2
-#define QUE_FORK_INVALID 3
-#define QUE_FORK_BEING_FREED 4
-
-/* Flag which is ORed to control structure statement node types */
-#define QUE_NODE_CONTROL_STAT 1024
-
-/* Query graph node types */
-#define QUE_NODE_LOCK 1
-#define QUE_NODE_INSERT 2
-#define QUE_NODE_UPDATE 4
-#define QUE_NODE_CURSOR 5
-#define QUE_NODE_SELECT 6
-#define QUE_NODE_AGGREGATE 7
-#define QUE_NODE_FORK 8
-#define QUE_NODE_THR 9
-#define QUE_NODE_UNDO 10
-#define QUE_NODE_COMMIT 11
-#define QUE_NODE_ROLLBACK 12
-#define QUE_NODE_PURGE 13
-#define QUE_NODE_CREATE_TABLE 14
-#define QUE_NODE_CREATE_INDEX 15
-#define QUE_NODE_SYMBOL 16
-#define QUE_NODE_RES_WORD 17
-#define QUE_NODE_FUNC 18
-#define QUE_NODE_ORDER 19
-#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_ASSIGNMENT 23
-#define QUE_NODE_FETCH 24
-#define QUE_NODE_OPEN 25
-#define QUE_NODE_COL_ASSIGNMENT 26
-#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_RETURN 28
-#define QUE_NODE_ROW_PRINTF 29
-#define QUE_NODE_ELSIF 30
-#define QUE_NODE_CALL 31
-#define QUE_NODE_EXIT 32
-
-/* Query thread states */
-#define QUE_THR_RUNNING 1
-#define QUE_THR_PROCEDURE_WAIT 2
-#define QUE_THR_COMPLETED 3 /* in selects this means that the
- thread is at the end of its result set
- (or start, in case of a scroll cursor);
- in other statements, this means the
- thread has done its task */
-#define QUE_THR_COMMAND_WAIT 4
-#define QUE_THR_LOCK_WAIT 5
-#define QUE_THR_SIG_REPLY_WAIT 6
-#define QUE_THR_SUSPENDED 7
-#define QUE_THR_ERROR 8
-
-/* Query thread lock states */
-#define QUE_THR_LOCK_NOLOCK 0
-#define QUE_THR_LOCK_ROW 1
-#define QUE_THR_LOCK_TABLE 2
-
-/* From where the cursor position is counted */
-#define QUE_CUR_NOT_DEFINED 1
-#define QUE_CUR_START 2
-#define QUE_CUR_END 3
-
-
-#ifndef UNIV_NONINL
-#include "que0que.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/que0que.ic b/storage/innobase/include/que0que.ic
deleted file mode 100644
index a20108a7820..00000000000
--- a/storage/innobase/include/que0que.ic
+++ /dev/null
@@ -1,259 +0,0 @@
-/******************************************************
-Query graph
-
-(c) 1996 Innobase Oy
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "usr0sess.h"
-
-/***************************************************************************
-Gets the trx of a query thread. */
-UNIV_INLINE
-trx_t*
-thr_get_trx(
-/*========*/
- que_thr_t* thr) /* in: query thread */
-{
- ut_ad(thr);
-
- return(thr->graph->trx);
-}
-
-/***************************************************************************
-Gets the first thr in a fork. */
-UNIV_INLINE
-que_thr_t*
-que_fork_get_first_thr(
-/*===================*/
- que_fork_t* fork) /* in: query fork */
-{
- return(UT_LIST_GET_FIRST(fork->thrs));
-}
-
-/***************************************************************************
-Gets the child node of the first thr in a fork. */
-UNIV_INLINE
-que_node_t*
-que_fork_get_child(
-/*===============*/
- que_fork_t* fork) /* in: query fork */
-{
- que_thr_t* thr;
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- return(thr->child);
-}
-
-/***************************************************************************
-Gets the type of a graph node. */
-UNIV_INLINE
-ulint
-que_node_get_type(
-/*==============*/
- que_node_t* node) /* in: graph node */
-{
- ut_ad(node);
-
- return(((que_common_t*)node)->type);
-}
-
-/***************************************************************************
-Gets pointer to the value dfield of a graph node. */
-UNIV_INLINE
-dfield_t*
-que_node_get_val(
-/*=============*/
- que_node_t* node) /* in: graph node */
-{
- ut_ad(node);
-
- return(&(((que_common_t*)node)->val));
-}
-
-/***************************************************************************
-Gets the value buffer size of a graph node. */
-UNIV_INLINE
-ulint
-que_node_get_val_buf_size(
-/*======================*/
- /* out: val buffer size, not defined if
- val.data == NULL in node */
- que_node_t* node) /* in: graph node */
-{
- ut_ad(node);
-
- return(((que_common_t*)node)->val_buf_size);
-}
-
-/***************************************************************************
-Sets the value buffer size of a graph node. */
-UNIV_INLINE
-void
-que_node_set_val_buf_size(
-/*======================*/
- que_node_t* node, /* in: graph node */
- ulint size) /* in: size */
-{
- ut_ad(node);
-
- ((que_common_t*)node)->val_buf_size = size;
-}
-
-/***************************************************************************
-Sets the parent of a graph node. */
-UNIV_INLINE
-void
-que_node_set_parent(
-/*================*/
- que_node_t* node, /* in: graph node */
- que_node_t* parent) /* in: parent */
-{
- ut_ad(node);
-
- ((que_common_t*)node)->parent = parent;
-}
-
-/***************************************************************************
-Gets pointer to the value data type field of a graph node. */
-UNIV_INLINE
-dtype_t*
-que_node_get_data_type(
-/*===================*/
- que_node_t* node) /* in: graph node */
-{
- ut_ad(node);
-
- return(&(((que_common_t*)node)->val.type));
-}
-
-/*************************************************************************
-Catenates a query graph node to a list of them, possible empty list. */
-UNIV_INLINE
-que_node_t*
-que_node_list_add_last(
-/*===================*/
- /* out: one-way list of nodes */
- que_node_t* node_list, /* in: node list, or NULL */
- que_node_t* node) /* in: node */
-{
- que_common_t* cnode;
- que_common_t* cnode2;
-
- cnode = node;
-
- cnode->brother = NULL;
-
- if (node_list == NULL) {
-
- return(node);
- }
-
- cnode2 = node_list;
-
- while (cnode2->brother != NULL) {
- cnode2 = cnode2->brother;
- }
-
- cnode2->brother = node;
-
- return(node_list);
-}
-
-/*************************************************************************
-Gets the next list node in a list of query graph nodes. */
-UNIV_INLINE
-que_node_t*
-que_node_get_next(
-/*==============*/
- /* out: next node in a list of nodes */
- que_node_t* node) /* in: node in a list */
-{
- return(((que_common_t*)node)->brother);
-}
-
-/*************************************************************************
-Gets a query graph node list length. */
-UNIV_INLINE
-ulint
-que_node_list_get_len(
-/*==================*/
- /* out: length, for NULL list 0 */
- que_node_t* node_list) /* in: node list, or NULL */
-{
- que_common_t* cnode;
- ulint len;
-
- cnode = node_list;
- len = 0;
-
- while (cnode != NULL) {
- len++;
- cnode = cnode->brother;
- }
-
- return(len);
-}
-
-/*************************************************************************
-Gets the parent node of a query graph node. */
-UNIV_INLINE
-que_node_t*
-que_node_get_parent(
-/*================*/
- /* out: parent node or NULL */
- que_node_t* node) /* in: node */
-{
- return(((que_common_t*)node)->parent);
-}
-
-/**************************************************************************
-Checks if graph, trx, or session is in a state where the query thread should
-be stopped. */
-UNIV_INLINE
-ibool
-que_thr_peek_stop(
-/*==============*/
- /* out: TRUE if should be stopped; NOTE that
- if the peek is made without reserving the
- kernel mutex, then another peek with the
- mutex reserved is necessary before deciding
- the actual stopping */
- que_thr_t* thr) /* in: query thread */
-{
- trx_t* trx;
- que_t* graph;
-
- graph = thr->graph;
- trx = graph->trx;
-
- if (graph->state != QUE_FORK_ACTIVE
- || trx->que_state == TRX_QUE_LOCK_WAIT
- || (UT_LIST_GET_LEN(trx->signals) > 0
- && trx->que_state == TRX_QUE_RUNNING)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***************************************************************************
-Returns TRUE if the query graph is for a SELECT statement. */
-UNIV_INLINE
-ibool
-que_graph_is_select(
-/*================*/
- /* out: TRUE if a select */
- que_t* graph) /* in: graph */
-{
- if (graph->fork_type == QUE_FORK_SELECT_SCROLL
- || graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/innobase/include/que0types.h b/storage/innobase/include/que0types.h
deleted file mode 100644
index 30e3f0a172b..00000000000
--- a/storage/innobase/include/que0types.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/******************************************************
-Query graph global types
-
-(c) 1996 Innobase Oy
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef que0types_h
-#define que0types_h
-
-#include "data0data.h"
-#include "dict0types.h"
-
-/* Pseudotype for all graph nodes */
-typedef void que_node_t;
-
-typedef struct que_fork_struct que_fork_t;
-
-/* Query graph root is a fork node */
-typedef que_fork_t que_t;
-
-typedef struct que_thr_struct que_thr_t;
-typedef struct que_common_struct que_common_t;
-
-/* Common struct at the beginning of each query graph node; the name of this
-substruct must be 'common' */
-
-struct que_common_struct{
- ulint type; /* query node type */
- que_node_t* parent; /* back pointer to parent node, or NULL */
- que_node_t* brother;/* pointer to a possible brother node */
- dfield_t val; /* evaluated value for an expression */
- ulint val_buf_size;
- /* buffer size for the evaluated value data,
- if the buffer has been allocated dynamically:
- if this field is != 0, and the node is a
- symbol node or a function node, then we
- have to free the data field in val
- explicitly */
-};
-
-#endif
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
deleted file mode 100644
index 97b6d7e9dd9..00000000000
--- a/storage/innobase/include/read0read.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/******************************************************
-Cursor read
-
-(c) 1997 Innobase Oy
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0read_h
-#define read0read_h
-
-#include "univ.i"
-
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "trx0trx.h"
-#include "read0types.h"
-
-/*************************************************************************
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view. */
-
-read_view_t*
-read_view_open_now(
-/*===============*/
- /* out, own: read view struct */
- dulint cr_trx_id, /* in: trx_id of creating
- transaction, or (0, 0) used in
- purge */
- mem_heap_t* heap); /* in: memory heap from which
- allocated */
-/*************************************************************************
-Makes a copy of the oldest existing read view, or opens a new. The view
-must be closed with ..._close. */
-
-read_view_t*
-read_view_oldest_copy_or_open_new(
-/*==============================*/
- /* out, own: read view struct */
- dulint cr_trx_id, /* in: trx_id of creating
- transaction, or (0, 0) used in
- purge */
- mem_heap_t* heap); /* in: memory heap from which
- allocated */
-/*************************************************************************
-Closes a read view. */
-
-void
-read_view_close(
-/*============*/
- read_view_t* view); /* in: read view */
-/*************************************************************************
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-
-void
-read_view_close_for_mysql(
-/*======================*/
- trx_t* trx); /* in: trx which has a read view */
-/*************************************************************************
-Checks if a read view sees the specified transaction. */
-UNIV_INLINE
-ibool
-read_view_sees_trx_id(
-/*==================*/
- /* out: TRUE if sees */
- read_view_t* view, /* in: read view */
- dulint trx_id);/* in: trx id */
-/*************************************************************************
-Prints a read view to stderr. */
-
-void
-read_view_print(
-/*============*/
- read_view_t* view); /* in: read view */
-/*************************************************************************
-Create a consistent cursor view for mysql to be used in cursors. In this
-consistent read view modifications done by the creating transaction or future
-transactions are not visible. */
-
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
- trx_t* cr_trx);/* in: trx where cursor view is created */
-/*************************************************************************
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. */
-
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
- trx_t* trx, /* in: trx */
- cursor_view_t* curview); /* in: cursor view to be closed */
-/*************************************************************************
-This function sets a given consistent cursor view to a transaction
-read view if given consistent cursor view is not NULL. Otherwise, function
-restores a global read view to a transaction read view. */
-
-void
-read_cursor_set_for_mysql(
-/*======================*/
- trx_t* trx, /* in: transaction where cursor is set */
- cursor_view_t* curview);/* in: consistent cursor view to be set */
-
-/* Read view lists the trx ids of those transactions for which a consistent
-read should not see the modifications to the database. */
-
-struct read_view_struct{
- ulint type; /* VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
- dulint undo_no; /* (0, 0) or if type is VIEW_HIGH_GRANULARITY
- transaction undo_no when this high-granularity
- consistent read view was created */
- dulint low_limit_no; /* The view does not need to see the undo
- logs for transactions whose transaction number
- is strictly smaller (<) than this value: they
- can be removed in purge if not needed by other
- views */
- dulint low_limit_id; /* The read should not see any transaction
- with trx id >= this value */
- dulint up_limit_id; /* The read should see all trx ids which
- are strictly smaller (<) than this value */
- ulint n_trx_ids; /* Number of cells in the trx_ids array */
- dulint* trx_ids; /* Additional trx ids which the read should
- not see: typically, these are the active
- transactions at the time when the read is
- serialized, except the reading transaction
- itself; the trx ids in this array are in a
- descending order */
- dulint creator_trx_id; /* trx id of creating transaction, or
- (0, 0) used in purge */
- UT_LIST_NODE_T(read_view_t) view_list;
- /* List of read views in trx_sys */
-};
-
-/* Read view types */
-#define VIEW_NORMAL 1 /* Normal consistent read view
- where transaction does not see changes
- made by active transactions except
- creating transaction. */
-#define VIEW_HIGH_GRANULARITY 2 /* High-granularity read view where
- transaction does not see changes
- made by active transactions and own
- changes after a point in time when this
- read view was created. */
-
-/* Implement InnoDB framework to support consistent read views in
-cursors. This struct holds both heap where consistent read view
-is allocated and pointer to a read view. */
-
-struct cursor_view_struct{
- mem_heap_t* heap;
- /* Memory heap for the cursor view */
- read_view_t* read_view;
- /* Consistent read view of the cursor*/
- ulint n_mysql_tables_in_use;
- /* number of Innobase tables used in the
- processing of this cursor */
-};
-
-#ifndef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic
deleted file mode 100644
index 3aded1ca07c..00000000000
--- a/storage/innobase/include/read0read.ic
+++ /dev/null
@@ -1,81 +0,0 @@
-/******************************************************
-Cursor read
-
-(c) 1997 Innobase Oy
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-/*************************************************************************
-Gets the nth trx id in a read view. */
-UNIV_INLINE
-dulint
-read_view_get_nth_trx_id(
-/*=====================*/
- /* out: trx id */
- read_view_t* view, /* in: read view */
- ulint n) /* in: position */
-{
- ut_ad(n < view->n_trx_ids);
-
- return(*(view->trx_ids + n));
-}
-
-/*************************************************************************
-Sets the nth trx id in a read view. */
-UNIV_INLINE
-void
-read_view_set_nth_trx_id(
-/*=====================*/
- read_view_t* view, /* in: read view */
- ulint n, /* in: position */
- dulint trx_id) /* in: trx id to set */
-{
- ut_ad(n < view->n_trx_ids);
-
- *(view->trx_ids + n) = trx_id;
-}
-
-/*************************************************************************
-Checks if a read view sees the specified transaction. */
-UNIV_INLINE
-ibool
-read_view_sees_trx_id(
-/*==================*/
- /* out: TRUE if sees */
- read_view_t* view, /* in: read view */
- dulint trx_id) /* in: trx id */
-{
- ulint n_ids;
- int cmp;
- ulint i;
-
- if (ut_dulint_cmp(trx_id, view->up_limit_id) < 0) {
-
- return(TRUE);
- }
-
- if (ut_dulint_cmp(trx_id, view->low_limit_id) >= 0) {
-
- return(FALSE);
- }
-
- /* We go through the trx ids in the array smallest first: this order
- may save CPU time, because if there was a very long running
- transaction in the trx id array, its trx id is looked at first, and
- the first two comparisons may well decide the visibility of trx_id. */
-
- n_ids = view->n_trx_ids;
-
- for (i = 0; i < n_ids; i++) {
-
- cmp = ut_dulint_cmp(
- trx_id,
- read_view_get_nth_trx_id(view, n_ids - i - 1));
- if (cmp <= 0) {
- return(cmp < 0);
- }
- }
-
- return(TRUE);
-}
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
deleted file mode 100644
index 7d42728523e..00000000000
--- a/storage/innobase/include/read0types.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/******************************************************
-Cursor read
-
-(c) 1997 Innobase Oy
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0types_h
-#define read0types_h
-
-typedef struct read_view_struct read_view_t;
-typedef struct cursor_view_struct cursor_view_t;
-
-#endif
diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
deleted file mode 100644
index c6a6e5de4db..00000000000
--- a/storage/innobase/include/rem0cmp.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/***********************************************************************
-Comparison services for records
-
-(c) 1994-2001 Innobase Oy
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-#ifndef rem0cmp_h
-#define rem0cmp_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "data0type.h"
-#include "dict0dict.h"
-#include "rem0rec.h"
-
-/*****************************************************************
-Returns TRUE if two columns are equal for comparison purposes. */
-
-ibool
-cmp_cols_are_equal(
-/*===============*/
- /* out: TRUE if the columns are
- considered equal in comparisons */
- const dict_col_t* col1, /* in: column 1 */
- const dict_col_t* col2, /* in: column 2 */
- ibool check_charsets);
- /* in: whether to check charsets */
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. */
-
-int
-cmp_data_data_slow(
-/*===============*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
-/*==============*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2);/* in: data field */
-/*****************************************************************
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made. */
-
-int
-cmp_dtuple_rec_with_match(
-/*======================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared, or
- until the first externally stored field in
- rec */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields, /* in/out: number of already completely
- matched fields; when function returns,
- contains the value for current comparison */
- ulint* matched_bytes); /* in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns, contains the
- value for current comparison */
-/******************************************************************
-Compares a data tuple to a physical record. */
-
-int
-cmp_dtuple_rec(
-/*===========*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively; see the comments
- for cmp_dtuple_rec_with_match */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/******************************************************************
-Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record. */
-
-ibool
-cmp_dtuple_is_prefix_of_rec(
-/*========================*/
- /* out: TRUE if prefix */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*****************************************************************
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned. */
-
-int
-cmp_rec_rec_with_match(
-/*===================*/
- /* out: 1, 0 , -1 if rec1 is greater, equal,
- less, respectively, than rec2; only the common
- first fields are compared */
- rec_t* rec1, /* in: physical record */
- rec_t* rec2, /* in: physical record */
- const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /* in: data dictionary index */
- ulint* matched_fields, /* in/out: number of already completely
- matched fields; when the function returns,
- contains the value for the current
- comparison */
- ulint* matched_bytes);/* in/out: number of already matched
- bytes within the first field not completely
- matched; when the function returns, contains
- the value for the current comparison */
-/*****************************************************************
-This function is used to compare two physical records. Only the common
-first fields are compared. */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
- /* out: 1, 0 , -1 if rec1 is greater, equal,
- less, respectively, than rec2; only the common
- first fields are compared */
- rec_t* rec1, /* in: physical record */
- rec_t* rec2, /* in: physical record */
- const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
- dict_index_t* index); /* in: data dictionary index */
-
-
-#ifndef UNIV_NONINL
-#include "rem0cmp.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic
deleted file mode 100644
index 52dc7ff5dc9..00000000000
--- a/storage/innobase/include/rem0cmp.ic
+++ /dev/null
@@ -1,76 +0,0 @@
-/***********************************************************************
-Comparison services for records
-
-(c) 1994-1996 Innobase Oy
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2));
-}
-
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
-/*==============*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2)/* in: data field */
-{
- const dtype_t* type;
-
- ut_ad(dfield_check_typed(dfield1));
-
- type = dfield_get_type(dfield1);
-
- return(cmp_data_data(type->mtype, type->prtype,
- dfield_get_data(dfield1),
- dfield_get_len(dfield1),
- dfield_get_data(dfield2),
- dfield_get_len(dfield2)));
-}
-
-/*****************************************************************
-This function is used to compare two physical records. Only the common
-first fields are compared. */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
- /* out: 1, 0 , -1 if rec1 is greater, equal,
- less, respectively, than rec2; only the common
- first fields are compared */
- rec_t* rec1, /* in: physical record */
- rec_t* rec2, /* in: physical record */
- const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
- dict_index_t* index) /* in: data dictionary index */
-{
- ulint match_f = 0;
- ulint match_b = 0;
-
- return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
- &match_f, &match_b));
-}
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
deleted file mode 100644
index abc204bb583..00000000000
--- a/storage/innobase/include/rem0rec.h
+++ /dev/null
@@ -1,582 +0,0 @@
-/************************************************************************
-Record manager
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef rem0rec_h
-#define rem0rec_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "rem0types.h"
-#include "mtr0types.h"
-
-/* Info bit denoting the predefined minimum record: this bit is set
-if and only if the record is the first user record on a non-leaf
-B-tree page that is the leftmost page on its level
-(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */
-#define REC_INFO_MIN_REC_FLAG 0x10UL
-
-/* Number of extra bytes in an old-style record,
-in addition to the data and the offsets */
-#define REC_N_OLD_EXTRA_BYTES 6
-/* Number of extra bytes in a new-style record,
-in addition to the data and the offsets */
-#define REC_N_NEW_EXTRA_BYTES 5
-
-/* Record status values */
-#define REC_STATUS_ORDINARY 0
-#define REC_STATUS_NODE_PTR 1
-#define REC_STATUS_INFIMUM 2
-#define REC_STATUS_SUPREMUM 3
-
-/* Number of elements that should be initially allocated for the
-offsets[] array, first passed to rec_get_offsets() */
-#define REC_OFFS_NORMAL_SIZE 100
-#define REC_OFFS_SMALL_SIZE 10
-
-/**********************************************************
-The following function is used to get the offset of the
-next chained record on the same page. */
-UNIV_INLINE
-ulint
-rec_get_next_offs(
-/*==============*/
- /* out: the page offset of the next
- chained record */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the next record offset field
-of the record. */
-UNIV_INLINE
-void
-rec_set_next_offs(
-/*==============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint next); /* in: offset of the next record */
-/**********************************************************
-The following function is used to get the number of fields
-in an old-style record. */
-UNIV_INLINE
-ulint
-rec_get_n_fields_old(
-/*=================*/
- /* out: number of data fields */
- rec_t* rec); /* in: physical record */
-/**********************************************************
-The following function is used to get the number of fields
-in a record. */
-UNIV_INLINE
-ulint
-rec_get_n_fields(
-/*=============*/
- /* out: number of data fields */
- rec_t* rec, /* in: physical record */
- dict_index_t* index); /* in: record descriptor */
-/**********************************************************
-The following function is used to get the number of records
-owned by the previous directory record. */
-UNIV_INLINE
-ulint
-rec_get_n_owned(
-/*============*/
- /* out: number of owned records */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the number of owned
-records. */
-UNIV_INLINE
-void
-rec_set_n_owned(
-/*============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint n_owned); /* in: the number of owned records */
-/**********************************************************
-The following function is used to retrieve the info bits of
-a record. */
-UNIV_INLINE
-ulint
-rec_get_info_bits(
-/*==============*/
- /* out: info bits */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits(
-/*==============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint bits); /* in: info bits */
-/**********************************************************
-The following function retrieves the status bits of a new-style record. */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- /* out: status bits */
- rec_t* rec); /* in: physical record */
-
-/**********************************************************
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
- rec_t* rec, /* in: physical record */
- ulint bits); /* in: status bits */
-
-/**********************************************************
-The following function is used to retrieve the info and status
-bits of a record. (Only compact records have status bits.) */
-UNIV_INLINE
-ulint
-rec_get_info_and_status_bits(
-/*=========================*/
- /* out: info bits */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the info and status
-bits of a record. (Only compact records have status bits.) */
-UNIV_INLINE
-void
-rec_set_info_and_status_bits(
-/*=========================*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint bits); /* in: info bits */
-
-/**********************************************************
-The following function tells if record is delete marked. */
-UNIV_INLINE
-ulint
-rec_get_deleted_flag(
-/*=================*/
- /* out: nonzero if delete marked */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag(
-/*=================*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint flag); /* in: nonzero if delete marked */
-/**********************************************************
-The following function tells if a new-style record is a node pointer. */
-UNIV_INLINE
-ibool
-rec_get_node_ptr_flag(
-/*==================*/
- /* out: TRUE if node pointer */
- rec_t* rec); /* in: physical record */
-/**********************************************************
-The following function is used to get the order number
-of the record in the heap of the index page. */
-UNIV_INLINE
-ulint
-rec_get_heap_no(
-/*============*/
- /* out: heap order number */
- rec_t* rec, /* in: physical record */
- ulint comp); /* in: nonzero=compact page format */
-/**********************************************************
-The following function is used to set the heap number
-field in the record. */
-UNIV_INLINE
-void
-rec_set_heap_no(
-/*============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint heap_no);/* in: the heap number */
-/**********************************************************
-The following function is used to test whether the data offsets
-in the record are stored in one-byte or two-byte format. */
-UNIV_INLINE
-ibool
-rec_get_1byte_offs_flag(
-/*====================*/
- /* out: TRUE if 1-byte form */
- rec_t* rec); /* in: physical record */
-/**********************************************************
-The following function determines the offsets to each field
-in the record. It can reuse a previously allocated array. */
-
-ulint*
-rec_get_offsets_func(
-/*=================*/
- /* out: the new offsets */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets,/* in: array consisting of offsets[0]
- allocated elements, or an array from
- rec_get_offsets(), or NULL */
- ulint n_fields,/* in: maximum number of initialized fields
- (ULINT_UNDEFINED if all fields) */
- mem_heap_t** heap, /* in/out: memory heap */
- const char* file, /* in: file name where called */
- ulint line); /* in: line number where called */
-
-#define rec_get_offsets(rec,index,offsets,n,heap) \
- rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
-
-/****************************************************************
-Validates offsets returned by rec_get_offsets(). */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
- /* out: TRUE if valid */
- rec_t* rec, /* in: record or NULL */
- dict_index_t* index, /* in: record descriptor or NULL */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/****************************************************************
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
- rec_t* rec, /* in: record */
- dict_index_t* index,/* in: record descriptor */
- ulint* offsets);/* in: array returned by rec_get_offsets() */
-
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in an old-style record. */
-
-byte*
-rec_get_nth_field_old(
-/*==================*/
- /* out: pointer to the field */
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- ulint* len); /* out: length of the field; UNIV_SQL_NULL
- if SQL null */
-/****************************************************************
-Gets the physical size of an old-style field.
-Also an SQL null may have a field of size > 0,
-if the data type is of a fixed size. */
-UNIV_INLINE
-ulint
-rec_get_nth_field_size(
-/*===================*/
- /* out: field size in bytes */
- rec_t* rec, /* in: record */
- ulint n); /* in: index of the field */
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in a record. */
-UNIV_INLINE
-byte*
-rec_get_nth_field(
-/*==============*/
- /* out: pointer to the field */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n, /* in: index of the field */
- ulint* len); /* out: length of the field; UNIV_SQL_NULL
- if SQL null */
-/**********************************************************
-Determine if the offsets are for a record in the new
-compact format. */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- /* out: nonzero if compact format */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**********************************************************
-Returns nonzero if the extern bit is set in nth field of rec. */
-UNIV_INLINE
-ulint
-rec_offs_nth_extern(
-/*================*/
- /* out: nonzero if externally stored */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n); /* in: nth field */
-/**********************************************************
-Returns nonzero if the SQL NULL bit is set in nth field of rec. */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
- /* out: nonzero if SQL NULL */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n); /* in: nth field */
-/**********************************************************
-Gets the physical size of a field. */
-UNIV_INLINE
-ulint
-rec_offs_nth_size(
-/*==============*/
- /* out: length of field */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n); /* in: nth field */
-
-/**********************************************************
-Returns TRUE if the extern bit is set in any of the fields
-of rec. */
-UNIV_INLINE
-ibool
-rec_offs_any_extern(
-/*================*/
- /* out: TRUE if a field is stored externally */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/***************************************************************
-Sets the value of the ith field extern storage bit. */
-UNIV_INLINE
-void
-rec_set_nth_field_extern_bit(
-/*=========================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-/***************************************************************
-Sets TRUE the extern storage bits of fields mentioned in an array. */
-
-void
-rec_set_field_extern_bits(
-/*======================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- const ulint* vec, /* in: array of field numbers */
- ulint n_fields,/* in: number of field numbers */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-/***************************************************************
-This is used to modify the value of an already existing field in a record.
-The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null.
-For records in ROW_FORMAT=COMPACT (new-style records), len must not be
-UNIV_SQL_NULL unless the field already is SQL null. */
-UNIV_INLINE
-void
-rec_set_nth_field(
-/*==============*/
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n, /* in: index number of the field */
- const void* data, /* in: pointer to the data if not SQL null */
- ulint len); /* in: length of the data or UNIV_SQL_NULL */
-/**************************************************************
-The following function returns the data size of an old-style physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
-UNIV_INLINE
-ulint
-rec_get_data_size_old(
-/*==================*/
- /* out: size */
- rec_t* rec); /* in: physical record */
-/**************************************************************
-The following function returns the number of fields in a record. */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- /* out: number of fields */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-The following function returns the data size of a physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes. */
-UNIV_INLINE
-ulint
-rec_offs_data_size(
-/*===============*/
- /* out: size */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns the total size of record minus data size of record.
-The value returned by the function is the distance from record
-start to record origin in bytes. */
-UNIV_INLINE
-ulint
-rec_offs_extra_size(
-/*================*/
- /* out: size */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns the total size of a physical record. */
-UNIV_INLINE
-ulint
-rec_offs_size(
-/*==========*/
- /* out: size */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns a pointer to the start of the record. */
-UNIV_INLINE
-byte*
-rec_get_start(
-/*==========*/
- /* out: pointer to start */
- rec_t* rec, /* in: pointer to record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/**************************************************************
-Returns a pointer to the end of the record. */
-UNIV_INLINE
-byte*
-rec_get_end(
-/*========*/
- /* out: pointer to end */
- rec_t* rec, /* in: pointer to record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Copies a physical record to a buffer. */
-UNIV_INLINE
-rec_t*
-rec_copy(
-/*=====*/
- /* out: pointer to the origin of the copy */
- void* buf, /* in: buffer */
- const rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/******************************************************************
-Copies the first n fields of a physical record to a new physical record in
-a buffer. */
-
-rec_t*
-rec_copy_prefix_to_buf(
-/*===================*/
- /* out, own: copied record */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint n_fields, /* in: number of fields to copy */
- byte** buf, /* in/out: memory buffer
- for the copied prefix, or NULL */
- ulint* buf_size); /* in/out: buffer size */
-/****************************************************************
-Folds a prefix of a physical record to a ulint. */
-UNIV_INLINE
-ulint
-rec_fold(
-/*=====*/
- /* out: the folded value */
- rec_t* rec, /* in: the physical record */
- const ulint* offsets, /* in: array returned by
- rec_get_offsets() */
- ulint n_fields, /* in: number of complete
- fields to fold */
- ulint n_bytes, /* in: number of bytes to fold
- in an incomplete last field */
- dulint tree_id); /* in: index tree id */
-/*************************************************************
-Builds a physical record out of a data tuple and stores it beginning from
-address destination. */
-
-rec_t*
-rec_convert_dtuple_to_rec(
-/*======================*/
- /* out: pointer to the origin
- of physical record */
- byte* buf, /* in: start address of the
- physical record */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple);/* in: data tuple */
-/**************************************************************
-Returns the extra size of an old-style physical record if we know its
-data size and number of fields. */
-UNIV_INLINE
-ulint
-rec_get_converted_extra_size(
-/*=========================*/
- /* out: extra size */
- ulint data_size, /* in: data size */
- ulint n_fields) /* in: number of fields */
- __attribute__((const));
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a physical record. */
-UNIV_INLINE
-ulint
-rec_get_converted_size(
-/*===================*/
- /* out: size */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple);/* in: data tuple */
-/******************************************************************
-Copies the first n fields of a physical record to a data tuple.
-The fields are copied to the memory heap. */
-
-void
-rec_copy_prefix_to_dtuple(
-/*======================*/
- dtuple_t* tuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint n_fields, /* in: number of fields to copy */
- mem_heap_t* heap); /* in: memory heap */
-/*******************************************************************
-Validates the consistency of a physical record. */
-
-ibool
-rec_validate(
-/*=========*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Prints an old-style physical record. */
-
-void
-rec_print_old(
-/*==========*/
- FILE* file, /* in: file where to print */
- rec_t* rec); /* in: physical record */
-/*******************************************************************
-Prints a physical record. */
-
-void
-rec_print_new(
-/*==========*/
- FILE* file, /* in: file where to print */
- rec_t* rec, /* in: physical record */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Prints a physical record. */
-
-void
-rec_print(
-/*======*/
- FILE* file, /* in: file where to print */
- rec_t* rec, /* in: physical record */
- dict_index_t* index); /* in: record descriptor */
-
-#define REC_INFO_BITS 6 /* This is single byte bit-field */
-
-/* Maximum lengths for the data in a physical record if the offsets
-are given in one byte (resp. two byte) format. */
-#define REC_1BYTE_OFFS_LIMIT 0x7FUL
-#define REC_2BYTE_OFFS_LIMIT 0x7FFFUL
-
-/* The data size of record must be smaller than this because we reserve
-two upmost bits in a two byte offset for special purposes */
-#define REC_MAX_DATA_SIZE (16 * 1024)
-
-#ifndef UNIV_NONINL
-#include "rem0rec.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
deleted file mode 100644
index d91fb4c4391..00000000000
--- a/storage/innobase/include/rem0rec.ic
+++ /dev/null
@@ -1,1531 +0,0 @@
-/************************************************************************
-Record manager
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mach0data.h"
-#include "ut0byte.h"
-#include "dict0dict.h"
-
-/* Compact flag ORed to the extra size returned by rec_get_offsets() */
-#define REC_OFFS_COMPACT ((ulint) 1 << 31)
-/* SQL NULL flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_SQL_NULL ((ulint) 1 << 31)
-/* External flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_EXTERNAL ((ulint) 1 << 30)
-/* Mask for offsets returned by rec_get_offsets() */
-#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1)
-
-/* Offsets of the bit-fields in an old-style record. NOTE! In the table the
-most significant bytes and bits are written below less significant.
-
- (1) byte offset (2) bit usage within byte
- downward from
- origin -> 1 8 bits pointer to next record
- 2 8 bits pointer to next record
- 3 1 bit short flag
- 7 bits number of fields
- 4 3 bits number of fields
- 5 bits heap number
- 5 8 bits heap number
- 6 4 bits n_owned
- 4 bits info bits
-*/
-
-/* Offsets of the bit-fields in a new-style record. NOTE! In the table the
-most significant bytes and bits are written below less significant.
-
- (1) byte offset (2) bit usage within byte
- downward from
- origin -> 1 8 bits relative offset of next record
- 2 8 bits relative offset of next record
- the relative offset is an unsigned 16-bit
- integer:
- (offset_of_next_record
- - offset_of_this_record) mod 64Ki,
- where mod is the modulo as a non-negative
- number;
- we can calculate the offset of the next
- record with the formula:
- relative_offset + offset_of_this_record
- mod UNIV_PAGE_SIZE
- 3 3 bits status:
- 000=conventional record
- 001=node pointer record (inside B-tree)
- 010=infimum record
- 011=supremum record
- 1xx=reserved
- 5 bits heap number
- 4 8 bits heap number
- 5 4 bits n_owned
- 4 bits info bits
-*/
-
-/* We list the byte offsets from the origin of the record, the mask,
-and the shift needed to obtain each bit-field of the record. */
-
-#define REC_NEXT 2
-#define REC_NEXT_MASK 0xFFFFUL
-#define REC_NEXT_SHIFT 0
-
-#define REC_OLD_SHORT 3 /* This is single byte bit-field */
-#define REC_OLD_SHORT_MASK 0x1UL
-#define REC_OLD_SHORT_SHIFT 0
-
-#define REC_OLD_N_FIELDS 4
-#define REC_OLD_N_FIELDS_MASK 0x7FEUL
-#define REC_OLD_N_FIELDS_SHIFT 1
-
-#define REC_NEW_STATUS 3 /* This is single byte bit-field */
-#define REC_NEW_STATUS_MASK 0x7UL
-#define REC_NEW_STATUS_SHIFT 0
-
-#define REC_OLD_HEAP_NO 5
-#define REC_NEW_HEAP_NO 4
-#define REC_HEAP_NO_MASK 0xFFF8UL
-#define REC_HEAP_NO_SHIFT 3
-
-#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */
-#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */
-#define REC_N_OWNED_MASK 0xFUL
-#define REC_N_OWNED_SHIFT 0
-
-#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */
-#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */
-#define REC_INFO_BITS_MASK 0xF0UL
-#define REC_INFO_BITS_SHIFT 0
-
-/* The deleted flag in info bits */
-#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
- record has been delete marked */
-/* The following masks are used to filter the SQL null bit from
-one-byte and two-byte offsets */
-
-#define REC_1BYTE_SQL_NULL_MASK 0x80UL
-#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
-
-/* In a 2-byte offset the second most significant bit denotes
-a field stored to another page: */
-
-#define REC_2BYTE_EXTERN_MASK 0x4000UL
-
-#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \
- ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \
- ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \
- ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \
- ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \
- ^ 0xFFFFFFFFUL
-# error "sum of old-style masks != 0xFFFFFFFFUL"
-#endif
-#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \
- ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \
- ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \
- ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \
- ^ 0xFFFFFFUL
-# error "sum of new-style masks != 0xFFFFFFUL"
-#endif
-
-/***************************************************************
-Sets the value of the ith field SQL null bit of an old-style record. */
-
-void
-rec_set_nth_field_null_bit(
-/*=======================*/
- rec_t* rec, /* in: record */
- ulint i, /* in: ith field */
- ibool val); /* in: value to set */
-/***************************************************************
-Sets an old-style record field to SQL null.
-The physical size of the field is not changed. */
-
-void
-rec_set_nth_field_sql_null(
-/*=======================*/
- rec_t* rec, /* in: record */
- ulint n); /* in: index of the field */
-
-/***************************************************************
-Sets the value of the ith field extern storage bit of an old-style record. */
-
-void
-rec_set_nth_field_extern_bit_old(
-/*=============================*/
- rec_t* rec, /* in: old-style record */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page where
- rec is, or NULL; in the NULL case we do not
- write to log about the change */
-/***************************************************************
-Sets the value of the ith field extern storage bit of a new-style record. */
-
-void
-rec_set_nth_field_extern_bit_new(
-/*=============================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- ulint ith, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-
-/**********************************************************
-Gets a bit field from within 1 byte. */
-UNIV_INLINE
-ulint
-rec_get_bit_field_1(
-/*================*/
- rec_t* rec, /* in: pointer to record origin */
- ulint offs, /* in: offset from the origin down */
- ulint mask, /* in: mask used to filter bits */
- ulint shift) /* in: shift right applied after masking */
-{
- ut_ad(rec);
-
- return((mach_read_from_1(rec - offs) & mask) >> shift);
-}
-
-/**********************************************************
-Sets a bit field within 1 byte. The bits selected by mask are cleared in
-the byte at rec - offs and replaced by val << shift; the other bits of the
-byte are written back unchanged. */
-UNIV_INLINE
-void
-rec_set_bit_field_1(
-/*================*/
- rec_t* rec, /* in: pointer to record origin */
- ulint val, /* in: value to set (unshifted) */
- ulint offs, /* in: offset from the origin down */
- ulint mask, /* in: mask used to filter bits */
- ulint shift) /* in: shift left applied to val before it is
- stored under mask */
-{
- ut_ad(rec);
- ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
- ut_ad(mask);
- ut_ad(mask <= 0xFFUL);
- /* mask must have no bits set below the shift position */
- ut_ad(((mask >> shift) << shift) == mask);
- /* the shifted value must fit entirely inside mask */
- ut_ad(((val << shift) & mask) == (val << shift));
-
- mach_write_to_1(rec - offs,
- (mach_read_from_1(rec - offs) & ~mask)
- | (val << shift));
-}
-
-/**********************************************************
-Gets a bit field from within 2 bytes. */
-UNIV_INLINE
-ulint
-rec_get_bit_field_2(
-/*================*/
- rec_t* rec, /* in: pointer to record origin */
- ulint offs, /* in: offset from the origin down */
- ulint mask, /* in: mask used to filter bits */
- ulint shift) /* in: shift right applied after masking */
-{
- ut_ad(rec);
-
- return((mach_read_from_2(rec - offs) & mask) >> shift);
-}
-
-/**********************************************************
-Sets a bit field within 2 bytes. The bits selected by mask are cleared in
-the 2-byte value at rec - offs and replaced by val << shift; the other bits
-are written back unchanged. */
-UNIV_INLINE
-void
-rec_set_bit_field_2(
-/*================*/
- rec_t* rec, /* in: pointer to record origin */
- ulint val, /* in: value to set (unshifted) */
- ulint offs, /* in: offset from the origin down */
- ulint mask, /* in: mask used to filter bits */
- ulint shift) /* in: shift left applied to val before it is
- stored under mask */
-{
- ut_ad(rec);
- ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
- ut_ad(mask > 0xFFUL);
- ut_ad(mask <= 0xFFFFUL);
- /* the lowest bit of mask must sit exactly at the shift position ... */
- ut_ad((mask >> shift) & 1);
- /* ... and the set bits of mask must be contiguous */
- ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1)));
- ut_ad(((mask >> shift) << shift) == mask);
- /* the shifted value must fit entirely inside mask */
- ut_ad(((val << shift) & mask) == (val << shift));
-
- mach_write_to_2(rec - offs,
- (mach_read_from_2(rec - offs) & ~mask)
- | (val << shift));
-}
-
-/**********************************************************
-The following function is used to get the offset of the next chained record
-on the same page. */
-UNIV_INLINE
-ulint
-rec_get_next_offs(
-/*==============*/
- /* out: the page offset of the next chained record, or
- 0 if none */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
-{
- ulint field_value;
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
-
- field_value = mach_read_from_2(rec - REC_NEXT);
-
- if (comp) {
-#if UNIV_PAGE_SIZE <= 32768
- /* Note that for 64 KiB pages, field_value can 'wrap around'
- and the debug assertion is not valid */
-
- /* In the following assertion, field_value is interpreted
- as signed 16-bit integer in 2's complement arithmetics.
- If all platforms defined int16_t in the standard headers,
- the expression could be written simpler as
- (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
- */
- ut_ad((field_value >= 32768
- ? field_value - 65536
- : field_value)
- + ut_align_offset(rec, UNIV_PAGE_SIZE)
- < UNIV_PAGE_SIZE);
-#endif
- if (field_value == 0) {
-
- return(0);
- }
-
- return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
- } else {
- ut_ad(field_value < UNIV_PAGE_SIZE);
-
- return(field_value);
- }
-}
-
-/**********************************************************
-The following function is used to set the next record offset field of the
-record. */
-UNIV_INLINE
-void
-rec_set_next_offs(
-/*==============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint next) /* in: offset of the next record, or 0 if none */
-{
- ut_ad(rec);
- ut_ad(UNIV_PAGE_SIZE > next);
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
-
- if (comp) {
- ulint field_value;
-
- if (next) {
- /* The following two statements calculate
- next - offset_of_rec mod 64Ki, where mod is the modulo
- as a non-negative number */
-
- field_value = (ulint)((lint)next
- - (lint)ut_align_offset(
- rec, UNIV_PAGE_SIZE));
- field_value &= REC_NEXT_MASK;
- } else {
- field_value = 0;
- }
-
- mach_write_to_2(rec - REC_NEXT, field_value);
- } else {
- mach_write_to_2(rec - REC_NEXT, next);
- }
-}
-
-/**********************************************************
-The following function is used to get the number of fields
-in an old-style record. */
-UNIV_INLINE
-ulint
-rec_get_n_fields_old(
-/*=================*/
- /* out: number of data fields */
- rec_t* rec) /* in: physical record */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS,
- REC_OLD_N_FIELDS_MASK,
- REC_OLD_N_FIELDS_SHIFT);
- ut_ad(ret <= REC_MAX_N_FIELDS);
- ut_ad(ret > 0);
-
- return(ret);
-}
-
-/**********************************************************
-The following function is used to set the number of fields
-in an old-style record. */
-UNIV_INLINE
-void
-rec_set_n_fields_old(
-/*=================*/
- rec_t* rec, /* in: physical record */
- ulint n_fields) /* in: the number of fields */
-{
- ut_ad(rec);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields > 0);
-
- rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS,
- REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
-}
-
-/**********************************************************
-The following function retrieves the status bits of a new-style record. */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- /* out: status bits */
- rec_t* rec) /* in: physical record */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_1(rec, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
- ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0);
-
- return(ret);
-}
-
-/**********************************************************
-The following function is used to get the number of fields
-in a record. */
-UNIV_INLINE
-ulint
-rec_get_n_fields(
-/*=============*/
- /* out: number of data fields */
- rec_t* rec, /* in: physical record */
- dict_index_t* index) /* in: record descriptor */
-{
- ut_ad(rec);
- ut_ad(index);
-
- if (!dict_table_is_comp(index->table)) {
- return(rec_get_n_fields_old(rec));
- }
-
- switch (rec_get_status(rec)) {
- case REC_STATUS_ORDINARY:
- return(dict_index_get_n_fields(index));
- case REC_STATUS_NODE_PTR:
- return(dict_index_get_n_unique_in_tree(index) + 1);
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- return(1);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
- }
-}
-
-/**********************************************************
-The following function is used to get the number of records owned by the
-previous directory record. */
-UNIV_INLINE
-ulint
-rec_get_n_owned(
-/*============*/
- /* out: number of owned records */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_1(rec,
- comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
- ut_ad(ret <= REC_MAX_N_OWNED);
-
- return(ret);
-}
-
-/**********************************************************
-The following function is used to set the number of owned records. */
-UNIV_INLINE
-void
-rec_set_n_owned(
-/*============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint n_owned) /* in: the number of owned */
-{
- ut_ad(rec);
- ut_ad(n_owned <= REC_MAX_N_OWNED);
-
- rec_set_bit_field_1(rec, n_owned,
- comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
-}
-
-/**********************************************************
-The following function is used to retrieve the info bits of a record. */
-UNIV_INLINE
-ulint
-rec_get_info_bits(
-/*==============*/
- /* out: info bits */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_1(rec,
- comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
- ut_ad((ret & ~REC_INFO_BITS_MASK) == 0);
-
- return(ret);
-}
-
-/**********************************************************
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits(
-/*==============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint bits) /* in: info bits */
-{
- ut_ad(rec);
- ut_ad((bits & ~REC_INFO_BITS_MASK) == 0);
-
- rec_set_bit_field_1(rec, bits,
- comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
-}
-
-/**********************************************************
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
- rec_t* rec, /* in: physical record */
- ulint bits) /* in: status bits; no bits outside
- REC_NEW_STATUS_MASK may be set */
-{
- ut_ad(rec);
- ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0);
-
- rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
-}
-
-/**********************************************************
-The following function is used to retrieve the info and status
-bits of a record. (Only compact records have status bits.) */
-UNIV_INLINE
-ulint
-rec_get_info_and_status_bits(
-/*=========================*/
- /* out: info bits; when comp is set, the
- status bits are ORed into the result */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
-{
- ulint bits;
-/* The two fields are combined with a plain OR below, so their masks
-must not share any bit. */
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
- if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
- bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec);
- } else {
- bits = rec_get_info_bits(rec, FALSE);
- ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
- }
- return(bits);
-}
-/**********************************************************
-The following function is used to set the info and status
-bits of a record. (Only compact records have status bits.) */
-UNIV_INLINE
-void
-rec_set_info_and_status_bits(
-/*=========================*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint bits) /* in: info bits, combined with the status
- bits when comp is nonzero */
-{
-/* bits is split with REC_NEW_STATUS_MASK below, so the two masks
-must not share any bit. */
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
- if (comp) {
- rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
- } else {
- ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
- }
- /* the status part is stripped before the info bits are stored */
- rec_set_info_bits(rec, comp, bits & ~REC_NEW_STATUS_MASK);
-}
-
-/**********************************************************
-The following function tells if record is delete marked. */
-UNIV_INLINE
-ulint
-rec_get_deleted_flag(
-/*=================*/
- /* out: nonzero if delete marked */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
-{
- if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) {
- return(UNIV_UNLIKELY(
- rec_get_bit_field_1(rec, REC_NEW_INFO_BITS,
- REC_INFO_DELETED_FLAG,
- REC_INFO_BITS_SHIFT)));
- } else {
- return(UNIV_UNLIKELY(
- rec_get_bit_field_1(rec, REC_OLD_INFO_BITS,
- REC_INFO_DELETED_FLAG,
- REC_INFO_BITS_SHIFT)));
- }
-}
-
-/**********************************************************
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag(
-/*=================*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint flag) /* in: nonzero if delete marked */
-{
- ulint val;
-
- val = rec_get_info_bits(rec, comp);
-
- if (flag) {
- val |= REC_INFO_DELETED_FLAG;
- } else {
- val &= ~REC_INFO_DELETED_FLAG;
- }
-
- rec_set_info_bits(rec, comp, val);
-}
-
-/**********************************************************
-The following function tells if a new-style record is a node pointer. */
-UNIV_INLINE
-ibool
-rec_get_node_ptr_flag(
-/*==================*/
- /* out: TRUE if node pointer */
- rec_t* rec) /* in: physical record */
-{
- return(REC_STATUS_NODE_PTR == rec_get_status(rec));
-}
-
-/**********************************************************
-The following function is used to get the order number of the record in the
-heap of the index page. */
-UNIV_INLINE
-ulint
-rec_get_heap_no(
-/*============*/
- /* out: heap order number */
- rec_t* rec, /* in: physical record */
- ulint comp) /* in: nonzero=compact page format */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_2(rec,
- comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
- ut_ad(ret <= REC_MAX_HEAP_NO);
-
- return(ret);
-}
-
-/**********************************************************
-The following function is used to set the heap number field in the record. */
-UNIV_INLINE
-void
-rec_set_heap_no(
-/*============*/
- rec_t* rec, /* in: physical record */
- ulint comp, /* in: nonzero=compact page format */
- ulint heap_no)/* in: the heap number */
-{
- ut_ad(heap_no <= REC_MAX_HEAP_NO);
-
- rec_set_bit_field_2(rec, heap_no,
- comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
-}
-
-/**********************************************************
-The following function is used to test whether the data offsets in the record
-are stored in one-byte or two-byte format. */
-UNIV_INLINE
-ibool
-rec_get_1byte_offs_flag(
-/*====================*/
- /* out: TRUE if 1-byte form */
- rec_t* rec) /* in: physical record */
-{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
-
- return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
- REC_OLD_SHORT_SHIFT));
-}
-
-/**********************************************************
-The following function is used to set the 1-byte offsets flag. */
-UNIV_INLINE
-void
-rec_set_1byte_offs_flag(
-/*====================*/
- rec_t* rec, /* in: physical record */
- ibool flag) /* in: TRUE if 1byte form */
-{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
- ut_ad(flag <= TRUE);
-
- rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
- REC_OLD_SHORT_SHIFT);
-}
-
-/**********************************************************
-Returns the offset of nth field end if the record is stored in the 1-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value. */
-UNIV_INLINE
-ulint
-rec_1_get_field_end_info(
-/*=====================*/
- /* out: offset of the end of the field, SQL null
- flag ORed */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1)));
-}
-
-/**********************************************************
-Returns the offset of nth field end if the record is stored in the 2-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value. */
-UNIV_INLINE
-ulint
-rec_2_get_field_end_info(
-/*=====================*/
- /* out: offset of the end of the field, SQL null
- flag and extern storage flag ORed */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
-}
-
-#ifdef UNIV_DEBUG
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 4
-#else /* UNIV_DEBUG */
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 2
-#endif /* UNIV_DEBUG */
-
-/* Get the base address of offsets. The extra_size is stored at
-this position, and following positions hold the end offsets of
-the fields. */
-#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
-
-/**************************************************************
-The following function returns the number of allocated elements
-for an array of offsets. */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
- /* out: number of elements */
- const ulint* offsets)/* in: array for rec_get_offsets() */
-{
- ulint n_alloc;
- ut_ad(offsets);
- n_alloc = offsets[0];
- ut_ad(n_alloc > 0);
- return(n_alloc);
-}
-
-/**************************************************************
-The following function sets the number of allocated elements
-for an array of offsets. */
-UNIV_INLINE
-void
-rec_offs_set_n_alloc(
-/*=================*/
- ulint* offsets, /* out: array for rec_get_offsets(),
- must be allocated */
- ulint n_alloc) /* in: number of elements */
-{
- ut_ad(offsets);
- ut_ad(n_alloc > 0);
- UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets);
- offsets[0] = n_alloc;
-}
-
-/**************************************************************
-The following function returns the number of fields in a record. */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- /* out: number of fields */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- ulint n_fields;
- ut_ad(offsets);
- n_fields = offsets[1];
- ut_ad(n_fields > 0);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields + REC_OFFS_HEADER_SIZE
- <= rec_offs_get_n_alloc(offsets));
- return(n_fields);
-}
-
-/****************************************************************
-Validates offsets returned by rec_get_offsets(). All checks are made with
-ut_a()/ut_ad(); the only return statement returns TRUE. */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
- /* out: TRUE if valid */
- rec_t* rec, /* in: record or NULL */
- dict_index_t* index, /* in: record descriptor or NULL */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- ulint i = rec_offs_n_fields(offsets);
- ulint last = ULINT_MAX;
- ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT;
-
- if (rec) {
- /* offsets[2] is the header slot in which
- rec_offs_make_valid() stores the record pointer
- under UNIV_DEBUG */
- ut_ad((ulint) rec == offsets[2]);
- if (!comp) {
- ut_a(rec_get_n_fields_old(rec) >= i);
- }
- }
- if (index) {
- ulint max_n_fields;
- /* offsets[3] is the header slot in which
- rec_offs_make_valid() stores the index pointer
- under UNIV_DEBUG */
- ut_ad((ulint) index == offsets[3]);
- max_n_fields = ut_max(
- dict_index_get_n_fields(index),
- dict_index_get_n_unique_in_tree(index) + 1);
- if (comp && rec) {
- /* the status of a compact record narrows the
- upper bound on the number of fields */
- switch (rec_get_status(rec)) {
- case REC_STATUS_ORDINARY:
- break;
- case REC_STATUS_NODE_PTR:
- max_n_fields = dict_index_get_n_unique_in_tree(
- index) + 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- max_n_fields = 1;
- break;
- default:
- ut_error;
- }
- }
- /* index->n_def == 0 for dummy indexes if !comp */
- ut_a(!comp || index->n_def);
- ut_a(!index->n_def || i <= max_n_fields);
- }
- /* Walk the field end offsets from the last field down to the first;
- with the flag bits masked off, no end offset may exceed the one
- that follows it. */
- while (i--) {
- ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
- ut_a(curr <= last);
- last = curr;
- }
- return(TRUE);
-}
-/****************************************************************
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
- rec_t* rec __attribute__((unused)),
- /* in: record */
- dict_index_t* index __attribute__((unused)),
- /* in: record descriptor */
- ulint* offsets __attribute__((unused)))
- /* in: array returned by rec_get_offsets() */
-{
-#ifdef UNIV_DEBUG
- ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
- offsets[2] = (ulint) rec;
- offsets[3] = (ulint) index;
-#endif /* UNIV_DEBUG */
-}
-
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in a record. */
-UNIV_INLINE
-byte*
-rec_get_nth_field(
-/*==============*/
- /* out: pointer to the field */
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n, /* in: index of the field */
- ulint* len) /* out: length of the field; UNIV_SQL_NULL
- if SQL null */
-{
- byte* field;
- ulint length;
- ut_ad(rec);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- ut_ad(len);
-
- if (UNIV_UNLIKELY(n == 0)) {
- field = rec;
- } else {
- field = rec + (rec_offs_base(offsets)[n] & REC_OFFS_MASK);
- }
-
- length = rec_offs_base(offsets)[1 + n];
-
- if (length & REC_OFFS_SQL_NULL) {
- length = UNIV_SQL_NULL;
- } else {
- length &= REC_OFFS_MASK;
- length -= field - rec;
- }
-
- *len = length;
- return(field);
-}
-
-/**********************************************************
-Determine if the offsets are for a record in the new
-compact format. */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- /* out: nonzero if compact format */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
-}
-
-/**********************************************************
-Returns nonzero if the extern bit is set in nth field of rec. */
-UNIV_INLINE
-ulint
-rec_offs_nth_extern(
-/*================*/
- /* out: nonzero if externally stored */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n) /* in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n]
- & REC_OFFS_EXTERNAL));
-}
-
-/**********************************************************
-Returns nonzero if the SQL NULL bit is set in nth field of rec. */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
- /* out: nonzero if SQL NULL */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n) /* in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n]
- & REC_OFFS_SQL_NULL));
-}
-
-/**********************************************************
-Gets the physical size of a field. */
-UNIV_INLINE
-ulint
-rec_offs_nth_size(
-/*==============*/
- /* out: length of field */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n) /* in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- if (!n) {
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_MASK);
- }
- return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n])
- & REC_OFFS_MASK);
-}
-
-/**********************************************************
-Returns TRUE if the extern bit is set in any of the fields
-described by offsets.
-NOTE(review): this comment used to say "of an old-style record", but
-nothing below checks the record format -- confirm against the callers. */
-UNIV_INLINE
-ibool
-rec_offs_any_extern(
-/*================*/
- /* out: TRUE if a field is stored externally */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- ulint i;
- /* scan from the last field down to field 0 */
- for (i = rec_offs_n_fields(offsets); i--; ) {
- if (rec_offs_nth_extern(offsets, i)) {
- return(TRUE);
- }
- }
- return(FALSE);
-}
-
-/***************************************************************
-Sets the value of the ith field extern storage bit. */
-UNIV_INLINE
-void
-rec_set_nth_field_extern_bit(
-/*=========================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr) /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-{
- if (dict_table_is_comp(index->table)) {
- rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr);
- } else {
- rec_set_nth_field_extern_bit_old(rec, i, val, mtr);
- }
-}
-
-/**********************************************************
-Returns the offset of n - 1th field end if the record is stored in the 1-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value. This function and the 2-byte counterpart are defined here because the
-C-compiler was not able to sum negative and positive constant offsets, and
-warned of constant arithmetic overflow within the compiler. */
-UNIV_INLINE
-ulint
-rec_1_get_prev_field_end_info(
-/*==========================*/
- /* out: offset of the end of the PREVIOUS field
- (field n - 1), SQL null flag ORed */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n)));
-}
-
-/**********************************************************
-Returns the offset of n - 1th field end if the record is stored in the 2-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value. */
-UNIV_INLINE
-ulint
-rec_2_get_prev_field_end_info(
-/*==========================*/
- /* out: offset of the end of the PREVIOUS field
- (field n - 1), SQL null flag and extern
- storage flag ORed */
- rec_t* rec, /* in: record */
- ulint n) /* in: field index */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n)));
-}
-
-/**********************************************************
-Sets the field end info for the nth field if the record is stored in the
-1-byte format. */
-UNIV_INLINE
-void
-rec_1_set_field_end_info(
-/*=====================*/
- rec_t* rec, /* in: record */
- ulint n, /* in: field index */
- ulint info) /* in: value to set */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info);
-}
-
-/**********************************************************
-Sets the field end info for the nth field if the record is stored in the
-2-byte format. */
-UNIV_INLINE
-void
-rec_2_set_field_end_info(
-/*=====================*/
- rec_t* rec, /* in: record */
- ulint n, /* in: field index */
- ulint info) /* in: value to set */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info);
-}
-
-/**********************************************************
-Gets the start offset of field n in a record stored in the 1-byte offsets
-form. The start of field n is the end offset of field n - 1 with the
-SQL null flag stripped; field 0 always starts at offset 0. */
-UNIV_INLINE
-ulint
-rec_1_get_field_start_offs(
-/*=======================*/
-			/* out: offset of the start of the field */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
-{
-	ulint	end_info;
-
-	ut_ad(rec_get_1byte_offs_flag(rec));
-	ut_ad(n <= rec_get_n_fields_old(rec));
-
-	if (n != 0) {
-		end_info = rec_1_get_prev_field_end_info(rec, n);
-
-		return(end_info & ~REC_1BYTE_SQL_NULL_MASK);
-	}
-
-	return(0);
-}
-
-/**********************************************************
-Gets the start offset of field n in a record stored in the 2-byte offsets
-form. The start of field n is the end offset of field n - 1 with the
-SQL null and extern flags stripped; field 0 always starts at offset 0. */
-UNIV_INLINE
-ulint
-rec_2_get_field_start_offs(
-/*=======================*/
-			/* out: offset of the start of the field */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
-{
-	ulint	end_info;
-
-	ut_ad(!rec_get_1byte_offs_flag(rec));
-	ut_ad(n <= rec_get_n_fields_old(rec));
-
-	if (n != 0) {
-		end_info = rec_2_get_prev_field_end_info(rec, n);
-
-		return(end_info
-		       & ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK));
-	}
-
-	return(0);
-}
-
-/**********************************************************
-Reads the offset at which data field n starts in an old-style record.
-An SQL null field starts at the end offset of the previous non-null field,
-or at 0 if there is none. Passing n equal to the number of fields yields
-the end offset of the last field. */
-UNIV_INLINE
-ulint
-rec_get_field_start_offs(
-/*=====================*/
-			/* out: offset of the start of the field */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: field index */
-{
-	ut_ad(rec);
-	ut_ad(n <= rec_get_n_fields_old(rec));
-
-	if (n == 0) {
-		/* The first field always starts at the record origin. */
-		return(0);
-	}
-
-	/* Dispatch on the width of the stored field end offsets. */
-	return(rec_get_1byte_offs_flag(rec)
-	       ? rec_1_get_field_start_offs(rec, n)
-	       : rec_2_get_field_start_offs(rec, n));
-}
-
-/****************************************************************
-Returns the number of bytes that field n physically occupies in an
-old-style record, computed as the distance between the start offsets of
-fields n and n + 1. An SQL null field may still occupy more than 0 bytes
-if its data type is of a fixed size. */
-UNIV_INLINE
-ulint
-rec_get_nth_field_size(
-/*===================*/
-			/* out: field size in bytes */
-	rec_t*	rec,	/* in: record */
-	ulint	n)	/* in: index of the field */
-{
-	ulint	start_offs	= rec_get_field_start_offs(rec, n);
-	ulint	end_offs	= rec_get_field_start_offs(rec, n + 1);
-	ulint	size		= end_offs - start_offs;
-
-	ut_ad(size < UNIV_PAGE_SIZE);
-
-	return(size);
-}
-
-/***************************************************************
-This is used to modify the value of an already existing field in a record.
-The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null.
-For records in ROW_FORMAT=COMPACT (new-style records), len must not be
-UNIV_SQL_NULL unless the field already is SQL null. */
-UNIV_INLINE
-void
-rec_set_nth_field(
-/*==============*/
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint n, /* in: index number of the field */
- const void* data, /* in: pointer to the data
- if not SQL null */
- ulint len) /* in: length of the data or UNIV_SQL_NULL */
-{
- byte* data2;
- ulint len2;
-
- ut_ad(rec);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- /* Case 1: the new value is SQL null. Nothing is copied from data. */
- if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) {
- if (!rec_offs_nth_sql_null(offsets, n)) {
- /* Turning a non-null field into SQL null is only
- done for records that are not in the compact format */
- ut_a(!rec_offs_comp(offsets));
- rec_set_nth_field_sql_null(rec, n);
- }
-
- return;
- }
-
- /* Case 2: the new value is not SQL null. Look up the current field;
- len2 is UNIV_SQL_NULL if the field is currently SQL null. */
- data2 = rec_get_nth_field(rec, offsets, n, &len2);
- if (len2 == UNIV_SQL_NULL) {
- /* Clear the null bit first; the new data must then fill
- exactly the physical size of the field. */
- ut_ad(!rec_offs_comp(offsets));
- rec_set_nth_field_null_bit(rec, n, FALSE);
- ut_ad(len == rec_get_nth_field_size(rec, n));
- } else {
- /* The field is overwritten in place, so the lengths of the
- old and new value must agree */
- ut_ad(len2 == len);
- }
-
- ut_memcpy(data2, data, len);
-}
-
-/**************************************************************
-Returns the data size of an old-style physical record, that is, the sum of
-its field lengths, with SQL null fields counted as length 0. This equals
-the distance in bytes from the record origin to the record end, and is
-obtained as the start offset of the field index one past the last field. */
-UNIV_INLINE
-ulint
-rec_get_data_size_old(
-/*==================*/
-			/* out: size */
-	rec_t*	rec)	/* in: physical record */
-{
-	ulint	n_fields;
-
-	ut_ad(rec);
-
-	n_fields = rec_get_n_fields_old(rec);
-
-	return(rec_get_field_start_offs(rec, n_fields));
-}
-
-/**************************************************************
-Records the number of fields in an offsets array. The count is kept in
-element 1 of the array. */
-UNIV_INLINE
-void
-rec_offs_set_n_fields(
-/*==================*/
-	ulint*	offsets,	/* in/out: array returned by
-				rec_get_offsets() */
-	ulint	n_fields)	/* in: number of fields */
-{
-	ut_ad(offsets);
-
-	/* The count must be positive and within the record limits ... */
-	ut_ad(n_fields > 0);
-	ut_ad(n_fields <= REC_MAX_N_FIELDS);
-
-	/* ... and the array must have room for the header plus one
-	element per field. */
-	ut_ad(n_fields + REC_OFFS_HEADER_SIZE
-	      <= rec_offs_get_n_alloc(offsets));
-
-	offsets[1] = n_fields;
-}
-
-/**************************************************************
-Returns the data size of a physical record, that is, the sum of its field
-lengths, with SQL null fields counted as length 0. This equals the
-distance in bytes from the record origin to the record end. */
-UNIV_INLINE
-ulint
-rec_offs_data_size(
-/*===============*/
-				/* out: size */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	const ulint*	base;
-	ulint		end_offs;
-
-	ut_ad(rec_offs_validate(NULL, NULL, offsets));
-
-	/* The entry of the last field holds the end offset of the data;
-	bits outside REC_OFFS_MASK are stripped. */
-	base = rec_offs_base(offsets);
-	end_offs = base[rec_offs_n_fields(offsets)] & REC_OFFS_MASK;
-
-	ut_ad(end_offs < UNIV_PAGE_SIZE);
-
-	return(end_offs);
-}
-
-/**************************************************************
-Returns the extra size of a record, that is, its total size minus its data
-size. This equals the distance in bytes from the record start to the
-record origin. */
-UNIV_INLINE
-ulint
-rec_offs_extra_size(
-/*================*/
-				/* out: size */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	extra;
-
-	ut_ad(rec_offs_validate(NULL, NULL, offsets));
-
-	/* The first base element holds the extra size together with the
-	REC_OFFS_COMPACT flag, which is cleared here. */
-	extra = rec_offs_base(offsets)[0] & ~REC_OFFS_COMPACT;
-
-	ut_ad(extra < UNIV_PAGE_SIZE);
-
-	return(extra);
-}
-
-/**************************************************************
-Returns the total size of a physical record: data size plus extra size. */
-UNIV_INLINE
-ulint
-rec_offs_size(
-/*==========*/
-				/* out: size */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	data_size	= rec_offs_data_size(offsets);
-	ulint	extra_size	= rec_offs_extra_size(offsets);
-
-	return(data_size + extra_size);
-}
-
-/**************************************************************
-Returns a pointer to the end of the record, which lies the data size of
-the record past the record origin. */
-UNIV_INLINE
-byte*
-rec_get_end(
-/*========*/
-				/* out: pointer to end */
-	rec_t*		rec,	/* in: pointer to record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	data_size = rec_offs_data_size(offsets);
-
-	return(rec + data_size);
-}
-
-/**************************************************************
-Returns a pointer to the start of the record, which lies the extra size
-of the record before the record origin. */
-UNIV_INLINE
-byte*
-rec_get_start(
-/*==========*/
-				/* out: pointer to start */
-	rec_t*		rec,	/* in: pointer to record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint	extra_size = rec_offs_extra_size(offsets);
-
-	return(rec - extra_size);
-}
-
-/*******************************************************************
-Copies a whole physical record (extra bytes followed by data) to a
-buffer. */
-UNIV_INLINE
-rec_t*
-rec_copy(
-/*=====*/
-				/* out: pointer to the origin of the copy */
-	void*		buf,	/* in: buffer */
-	const rec_t*	rec,	/* in: physical record */
-	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
-{
-	ulint		extra_len;
-	ulint		total_len;
-	const byte*	start;
-
-	ut_ad(rec && buf);
-	ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
-	ut_ad(rec_validate((rec_t*) rec, offsets));
-
-	extra_len = rec_offs_extra_size(offsets);
-	total_len = extra_len + rec_offs_data_size(offsets);
-
-	/* The record begins extra_len bytes before its origin. */
-	start = rec - extra_len;
-
-	ut_memcpy(buf, start, total_len);
-
-	/* The origin of the copy sits at the same distance from the start
-	of the buffer. */
-	return((byte*) buf + extra_len);
-}
-
-/**************************************************************
-Computes the extra size of an old-style physical record from its data size
-and number of fields. One byte per field is needed if the data size does
-not exceed REC_1BYTE_OFFS_LIMIT, otherwise two bytes per field, in
-addition to REC_N_OLD_EXTRA_BYTES. */
-UNIV_INLINE
-ulint
-rec_get_converted_extra_size(
-/*=========================*/
-				/* out: extra size */
-	ulint	data_size,	/* in: data size */
-	ulint	n_fields)	/* in: number of fields */
-{
-	ulint	bytes_per_field;
-
-	bytes_per_field = (data_size <= REC_1BYTE_OFFS_LIMIT) ? 1 : 2;
-
-	return(REC_N_OLD_EXTRA_BYTES + bytes_per_field * n_fields);
-}
-
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a new-style physical record. */
-
-ulint
-rec_get_converted_size_new(
-/*=======================*/
- /* out: size */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple);/* in: data tuple */
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a physical record. For a table in the compact format the computation is
-delegated to rec_get_converted_size_new(); otherwise the size is the data
-size of the tuple plus the old-style extra size. */
-UNIV_INLINE
-ulint
-rec_get_converted_size(
-/*===================*/
- /* out: size */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple) /* in: data tuple */
-{
- ulint data_size;
- ulint extra_size;
-
- ut_ad(index);
- ut_ad(dtuple);
- ut_ad(dtuple_check_typed(dtuple));
-
- /* Unless the index has the DICT_UNIVERSAL type bit set, the tuple
- must have the expected number of fields: for a tuple whose status
- bits are REC_STATUS_NODE_PTR that is
- dict_index_get_n_unique_in_tree() + 1, otherwise
- dict_index_get_n_fields(). */
- ut_ad(index->type & DICT_UNIVERSAL
- || dtuple_get_n_fields(dtuple)
- == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
- == REC_STATUS_NODE_PTR)
- ? dict_index_get_n_unique_in_tree(index) + 1
- : dict_index_get_n_fields(index)));
-
- if (dict_table_is_comp(index->table)) {
- return(rec_get_converted_size_new(index, dtuple));
- }
-
- /* Old-style record: the extra size depends on the data size and on
- the number of fields in the tuple */
- data_size = dtuple_get_data_size(dtuple);
-
- extra_size = rec_get_converted_extra_size(
- data_size, dtuple_get_n_fields(dtuple));
-
- return(data_size + extra_size);
-}
-
-/****************************************************************
-Folds a prefix of a physical record to a ulint. Folds only existing fields,
-that is, checks that we do not run out of the record. The fold is seeded
-with the fold of tree_id; SQL null fields do not contribute to it. */
-UNIV_INLINE
-ulint
-rec_fold(
-/*=====*/
- /* out: the folded value */
- rec_t* rec, /* in: the physical record */
- const ulint* offsets, /* in: array returned by
- rec_get_offsets() */
- ulint n_fields, /* in: number of complete
- fields to fold */
- ulint n_bytes, /* in: number of bytes to fold
- in an incomplete last field */
- dulint tree_id) /* in: index tree id */
-{
- ulint i;
- byte* data;
- ulint len;
- ulint fold;
- ulint n_fields_rec;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(rec_validate((rec_t*) rec, offsets));
- ut_ad(n_fields + n_bytes > 0);
-
- n_fields_rec = rec_offs_n_fields(offsets);
- ut_ad(n_fields <= n_fields_rec);
- ut_ad(n_fields < n_fields_rec || n_bytes == 0);
-
- /* Clamp the request to the fields that exist in the record */
- if (n_fields > n_fields_rec) {
- n_fields = n_fields_rec;
- }
-
- /* If all fields are folded completely, there is no field left
- from which a partial prefix could be taken */
- if (n_fields == n_fields_rec) {
- n_bytes = 0;
- }
-
- fold = ut_fold_dulint(tree_id);
-
- /* Fold the complete fields, skipping SQL nulls */
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- /* Fold at most n_bytes bytes of the next field; after the loop
- above i equals n_fields */
- if (n_bytes > 0) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- if (len > n_bytes) {
- len = n_bytes;
- }
-
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- return(fold);
-}
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
deleted file mode 100644
index 79c162392d2..00000000000
--- a/storage/innobase/include/rem0types.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/************************************************************************
-Record manager global types
-
-(c) 1994-1996 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef rem0types_h
-#define rem0types_h
-
-/* We define the physical record simply as an array of bytes */
-typedef byte rec_t;
-
-/* Maximum values for various fields (for non-blob tuples) */
-#define REC_MAX_N_FIELDS (1024 - 1) /* = 1023 */
-#define REC_MAX_HEAP_NO (2 * 8192 - 1) /* = 16383 */
-#define REC_MAX_N_OWNED (16 - 1) /* = 15 */
-
-#endif
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
deleted file mode 100644
index b4bcc8ac5ca..00000000000
--- a/storage/innobase/include/row0ins.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/******************************************************
-Insert into a table
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0ins_h
-#define row0ins_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-
-/*******************************************************************
-Checks if foreign key constraint fails for an index entry. Sets shared locks
-which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_foreign_key_check_lock. */
-
-ulint
-row_ins_check_foreign_constraint(
-/*=============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_NO_REFERENCED_ROW,
- or DB_ROW_IS_REFERENCED */
- ibool check_ref,/* in: TRUE if we want to check that
- the referenced table is ok, FALSE if we
- want to check the foreign key table */
- dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the
- tables mentioned in it must be in the
- dictionary cache if they exist at all */
- dict_table_t* table, /* in: if check_ref is TRUE, then the foreign
- table, else the referenced table */
- dtuple_t* entry, /* in: index entry for index */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Creates an insert node struct. */
-
-ins_node_t*
-ins_node_create(
-/*============*/
- /* out, own: insert node struct */
- ulint ins_type, /* in: INS_VALUES, ... */
- dict_table_t* table, /* in: table where to insert */
- mem_heap_t* heap); /* in: mem heap where created */
-/*************************************************************************
-Sets a new row to insert for an INS_DIRECT node. This function is only used
-if we have constructed the row separately, which is a rare case; this
-function is quite slow. */
-
-void
-ins_node_set_new_row(
-/*=================*/
- ins_node_t* node, /* in: insert node */
- dtuple_t* row); /* in: new row (or first row) for the node */
-/*******************************************************************
-Tries to insert an index entry to an index. If the index is clustered
-and a record with the same unique key is found, the other record is
-necessarily marked deleted by a committed transaction, or a unique key
-violation error occurs. The delete marked record is then updated to an
-existing record, and we must write an undo log record on the delete
-marked record. If the index is secondary, and a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index. */
-
-ulint
-row_ins_index_entry_low(
-/*====================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
- if pessimistic retry needed, or error code */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- que_thr_t* thr); /* in: query thread */
-/*******************************************************************
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record. */
-
-ulint
-row_ins_index_entry(
-/*================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DUPLICATE_KEY, or some other error code */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- que_thr_t* thr); /* in: query thread */
-/***************************************************************
-Inserts a row to a table. */
-
-ulint
-row_ins(
-/*====*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- ins_node_t* node, /* in: row insert node */
- que_thr_t* thr); /* in: query thread */
-/***************************************************************
-Inserts a row to a table. This is a high-level function used in
-SQL execution graphs. */
-
-que_thr_t*
-row_ins_step(
-/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-
-/* Insert node structure */
-
-struct ins_node_struct{
- que_common_t common; /* node type: QUE_NODE_INSERT */
- ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
- dtuple_t* row; /* row to insert */
- dict_table_t* table; /* table where to insert */
- sel_node_t* select; /* select in searched insert */
- que_node_t* values_list;/* list of expressions to evaluate and
- insert in an INS_VALUES insert */
- ulint state; /* node execution state: one of the
- INS_NODE_... states */
- dict_index_t* index; /* NULL, or the next index where the index
- entry should be inserted */
- dtuple_t* entry; /* NULL, or entry to insert in the index;
- after a successful insert of the entry,
- this should be reset to NULL */
- UT_LIST_BASE_NODE_T(dtuple_t)
- entry_list;/* list of entries, one for each index */
- byte* row_id_buf;/* buffer for the row id sys field in row */
- dulint trx_id; /* trx id of the last trx which executed the
- node */
- byte* trx_id_buf;/* buffer for the trx id sys field in row */
- mem_heap_t* entry_sys_heap;
- /* memory heap used as auxiliary storage;
- entry_list and sys fields are stored here;
- if this is NULL, entry list should be created
- and buffers for sys fields in row allocated */
- ulint magic_n; /* presumably set to INS_NODE_MAGIC_N for
- validity checks -- confirm in row0ins.c */
-};
-
-#define INS_NODE_MAGIC_N 15849075
-
-/* Insert node types */
-#define INS_SEARCHED 0 /* INSERT INTO ... SELECT ... */
-#define INS_VALUES 1 /* INSERT INTO ... VALUES ... */
-#define INS_DIRECT 2 /* this is for internal use in dict0crea:
- insert the row directly */
-
-/* Node execution states */
-#define INS_NODE_SET_IX_LOCK 1 /* we should set an IX lock on table */
-#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */
-#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and
- inserted */
-
-#ifndef UNIV_NONINL
-#include "row0ins.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0ins.ic b/storage/innobase/include/row0ins.ic
deleted file mode 100644
index 80a232d41ee..00000000000
--- a/storage/innobase/include/row0ins.ic
+++ /dev/null
@@ -1,9 +0,0 @@
-/******************************************************
-Insert into a table
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
deleted file mode 100644
index 5430190fa51..00000000000
--- a/storage/innobase/include/row0mysql.h
+++ /dev/null
@@ -1,743 +0,0 @@
-/******************************************************
-Interface between Innobase row operations and MySQL.
-Contains also create table and other data dictionary operations.
-
-(c) 2000 Innobase Oy
-
-Created 9/17/2000 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0mysql_h
-#define row0mysql_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-#include "btr0pcur.h"
-#include "trx0types.h"
-
-extern ibool row_rollback_on_timeout;
-
-typedef struct row_prebuilt_struct row_prebuilt_t;
-
-/***********************************************************************
-Frees the blob heap in prebuilt when no longer needed. */
-
-void
-row_mysql_prebuilt_free_blob_heap(
-/*==============================*/
- row_prebuilt_t* prebuilt); /* in: prebuilt struct of a
- ha_innobase:: table handle */
-/***********************************************************************
-Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
-format. */
-
-byte*
-row_mysql_store_true_var_len(
-/*=========================*/
- /* out: pointer to the data, we skip the 1 or 2 bytes
- at the start that are used to store the len */
- byte* dest, /* in: where to store */
- ulint len, /* in: length, must fit in two bytes */
- ulint lenlen);/* in: storage length of len: either 1 or 2 bytes */
-/***********************************************************************
-Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
-returns a pointer to the data. */
-
-byte*
-row_mysql_read_true_varchar(
-/*========================*/
- /* out: pointer to the data, we skip the 1 or 2 bytes
- at the start that are used to store the len */
- ulint* len, /* out: variable-length field length */
- byte* field, /* in: field in the MySQL format */
- ulint lenlen);/* in: storage length of len: either 1 or 2 bytes */
-/***********************************************************************
-Stores a reference to a BLOB in the MySQL format. */
-
-void
-row_mysql_store_blob_ref(
-/*=====================*/
- byte* dest, /* in: where to store */
- ulint col_len, /* in: dest buffer size: determines into
- how many bytes the BLOB length is stored,
- this may vary from 1 to 4 bytes */
- byte* data, /* in: BLOB data */
- ulint len); /* in: BLOB length */
-/***********************************************************************
-Reads a reference to a BLOB in the MySQL format. */
-
-byte*
-row_mysql_read_blob_ref(
-/*====================*/
- /* out: pointer to BLOB data */
- ulint* len, /* out: BLOB length */
- byte* ref, /* in: BLOB reference in the MySQL format */
- ulint col_len); /* in: BLOB reference length (not BLOB
- length) */
-/******************************************************************
-Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
-The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.c. */
-
-byte*
-row_mysql_store_col_in_innobase_format(
-/*===================================*/
- /* out: up to which byte we used
- buf in the conversion */
- dfield_t* dfield, /* in/out: dfield where dtype
- information must be already set when
- this function is called! */
- byte* buf, /* in/out: buffer for a converted
- integer value; this must be at least
- col_len long then! */
- ibool row_format_col, /* TRUE if the mysql_data is from
- a MySQL row, FALSE if from a MySQL
- key value;
- in MySQL, a true VARCHAR storage
- format differs in a row and in a
- key value: in a key value the length
- is always stored in 2 bytes! */
- byte* mysql_data, /* in: MySQL column value, not
- SQL NULL; NOTE that dfield may also
- get a pointer to mysql_data,
- therefore do not discard this as long
- as dfield is used! */
- ulint col_len, /* in: MySQL column length; NOTE that
- this is the storage length of the
- column in the MySQL format row, not
- necessarily the length of the actual
- payload data; if the column is a true
- VARCHAR then this is irrelevant */
- ulint comp); /* in: nonzero=compact format */
-/********************************************************************
-Handles user errors and lock waits detected by the database engine. */
-
-ibool
-row_mysql_handle_errors(
-/*====================*/
- /* out: TRUE if it was a lock wait and
- we should continue running the query thread */
- ulint* new_err,/* out: possible new error encountered in
- rollback, or the old error which was
- in effect at function entry */
- trx_t* trx, /* in: transaction */
- que_thr_t* thr, /* in: query thread */
- trx_savept_t* savept);/* in: savepoint */
-/************************************************************************
-Create a prebuilt struct for a MySQL table handle. */
-
-row_prebuilt_t*
-row_create_prebuilt(
-/*================*/
- /* out, own: a prebuilt struct */
- dict_table_t* table); /* in: Innobase table handle */
-/************************************************************************
-Free a prebuilt struct for a MySQL table handle. */
-
-void
-row_prebuilt_free(
-/*==============*/
- row_prebuilt_t* prebuilt); /* in, own: prebuilt struct */
-/*************************************************************************
-Updates the transaction pointers in query graphs stored in the prebuilt
-struct. */
-
-void
-row_update_prebuilt_trx(
-/*====================*/
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
- handle */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Unlocks an AUTO_INC type lock possibly reserved by trx. */
-
-void
-row_unlock_table_autoinc_for_mysql(
-/*===============================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
-AUTO_INC lock gives exclusive access to the auto-inc counter of the
-table. The lock is reserved only for the duration of an SQL statement.
-It is not compatible with another AUTO_INC or exclusive lock on the
-table. */
-
-int
-row_lock_table_autoinc_for_mysql(
-/*=============================*/
- /* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in the MySQL
- table handle */
-/*************************************************************************
-Sets a table lock on the table mentioned in prebuilt. */
-
-int
-row_lock_table_for_mysql(
-/*=====================*/
- /* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in the MySQL
- table handle */
- dict_table_t* table, /* in: table to lock, or NULL
- if prebuilt->table should be
- locked as
- prebuilt->select_lock_type */
- ulint mode); /* in: lock mode of table
- (ignored if table==NULL) */
-
-/*************************************************************************
-Does an insert for MySQL. */
-
-int
-row_insert_for_mysql(
-/*=================*/
- /* out: error code or DB_SUCCESS */
- byte* mysql_rec, /* in: row in the MySQL format */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
- handle */
-/*************************************************************************
-Builds a dummy query graph used in selects. */
-
-void
-row_prebuild_sel_graph(
-/*===================*/
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
- handle */
-/*************************************************************************
-Gets pointer to a prebuilt update vector used in updates. If the update
-graph has not yet been built in the prebuilt struct, then this function
-first builds it. */
-
-upd_t*
-row_get_prebuilt_update_vector(
-/*===========================*/
- /* out: prebuilt update vector */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
- handle */
-/*************************************************************************
-Checks if a table is such that we automatically created a clustered
-index on it (on row id). */
-
-ibool
-row_table_got_default_clust_index(
-/*==============================*/
- dict_table_t* table);
-/*************************************************************************
-Calculates the key number used inside MySQL for an Innobase index. We have
-to take into account if we generated a default clustered index for the table */
-
-ulint
-row_get_mysql_key_number_for_index(
-/*===============================*/
- dict_index_t* index);
-/*************************************************************************
-Does an update or delete of a row for MySQL. */
-
-int
-row_update_for_mysql(
-/*=================*/
- /* out: error code or DB_SUCCESS */
- byte* mysql_rec, /* in: the row to be updated, in
- the MySQL format */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
- handle */
-/*************************************************************************
-This can only be used when srv_locks_unsafe_for_binlog is TRUE or
-session is using a READ COMMITTED isolation level. Before
-calling this function we must use trx_reset_new_rec_lock_info() and
-trx_register_new_rec_lock() to store the information which new record locks
-really were set. This function removes a newly set lock under prebuilt->pcur,
-and also under prebuilt->clust_pcur. Currently, this is only used and tested
-in the case of an UPDATE or a DELETE statement, where the row lock is of the
-LOCK_X type.
-Thus, this implements a 'mini-rollback' that releases the latest record
-locks we set. */
-
-int
-row_unlock_for_mysql(
-/*=================*/
- /* out: error code or DB_SUCCESS */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
- handle */
- ibool has_latches_on_recs);/* TRUE if called so that we have
- the latches on the records under pcur
- and clust_pcur, and we do not need to
- reposition the cursors. */
-/*************************************************************************
-Creates a query graph node of 'update' type to be used in the MySQL
-interface. */
-
-upd_node_t*
-row_create_update_node_for_mysql(
-/*=============================*/
- /* out, own: update node */
- dict_table_t* table, /* in: table to update */
- mem_heap_t* heap); /* in: mem heap from which allocated */
-/**************************************************************************
-Does a cascaded delete or set null in a foreign key operation. */
-
-ulint
-row_update_cascade_for_mysql(
-/*=========================*/
- /* out: error code or DB_SUCCESS */
- que_thr_t* thr, /* in: query thread */
- upd_node_t* node, /* in: update node used in the cascade
- or set null operation */
- dict_table_t* table); /* in: table where we do the operation */
-/*************************************************************************
-Locks the data dictionary exclusively for performing a table create or other
-data dictionary modification operation. */
-
-void
-row_mysql_lock_data_dictionary(
-/*===========================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Unlocks the data dictionary exclusive lock. */
-
-void
-row_mysql_unlock_data_dictionary(
-/*=============================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Locks the data dictionary in shared mode from modifications, for performing
-foreign key check, rollback, or other operation invisible to MySQL. */
-
-void
-row_mysql_freeze_data_dictionary(
-/*=============================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Unlocks the data dictionary shared lock. */
-
-void
-row_mysql_unfreeze_data_dictionary(
-/*===============================*/
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Creates a table for MySQL. If the name of the table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also start the printing of monitor
-output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). */
-
-int
-row_create_table_for_mysql(
-/*=======================*/
- /* out: error code or DB_SUCCESS */
- dict_table_t* table, /* in: table definition */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Does an index creation operation for MySQL. TODO: currently failure
-to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table. */
-
-int
-row_create_index_for_mysql(
-/*=======================*/
- /* out: error number or DB_SUCCESS */
- dict_index_t* index, /* in: index definition */
- trx_t* trx, /* in: transaction handle */
- const ulint* field_lengths); /* in: if not NULL, must contain
- dict_index_get_n_fields(index)
- actual field lengths for the
- index columns, which are
- then checked for not being too
- large. */
-/*************************************************************************
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-both participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint. */
-
-int
-row_table_add_foreign_constraints(
-/*==============================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction */
- const char* sql_string, /* in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES table2(c, d),
- table2 can be written also with the
- database name before it: test.table2 */
- const char* name, /* in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks); /* in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
-
-/*************************************************************************
-The master thread in srv0srv.c calls this regularly to drop tables which
-we must drop in background after queries to them have ended. Such lazy
-dropping of tables is needed in ALTER TABLE on Unix. */
-
-ulint
-row_drop_tables_for_mysql_in_background(void);
-/*=========================================*/
- /* out: how many tables dropped
- + remaining tables in list */
-/*************************************************************************
-Get the background drop list length. NOTE: the caller must own the kernel
-mutex! */
-
-ulint
-row_get_background_drop_list_len_low(void);
-/*======================================*/
- /* out: how many tables in list */
-/*************************************************************************
-Truncates a table for MySQL. */
-
-int
-row_truncate_table_for_mysql(
-/*=========================*/
- /* out: error code or DB_SUCCESS */
- dict_table_t* table, /* in: table handle */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Drops a table for MySQL. If the name of the dropped table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. */
-
-int
-row_drop_table_for_mysql(
-/*=====================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx, /* in: transaction handle */
- ibool drop_db);/* in: TRUE=dropping whole database */
-
-/*************************************************************************
-Discards the tablespace of a table which is stored in an .ibd file. Discarding
-means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE. */
-
-int
-row_discard_tablespace_for_mysql(
-/*=============================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx); /* in: transaction handle */
-/*********************************************************************
-Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary. */
-
-int
-row_import_tablespace_for_mysql(
-/*============================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Drops a database for MySQL. */
-
-int
-row_drop_database_for_mysql(
-/*========================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: database name which ends to '/' */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Renames a table for MySQL. */
-
-int
-row_rename_table_for_mysql(
-/*=======================*/
- /* out: error code or DB_SUCCESS */
- const char* old_name, /* in: old table name */
- const char* new_name, /* in: new table name */
- trx_t* trx); /* in: transaction handle */
-/*************************************************************************
-Checks a table for corruption. */
-
-ulint
-row_check_table_for_mysql(
-/*======================*/
- /* out: DB_ERROR or DB_SUCCESS */
- row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
- handle */
-
-/*************************************************************************
-Determines if a table is a magic monitor table. */
-
-ibool
-row_is_magic_monitor_table(
-/*=======================*/
- /* out: TRUE if monitor table */
- const char* table_name); /* in: name of the table, in the
- form database/table_name */
-
-/* A struct describing a place for an individual column in the MySQL
-row format which is presented to the table handler in ha_innobase.
-This template struct is used to speed up row transformations between
-the Innobase and MySQL row formats. */
-
-typedef struct mysql_row_templ_struct mysql_row_templ_t;
-struct mysql_row_templ_struct {
- ulint col_no; /* column number of the column */
- ulint rec_field_no; /* field number of the column in an
- Innobase record in the current index;
- not defined if template_type is
- ROW_MYSQL_WHOLE_ROW */
- ulint mysql_col_offset; /* offset of the column in the MySQL
- row format */
- ulint mysql_col_len; /* length of the column in the MySQL
- row format */
- ulint mysql_null_byte_offset; /* MySQL NULL bit byte offset in a
- MySQL record */
- ulint mysql_null_bit_mask; /* bit mask to get the NULL bit,
- zero if column cannot be NULL */
- ulint type; /* column type in Innobase mtype
- numbers DATA_CHAR... */
- ulint mysql_type; /* MySQL type code; this is always
- < 256 */
- ulint mysql_length_bytes; /* if mysql_type
- == DATA_MYSQL_TRUE_VARCHAR, this tells
- whether we should use 1 or 2 bytes to
- store the MySQL true VARCHAR data
- length at the start of row in the MySQL
- format (NOTE that the MySQL key value
- format always uses 2 bytes for the data
- len) */
- ulint charset; /* MySQL charset-collation code
- of the column, or zero */
- ulint mbminlen; /* minimum length of a character, in
- bytes, or zero if not a char type */
- ulint mbmaxlen; /* maximum length of a character, in
- bytes, or zero if not a char type */
- ulint is_unsigned; /* if a column type is an integer
- type and this field is != 0, then
- it is an unsigned integer type */
-};
-
-#define MYSQL_FETCH_CACHE_SIZE 8
-/* After fetching this many rows, we start caching them in fetch_cache */
-#define MYSQL_FETCH_CACHE_THRESHOLD 4
-
-#define ROW_PREBUILT_ALLOCATED 78540783
-#define ROW_PREBUILT_FREED 26423527
-
-/* A struct for (sometimes lazily) prebuilt structures in an Innobase table
-handle used within MySQL; these are used to save CPU time. */
-
-struct row_prebuilt_struct {
- ulint magic_n; /* this magic number is set to
- ROW_PREBUILT_ALLOCATED when created
- and to ROW_PREBUILT_FREED when the
- struct has been freed; used in
- debugging */
- dict_table_t* table; /* Innobase table handle */
- trx_t* trx; /* current transaction handle */
- ibool sql_stat_start; /* TRUE when we start processing of
- an SQL statement: we may have to set
- an intention lock on the table,
- create a consistent read view etc. */
- ibool mysql_has_locked; /* this is set TRUE when MySQL
- calls external_lock on this handle
- with a lock flag, and set FALSE when
- with the F_UNLOCK flag */
- ibool clust_index_was_generated;
- /* if the user did not define a
- primary key in MySQL, then Innobase
- automatically generated a clustered
- index where the ordering column is
- the row id: in this case this flag
- is set to TRUE */
- dict_index_t* index; /* current index for a search, if
- any */
- ulint read_just_key; /* set to 1 when MySQL calls
- ha_innobase::extra with the
- argument HA_EXTRA_KEYREAD; it is enough
- to read just columns defined in
- the index (i.e., no read of the
- clustered index record necessary) */
- ibool used_in_HANDLER;/* TRUE if we have been using this
- handle in a MySQL HANDLER low level
- index cursor command: then we must
- store the pcur position even in a
- unique search from a clustered index,
- because HANDLER allows NEXT and PREV
- in such a situation */
- ulint template_type; /* ROW_MYSQL_WHOLE_ROW,
- ROW_MYSQL_REC_FIELDS,
- ROW_MYSQL_DUMMY_TEMPLATE, or
- ROW_MYSQL_NO_TEMPLATE */
- ulint n_template; /* number of elements in the
- template */
- ulint null_bitmap_len;/* number of bytes in the SQL NULL
- bitmap at the start of a row in the
- MySQL format */
- ibool need_to_access_clustered; /* if we are fetching
- columns through a secondary index
- and at least one column is not in
- the secondary index, then this is
- set to TRUE */
- ibool templ_contains_blob;/* TRUE if the template contains
- BLOB column(s) */
- mysql_row_templ_t* mysql_template;/* template used to transform
- rows fast between MySQL and Innobase
- formats; memory for this template
- is not allocated from 'heap' */
- mem_heap_t* heap; /* memory heap from which
- these auxiliary structures are
- allocated when needed */
- ins_node_t* ins_node; /* Innobase SQL insert node
- used to perform inserts
- to the table */
- byte* ins_upd_rec_buff;/* buffer for storing data converted
- to the Innobase format from the MySQL
- format */
- const byte* default_rec; /* the default values of all columns
- (a "default row") in MySQL format */
- ulint hint_need_to_fetch_extra_cols;
- /* normally this is set to 0; if this
- is set to ROW_RETRIEVE_PRIMARY_KEY,
- then we should at least retrieve all
- columns in the primary key; if this
- is set to ROW_RETRIEVE_ALL_COLS, then
- we must retrieve all columns in the
- key (if read_just_key == 1), or all
- columns in the table */
- upd_node_t* upd_node; /* Innobase SQL update node used
- to perform updates and deletes */
- que_fork_t* ins_graph; /* Innobase SQL query graph used
- in inserts */
- que_fork_t* upd_graph; /* Innobase SQL query graph used
- in updates or deletes */
- btr_pcur_t* pcur; /* persistent cursor used in selects
- and updates */
- btr_pcur_t* clust_pcur; /* persistent cursor used in
- some selects and updates */
- que_fork_t* sel_graph; /* dummy query graph used in
- selects */
- dtuple_t* search_tuple; /* prebuilt dtuple used in selects */
- byte row_id[DATA_ROW_ID_LEN];
- /* if the clustered index was
- generated, the row id of the
- last row fetched is stored
- here */
- dtuple_t* clust_ref; /* prebuilt dtuple used in
- sel/upd/del */
- ulint select_lock_type;/* LOCK_NONE, LOCK_S, or LOCK_X */
- ulint stored_select_lock_type;/* this field is used to
- remember the original select_lock_type
- that was decided in ha_innodb.cc,
- ::store_lock(), ::external_lock(),
- etc. */
- ulint row_read_type; /* ROW_READ_WITH_LOCKS if row locks
- should be obtained for records
- under an UPDATE or DELETE cursor.
- If innodb_locks_unsafe_for_binlog
- is TRUE, this can be set to
- ROW_READ_TRY_SEMI_CONSISTENT, so that
- if the row under an UPDATE or DELETE
- cursor was locked by another
- transaction, InnoDB will resort
- to reading the last committed value
- ('semi-consistent read'). Then,
- this field will be set to
- ROW_READ_DID_SEMI_CONSISTENT to
- indicate that. If the row does not
- match the WHERE condition, MySQL will
- invoke handler::unlock_row() to
- clear the flag back to
- ROW_READ_TRY_SEMI_CONSISTENT and
- to simply skip the row. If
- the row matches, the next call to
- row_search_for_mysql() will lock
- the row.
- This eliminates lock waits in some
- cases; note that this breaks
- serializability. */
- ulint new_rec_locks; /* normally 0; if
- srv_locks_unsafe_for_binlog is
- TRUE or session is using READ
- COMMITTED isolation level, in a
- cursor search, if we set a new
- record lock on an index, this is
- incremented; this is used in
- releasing the locks under the
- cursors if we are performing an
- UPDATE and we determine after
- retrieving the row that it does
- not need to be locked; thus,
- these can be used to implement a
- 'mini-rollback' that releases
- the latest record locks */
- ulint mysql_prefix_len;/* byte offset of the end of
- the last requested column */
- ulint mysql_row_len; /* length in bytes of a row in the
- MySQL format */
- ulint n_rows_fetched; /* number of rows fetched after
- positioning the current cursor */
- ulint fetch_direction;/* ROW_SEL_NEXT or ROW_SEL_PREV */
- byte* fetch_cache[MYSQL_FETCH_CACHE_SIZE];
- /* a cache for fetched rows if we
- fetch many rows from the same cursor:
- it saves CPU time to fetch them in a
- batch; we reserve mysql_row_len
- bytes for each such row; these
- pointers point 4 bytes past the
- allocated mem buf start, because
- there is a 4 byte magic number at the
- start and at the end */
- ibool keep_other_fields_on_keyread; /* when using fetch
- cache with HA_EXTRA_KEYREAD, don't
- overwrite other fields in the mysql
- row buffer. */
- ulint fetch_cache_first;/* position of the first not yet
- fetched row in fetch_cache */
- ulint n_fetch_cached; /* number of not yet fetched rows
- in fetch_cache */
- mem_heap_t* blob_heap; /* in SELECTs, BLOB fields are copied
- to this heap */
- mem_heap_t* old_vers_heap; /* memory heap where a previous
- version is built in consistent read */
- /*----------------------*/
- ulonglong autoinc_last_value;/* last value of AUTO-INC interval */
- ulonglong autoinc_increment;/* The increment step of the auto
- increment column. Value must be
- greater than or equal to 1. Required to
- calculate the next value */
- ulonglong autoinc_offset; /* The offset passed to
- get_auto_increment() by MySQL. Required
- to calculate the next value */
- ulint autoinc_error; /* The actual error code encountered
- while trying to init or read the
- autoinc value from the table. We
- store it here so that we can return
- it to MySQL */
- /*----------------------*/
- ulint magic_n2; /* this should be the same as
- magic_n */
-};
-
-#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
-
-#define ROW_MYSQL_WHOLE_ROW 0
-#define ROW_MYSQL_REC_FIELDS 1
-#define ROW_MYSQL_NO_TEMPLATE 2
-#define ROW_MYSQL_DUMMY_TEMPLATE 3 /* dummy template used in
- row_scan_and_check_index */
-
-/* Values for hint_need_to_fetch_extra_cols */
-#define ROW_RETRIEVE_PRIMARY_KEY 1
-#define ROW_RETRIEVE_ALL_COLS 2
-
-/* Values for row_read_type */
-#define ROW_READ_WITH_LOCKS 0
-#define ROW_READ_TRY_SEMI_CONSISTENT 1
-#define ROW_READ_DID_SEMI_CONSISTENT 2
-
-#ifndef UNIV_NONINL
-#include "row0mysql.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0mysql.ic b/storage/innobase/include/row0mysql.ic
deleted file mode 100644
index aa8a70d8761..00000000000
--- a/storage/innobase/include/row0mysql.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-MySQL interface for Innobase
-
-(C) 2001 Innobase Oy
-
-Created 1/23/2001 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
deleted file mode 100644
index 174dd239eb5..00000000000
--- a/storage/innobase/include/row0purge.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/******************************************************
-Purge obsolete records
-
-(c) 1997 Innobase Oy
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0purge_h
-#define row0purge_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-
-/************************************************************************
-Creates a purge node to a query graph. */
-
-purge_node_t*
-row_purge_node_create(
-/*==================*/
- /* out, own: purge node */
- que_thr_t* parent, /* in: parent node, i.e., a thr node */
- mem_heap_t* heap); /* in: memory heap where created */
-/***************************************************************
-Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph. */
-
-que_thr_t*
-row_purge_step(
-/*===========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-
-/* Purge node structure: per-node state of a QUE_NODE_PURGE query graph node */
-
-struct purge_node_struct{
- que_common_t common; /* node type: QUE_NODE_PURGE */
- /*----------------------*/
- /* Local storage for this graph node */
- dulint roll_ptr;/* roll pointer to undo log record */
- trx_undo_rec_t* undo_rec;/* undo log record */
- trx_undo_inf_t* reservation;/* reservation for the undo log record in
- the purge array */
- dulint undo_no;/* undo number of the record */
- ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
- ... */
- btr_pcur_t pcur; /* persistent cursor used in searching the
- clustered index record */
- ibool found_clust;/* TRUE if the clustered index record
- determined by ref was found in the clustered
- index, and we were able to position pcur on
- it */
- dict_table_t* table; /* table where purge is done */
- ulint cmpl_info;/* compiler analysis info of an update */
- upd_t* update; /* update vector for a clustered index
- record */
- dtuple_t* ref; /* NULL, or row reference to the next row to
- handle */
- dtuple_t* row; /* NULL, or a copy (also fields copied to
- heap) of the indexed fields of the row to
- handle */
- dict_index_t* index; /* NULL, or the next index whose record should
- be handled */
- mem_heap_t* heap; /* memory heap used as auxiliary storage for
- row; this must be emptied after a successful
- purge of a row */
-};
-
-#ifndef UNIV_NONINL
-#include "row0purge.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0purge.ic b/storage/innobase/include/row0purge.ic
deleted file mode 100644
index 50aabf0bc1b..00000000000
--- a/storage/innobase/include/row0purge.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-
-/******************************************************
-Purge obsolete records
-
-(c) 1997 Innobase Oy
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
deleted file mode 100644
index bea7627cd86..00000000000
--- a/storage/innobase/include/row0row.h
+++ /dev/null
@@ -1,250 +0,0 @@
-/******************************************************
-General row routines
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0row_h
-#define row0row_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "mtr0mtr.h"
-#include "rem0types.h"
-#include "read0types.h"
-#include "btr0types.h"
-
-/*************************************************************************
-Reads the trx id field from a clustered index record. */
-UNIV_INLINE
-dulint
-row_get_rec_trx_id(
-/*===============*/
- /* out: value of the field */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Reads the roll pointer field from a clustered index record. */
-UNIV_INLINE
-dulint
-row_get_rec_roll_ptr(
-/*=================*/
- /* out: value of the field */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Writes the trx id field to a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_trx_id(
-/*===============*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint trx_id);/* in: value of the field */
-/*************************************************************************
-Sets the roll pointer field in a clustered index record. */
-UNIV_INLINE
-void
-row_set_rec_roll_ptr(
-/*=================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint roll_ptr);/* in: value of the field */
-/*********************************************************************
-When an insert to a table is performed, this function builds the entry which
-has to be inserted to an index on the table. */
-
-dtuple_t*
-row_build_index_entry(
-/*==================*/
- /* out: index entry which should be inserted */
- dtuple_t* row, /* in: row which should be inserted to the
- table */
- dict_index_t* index, /* in: index on the table */
- mem_heap_t* heap); /* in: memory heap from which the memory for
- the index entry is allocated */
-/***********************************************************************
-An inverse function to row_build_index_entry. Builds a row from a
-record in a clustered index. */
-
-dtuple_t*
-row_build(
-/*======*/
- /* out, own: row built; see the NOTE below! */
- ulint type, /* in: ROW_COPY_POINTERS or ROW_COPY_DATA;
- the latter copies also the data fields to
- heap while the first only places pointers to
- data fields on the index page, and thus is
- more efficient */
- dict_index_t* index, /* in: clustered index */
- rec_t* rec, /* in: record in the clustered index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row dtuple is used! */
- const ulint* offsets,/* in: rec_get_offsets(rec, index)
- or NULL, in which case this function
- will invoke rec_get_offsets() */
- mem_heap_t* heap); /* in: memory heap from which the memory
- needed is allocated */
-/***********************************************************************
-Converts an index record to a typed data tuple. */
-
-dtuple_t*
-row_rec_to_index_entry(
-/*===================*/
- /* out, own: index entry built; see the
- NOTE below! */
- ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap as the latter only places pointers to
- data fields on the index page */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the dtuple is used! */
- mem_heap_t* heap); /* in: memory heap from which the memory
- needed is allocated */
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
-dtuple_t*
-row_build_row_ref(
-/*==============*/
- /* out, own: row reference built; see the
- NOTE below! */
- ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap, whereas the latter only places pointers
- to data fields on the index page */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- mem_heap_t* heap); /* in: memory heap from which the memory
- needed is allocated */
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
-void
-row_build_row_ref_in_tuple(
-/*=======================*/
- dtuple_t* ref, /* in/out: row reference built; see the
- NOTE below! */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: the data fields in ref will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- trx_t* trx); /* in: transaction */
-/***********************************************************************
-From a row build a row reference with which we can search the clustered
-index record. */
-
-void
-row_build_row_ref_from_row(
-/*=======================*/
- dtuple_t* ref, /* in/out: row reference built; see the
- NOTE below! ref must have the right number
- of fields! */
- dict_table_t* table, /* in: table */
- dtuple_t* row); /* in: row
- NOTE: the data fields in ref will point
- directly into data of this row */
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INLINE
-void
-row_build_row_ref_fast(
-/*===================*/
- dtuple_t* ref, /* in: typed data tuple where the
- reference is built */
- const ulint* map, /* in: array of field numbers in rec
- telling how ref should be built from
- the fields of rec */
- rec_t* rec, /* in: record in the index; must be
- preserved while ref is used, as we do
- not copy field values to heap */
- const ulint* offsets);/* in: array returned by rec_get_offsets() */
-/*******************************************************************
-Searches the clustered index record for a row, if we have the row
-reference. */
-
-ibool
-row_search_on_row_ref(
-/*==================*/
- /* out: TRUE if found */
- btr_pcur_t* pcur, /* in/out: persistent cursor, which must
- be closed by the caller */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- dict_table_t* table, /* in: table */
- dtuple_t* ref, /* in: row reference */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
-Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved. */
-
-rec_t*
-row_get_clust_rec(
-/*==============*/
- /* out: record or NULL, if no record found */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index, /* in: secondary index */
- dict_index_t** clust_index,/* out: clustered index */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
-Searches an index record. */
-
-ibool
-row_search_index_entry(
-/*===================*/
- /* out: TRUE if found */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- btr_pcur_t* pcur, /* in/out: persistent cursor, which must
- be closed by the caller */
- mtr_t* mtr); /* in: mtr */
-
-
-#define ROW_COPY_DATA 1
-#define ROW_COPY_POINTERS 2
-
-/* The allowed latching order of index records is the following:
-(1) a secondary index record ->
-(2) the clustered index record ->
-(3) rollback segment data for the clustered index record.
-
-No new latches may be obtained while the kernel mutex is reserved.
-However, the kernel mutex can be reserved while latches are owned. */
-
-#ifndef UNIV_NONINL
-#include "row0row.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
deleted file mode 100644
index de417f3d971..00000000000
--- a/storage/innobase/include/row0row.ic
+++ /dev/null
@@ -1,182 +0,0 @@
-/******************************************************
-General row routines
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0dict.h"
-#include "rem0rec.h"
-#include "trx0undo.h"
-
-/*************************************************************************
-Reads the trx id or roll ptr field from a clustered index record: this function
-is slower than the specialized inline functions. */
-
-dulint
-row_get_rec_sys_field(
-/*==================*/
- /* out: value of the field */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*************************************************************************
-Sets the trx id or roll ptr field in a clustered index record: this function
-is slower than the specialized inline functions. */
-
-void
-row_set_rec_sys_field(
-/*==================*/
 - /* no return value */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint val); /* in: value to set */
-
-/*************************************************************************
-Reads the trx id field from a clustered index record: directly at index->trx_id_offset when that is nonzero, otherwise via row_get_rec_sys_field(). */
-UNIV_INLINE
-dulint
-row_get_rec_trx_id(
-/*===============*/
- /* out: value of the field */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
-{
- ulint offset;
-
- ut_ad(index->type & DICT_CLUSTERED); /* index must carry the DICT_CLUSTERED flag */
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset; /* byte offset added to rec below */
-
- if (offset) {
- return(trx_read_trx_id(rec + offset));
- } else { /* zero offset: fall back to the generic accessor */
- return(row_get_rec_sys_field(DATA_TRX_ID,
- rec, index, offsets));
- }
-}
-
-/*************************************************************************
-Reads the roll pointer field from a clustered index record: directly, DATA_TRX_ID_LEN bytes past index->trx_id_offset, when that offset is nonzero, otherwise via row_get_rec_sys_field(). */
-UNIV_INLINE
-dulint
-row_get_rec_roll_ptr(
-/*=================*/
- /* out: value of the field */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
-{
- ulint offset;
-
- ut_ad(index->type & DICT_CLUSTERED); /* index must carry the DICT_CLUSTERED flag */
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset; /* offset of the trx id field, not of the roll pointer */
-
- if (offset) {
- return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN)); /* skip over the trx id bytes */
- } else { /* zero offset: fall back to the generic accessor */
- return(row_get_rec_sys_field(DATA_ROLL_PTR,
- rec, index, offsets));
- }
-}
-
-/*************************************************************************
-Writes the trx id field to a clustered index record: directly at index->trx_id_offset when that is nonzero, otherwise via row_set_rec_sys_field(). */
-UNIV_INLINE
-void
-row_set_rec_trx_id(
-/*===============*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint trx_id) /* in: value of the field */
-{
- ulint offset;
-
- ut_ad(index->type & DICT_CLUSTERED); /* index must carry the DICT_CLUSTERED flag */
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset; /* byte offset added to rec below */
-
- if (offset) {
- trx_write_trx_id(rec + offset, trx_id);
- } else { /* zero offset: fall back to the generic setter */
- row_set_rec_sys_field(DATA_TRX_ID,
- rec, index, offsets, trx_id);
- }
-}
-
-/*************************************************************************
-Sets the roll pointer field in a clustered index record: directly, DATA_TRX_ID_LEN bytes past index->trx_id_offset, when that offset is nonzero, otherwise via row_set_rec_sys_field(). */
-UNIV_INLINE
-void
-row_set_rec_roll_ptr(
-/*=================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint roll_ptr)/* in: value of the field */
-{
- ulint offset;
-
- ut_ad(index->type & DICT_CLUSTERED); /* index must carry the DICT_CLUSTERED flag */
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset; /* offset of the trx id field, not of the roll pointer */
-
- if (offset) {
- trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr); /* skip over the trx id bytes */
- } else { /* zero offset: fall back to the generic setter */
- row_set_rec_sys_field(DATA_ROLL_PTR,
- rec, index, offsets, roll_ptr);
- }
-}
-
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. Fields of ref whose map entry is ULINT_UNDEFINED are left untouched. */
-UNIV_INLINE
-void
-row_build_row_ref_fast(
-/*===================*/
- dtuple_t* ref, /* in: typed data tuple where the
- reference is built */
- const ulint* map, /* in: array of field numbers in rec
- telling how ref should be built from
- the fields of rec */
- rec_t* rec, /* in: record in the index; must be
- preserved while ref is used, as we do
- not copy field values to heap */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- dfield_t* dfield;
- byte* field;
- ulint len;
- ulint ref_len;
- ulint field_no;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets)); /* validated with a NULL index argument */
- ref_len = dtuple_get_n_fields(ref);
-
- for (i = 0; i < ref_len; i++) { /* one map entry is read per field of ref */
- dfield = dtuple_get_nth_field(ref, i);
-
- field_no = *(map + i);
-
- if (field_no != ULINT_UNDEFINED) {
-
- field = rec_get_nth_field(rec, offsets,
- field_no, &len);
- dfield_set_data(dfield, field, len); /* passes a pointer into rec; no copy is made here */
- }
- }
-}
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
deleted file mode 100644
index a0a4ccb973b..00000000000
--- a/storage/innobase/include/row0sel.h
+++ /dev/null
@@ -1,392 +0,0 @@
-/******************************************************
-Select
-
-(c) 1997 Innobase Oy
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0sel_h
-#define row0sel_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "btr0pcur.h"
-#include "read0read.h"
-#include "row0mysql.h"
-
-/*************************************************************************
-Creates a select node struct. */
-
-sel_node_t*
-sel_node_create(
-/*============*/
- /* out, own: select node struct */
- mem_heap_t* heap); /* in: memory heap where created */
-/*************************************************************************
-Frees the memory private to a select node when a query graph is freed,
-does not free the heap where the node was originally created. */
-
-void
-sel_node_free_private(
-/*==================*/
- sel_node_t* node); /* in: select node struct */
-/*************************************************************************
-Frees a prefetch buffer for a column, including the dynamically allocated
-memory for data stored there. */
-
-void
-sel_col_prefetch_buf_free(
-/*======================*/
- sel_buf_t* prefetch_buf); /* in, own: prefetch buffer */
-/*************************************************************************
-Gets the plan node for the nth table in a join; returns the plan node. */
-UNIV_INLINE
-plan_t*
-sel_node_get_nth_plan(
-/*==================*/
- sel_node_t* node, /* in: select node */
- ulint i); /* in: get ith plan node */
-/**************************************************************************
-Performs a select step. This is a high-level function used in SQL execution
-graphs. */
-
-que_thr_t*
-row_sel_step(
-/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs an execution step of an open or close cursor statement node. */
-UNIV_INLINE
-que_thr_t*
-open_step(
-/*======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Performs a fetch for a cursor. */
-
-que_thr_t*
-fetch_step(
-/*=======*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/********************************************************************
-Sample callback function for fetch that prints each row.*/
-
-void*
-row_fetch_print(
-/*============*/
- /* out: always returns non-NULL */
- void* row, /* in: sel_node_t* */
- void* user_arg); /* in: not used */
-/********************************************************************
-Callback function for fetch that stores an unsigned 4 byte integer to the
-location pointed to by user_arg. The column's type must be DATA_INT,
-DATA_UNSIGNED, length = 4. */
-
-void*
-row_fetch_store_uint4(
-/*==================*/
- /* out: always returns NULL */
- void* row, /* in: sel_node_t* */
- void* user_arg); /* in: data pointer */
-/***************************************************************
-Prints a row in a select result. */
-
-que_thr_t*
-row_printf_step(
-/*============*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/********************************************************************
-Converts a key value stored in MySQL format to an Innobase dtuple. The last
-field of the key value may be just a prefix of a fixed length field: hence
-the parameter key_len. But currently we do not allow search keys where the
-last field is only a prefix of the full key field len and print a warning if
-such appears. */
-
-void
-row_sel_convert_mysql_key_to_innobase(
-/*==================================*/
- dtuple_t* tuple, /* in: tuple where to build;
- NOTE: we assume that the type info
- in the tuple is already according
- to index! */
- byte* buf, /* in: buffer to use in field
- conversions */
- ulint buf_len, /* in: buffer length */
- dict_index_t* index, /* in: index of the key value */
- byte* key_ptr, /* in: MySQL key value */
- ulint key_len, /* in: MySQL key value length */
- trx_t* trx); /* in: transaction */
-/************************************************************************
-Searches for rows in the database. This is used in the interface to
-MySQL. This function opens a cursor, and also implements fetch next
-and fetch prev. NOTE that if we do a search with a full key value
-from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position, and fetch next or fetch prev must not be attempted on the cursor! */
-
-ulint
-row_search_for_mysql(
-/*=================*/
- /* out: DB_SUCCESS,
- DB_RECORD_NOT_FOUND,
- DB_END_OF_INDEX, DB_DEADLOCK,
- DB_LOCK_TABLE_FULL,
- or DB_TOO_BIG_RECORD */
- byte* buf, /* in/out: buffer for the fetched
- row in the MySQL format */
- ulint mode, /* in: search mode PAGE_CUR_L, ... */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct for the
- table handle; this contains the info
- of search_tuple, index; if search
- tuple contains 0 fields then we
- position the cursor at the start or
- the end of the index, depending on
- 'mode' */
- ulint match_mode, /* in: 0 or ROW_SEL_EXACT or
- ROW_SEL_EXACT_PREFIX */
- ulint direction); /* in: 0 or ROW_SEL_NEXT or
- ROW_SEL_PREV; NOTE: if this is != 0,
- then prebuilt must have a pcur
- with stored position! In opening of a
- cursor 'direction' should be 0. */
-/***********************************************************************
-Checks whether MySQL is currently allowed, for this table, to retrieve a
-consistent read result from the query cache or to store one there. */
-
-ibool
-row_search_check_if_query_cache_permitted(
-/*======================================*/
- /* out: TRUE if storing or retrieving
- from the query cache is permitted */
- trx_t* trx, /* in: transaction object */
- const char* norm_name); /* in: concatenation of database name,
- '/' char, table name */
-/***********************************************************************
-Read the max AUTOINC value from an index. */
-
-ulint
-row_search_max_autoinc(
-/*===================*/
- /* out: DB_SUCCESS if all OK else
- error code */
- dict_index_t* index, /* in: index to search */
- const char* col_name, /* in: autoinc column name */
- ib_ulonglong* value); /* out: AUTOINC value read */
-
-/* A structure for caching column values for prefetched rows */
-struct sel_buf_struct{
- byte* data; /* data, or NULL; if not NULL, this field
- has allocated memory which must be explicitly
- freed; can be != NULL even when len is
- UNIV_SQL_NULL */
- ulint len; /* data length or UNIV_SQL_NULL */
- ulint val_buf_size;
- /* size of memory buffer allocated for data:
- this can be more than len; this is defined
- when data != NULL */
-};
-
-struct plan_struct{
- dict_table_t* table; /* table struct in the dictionary
- cache */
- dict_index_t* index; /* table index used in the search */
- btr_pcur_t pcur; /* persistent cursor used to search
- the index */
- ibool asc; /* TRUE if cursor traveling upwards */
- ibool pcur_is_open; /* TRUE if pcur has been positioned
- and we can try to fetch new rows */
- ibool cursor_at_end; /* TRUE if the cursor is open but
- we know that there are no more
- qualifying rows left to retrieve from
- the index tree; NOTE though, that
- there may still be unprocessed rows in
- the prefetch stack; always FALSE when
- pcur_is_open is FALSE */
- ibool stored_cursor_rec_processed;
- /* TRUE if the pcur position has been
- stored and the record it is positioned
- on has already been processed */
- que_node_t** tuple_exps; /* array of expressions which are used
- to calculate the field values in the
- search tuple: there is one expression
- for each field in the search tuple */
- dtuple_t* tuple; /* search tuple */
- ulint mode; /* search mode: PAGE_CUR_G, ... */
- ulint n_exact_match; /* number of first fields in the search
- tuple which must be exactly matched */
- ibool unique_search; /* TRUE if we are searching an
- index record with a unique key */
- ulint n_rows_fetched; /* number of rows fetched using pcur
- after it was opened */
- ulint n_rows_prefetched;/* number of prefetched rows cached
- for fetch: fetching several rows in
- the same mtr saves CPU time */
- ulint first_prefetched;/* index of the first cached row in
- select buffer arrays for each column */
- ibool no_prefetch; /* no prefetch for this table */
- sym_node_list_t columns; /* symbol table nodes for the columns
- to retrieve from the table */
- UT_LIST_BASE_NODE_T(func_node_t)
- end_conds; /* conditions which determine the
- fetch limit of the index segment we
- have to look at: when one of these
- fails, the result set has been
- exhausted for the cursor in this
- index; these conditions are normalized
- so that in a comparison the column
- for this table is the first argument */
- UT_LIST_BASE_NODE_T(func_node_t)
- other_conds; /* the rest of search conditions we can
- test at this table in a join */
- ibool must_get_clust; /* TRUE if index is a non-clustered
- index and we must also fetch the
- clustered index record; this is the
- case if the non-clustered record does
- not contain all the needed columns, or
- if this is a single-table explicit
- cursor, or a searched update or
- delete */
- ulint* clust_map; /* map telling how clust_ref is built
- from the fields of a non-clustered
- record */
- dtuple_t* clust_ref; /* the reference to the clustered
- index entry is built here if index is
- a non-clustered index */
- btr_pcur_t clust_pcur; /* if index is non-clustered, we use
- this pcur to search the clustered
- index */
- mem_heap_t* old_vers_heap; /* memory heap used in building an old
- version of a row, or NULL */
-};
-
-struct sel_node_struct{
- que_common_t common; /* node type: QUE_NODE_SELECT */
- ulint state; /* node state */
- que_node_t* select_list; /* select list */
- sym_node_t* into_list; /* variables list or NULL */
- sym_node_t* table_list; /* table list */
- ibool asc; /* TRUE if the rows should be fetched
- in an ascending order */
- ibool set_x_locks; /* TRUE if the cursor is for update or
- delete, which means that a row x-lock
- should be placed on the cursor row */
- ibool select_will_do_update;
- /* TRUE if the select is for a searched
- update which can be performed in-place:
- in this case the select will take care
- of the update */
- ulint latch_mode; /* BTR_SEARCH_LEAF, or BTR_MODIFY_LEAF
- if select_will_do_update is TRUE */
- ulint row_lock_mode; /* LOCK_X or LOCK_S */
- ulint n_tables; /* number of tables */
- ulint fetch_table; /* number of the next table to access
- in the join */
- plan_t* plans; /* array of n_tables many plan nodes
- containing the search plan and the
- search data structures */
- que_node_t* search_cond; /* search condition */
- read_view_t* read_view; /* if the query is a non-locking
- consistent read, its read view is
- placed here, otherwise NULL */
- ibool consistent_read;/* TRUE if the select is a consistent,
- non-locking read */
- order_node_t* order_by; /* order by column definition, or
- NULL */
- ibool is_aggregate; /* TRUE if the select list consists of
- aggregate functions */
- ibool aggregate_already_fetched;
- /* TRUE if the aggregate row has
- already been fetched for the current
- cursor */
- ibool can_get_updated;/* this is TRUE if the select
- is in a single-table explicit
- cursor which can get updated
- within the stored procedure,
- or in a searched update or
- delete; NOTE that to determine
- whether an explicit cursor
- can get updated, the parser
- checks whether the stored
- procedure contains positioned
- update or delete statements */
- sym_node_t* explicit_cursor;/* not NULL if an explicit cursor */
- UT_LIST_BASE_NODE_T(sym_node_t)
- copy_variables; /* variables whose values we have to
- copy when an explicit cursor is opened,
- so that they do not change between
- fetches */
-};
-
-/* Select node states */
-#define SEL_NODE_CLOSED 0 /* it is a declared cursor which is not
- currently open */
-#define SEL_NODE_OPEN 1 /* intention locks not yet set on
- tables */
-#define SEL_NODE_FETCH 2 /* intention locks have been set */
-#define SEL_NODE_NO_MORE_ROWS 3 /* cursor has reached the result set
- end */
-
-/* Fetch statement node */
-struct fetch_node_struct{
- que_common_t common; /* type: QUE_NODE_FETCH */
- sel_node_t* cursor_def; /* cursor definition */
- sym_node_t* into_list; /* variables to set */
-
- pars_user_func_t*
- func; /* User callback function or NULL.
- The first argument to the function
- is a sel_node_t*, containing the
- results of the SELECT operation for
- one row. If the function returns
- NULL, it is not interested in
- further rows and the cursor is
- modified so (cursor % NOTFOUND) is
- true. If it returns not-NULL,
- continue normally. See
- row_fetch_print() for an example
- (and a useful debugging tool). */
-};
-
-/* Open or close cursor statement node */
-struct open_node_struct{
- que_common_t common; /* type: QUE_NODE_OPEN */
- ulint op_type; /* ROW_SEL_OPEN_CURSOR or
- ROW_SEL_CLOSE_CURSOR */
- sel_node_t* cursor_def; /* cursor definition */
-};
-
-/* Row printf statement node */
-struct row_printf_node_struct{
- que_common_t common; /* type: QUE_NODE_ROW_PRINTF */
- sel_node_t* sel_node; /* select */
-};
-
-#define ROW_SEL_OPEN_CURSOR 0
-#define ROW_SEL_CLOSE_CURSOR 1
-
-/* Flags for the MySQL interface */
-#define ROW_SEL_NEXT 1
-#define ROW_SEL_PREV 2
-
-#define ROW_SEL_EXACT 1 /* search using a complete key value */
-#define ROW_SEL_EXACT_PREFIX 2 /* search using a key prefix which
- must match to rows: the prefix may
- contain an incomplete field (the
- last field in prefix may be just
- a prefix of a fixed length column) */
-
-#ifndef UNIV_NONINL
-#include "row0sel.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0sel.ic b/storage/innobase/include/row0sel.ic
deleted file mode 100644
index 1f92b99271e..00000000000
--- a/storage/innobase/include/row0sel.ic
+++ /dev/null
@@ -1,88 +0,0 @@
-/******************************************************
-Select
-
-(c) 1997 Innobase Oy
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "que0que.h"
-
-/*************************************************************************
-Gets the plan node for the nth table in a join. */
-UNIV_INLINE
-plan_t*
-sel_node_get_nth_plan(
-/*==================*/
- /* out: pointer to element i of node->plans */
- sel_node_t* node, /* in: select node */
- ulint i) /* in: index of the plan node; asserted < node->n_tables */
-{
- ut_ad(i < node->n_tables);
-
- return(node->plans + i);
-}
-
-/*************************************************************************
-Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means
-that it will start fetching from the start of the result set again, regardless
-of where it was before, and it will set intention locks on the tables. */
-UNIV_INLINE
-void
-sel_node_reset_cursor(
-/*==================*/
- sel_node_t* node) /* in/out: select node; only node->state is assigned here */
-{
- node->state = SEL_NODE_OPEN;
-}
-
-/**************************************************************************
-Performs an execution step of an open or close cursor statement node. */
-UNIV_INLINE
-que_thr_t*
-open_step(
-/*======*/
- /* out: the query thread thr that was passed in */
- que_thr_t* thr) /* in: query thread */
-{
- sel_node_t* sel_node;
- open_node_t* node;
- ulint err;
-
- ut_ad(thr);
-
- node = thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_OPEN);
-
- sel_node = node->cursor_def;
-
- err = DB_SUCCESS;
-
- if (node->op_type == ROW_SEL_OPEN_CURSOR) {
- /* open: the state check is commented out, so the cursor is reset whatever its state */
- /* if (sel_node->state == SEL_NODE_CLOSED) { */
-
- sel_node_reset_cursor(sel_node);
- /* } else {
- err = DB_ERROR;
- } */
- } else {
- if (sel_node->state != SEL_NODE_CLOSED) {
- /* close: mark a cursor that is not yet closed as closed */
- sel_node->state = SEL_NODE_CLOSED;
- } else { /* closing an already closed cursor is reported as DB_ERROR */
- err = DB_ERROR;
- }
- }
-
- if (UNIV_EXPECT(err, DB_SUCCESS) != DB_SUCCESS) {
- /* SQL error detected */
- fprintf(stderr, "SQL error %lu\n", (ulong) err);
-
- ut_error;
- }
- /* hand control to the parent of this node in the query graph */
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h
deleted file mode 100644
index 56ca8711848..00000000000
--- a/storage/innobase/include/row0types.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/******************************************************
-Row operation global types
-
-(c) 1996 Innobase Oy
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0types_h
-#define row0types_h
-
-typedef struct plan_struct plan_t;
-
-typedef struct upd_struct upd_t;
-
-typedef struct upd_field_struct upd_field_t;
-
-typedef struct upd_node_struct upd_node_t;
-
-typedef struct del_node_struct del_node_t;
-
-typedef struct ins_node_struct ins_node_t;
-
-typedef struct sel_node_struct sel_node_t;
-
-typedef struct open_node_struct open_node_t;
-
-typedef struct fetch_node_struct fetch_node_t;
-
-typedef struct row_printf_node_struct row_printf_node_t;
-typedef struct sel_buf_struct sel_buf_t;
-
-typedef struct undo_node_struct undo_node_t;
-
-typedef struct purge_node_struct purge_node_t;
-
-#endif
diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h
deleted file mode 100644
index e28d5363048..00000000000
--- a/storage/innobase/include/row0uins.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/******************************************************
-Fresh insert undo
-
-(c) 1996 Innobase Oy
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0uins_h
-#define row0uins_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/***************************************************************
-Undoes a fresh insert of a row to a table. A fresh insert means that
-the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. */
-
-ulint
-row_undo_ins(
-/*=========*/
- /* out: DB_SUCCESS */
- undo_node_t* node); /* in: row undo node */
-
-
-#ifndef UNIV_NONINL
-#include "row0uins.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0uins.ic b/storage/innobase/include/row0uins.ic
deleted file mode 100644
index 2b3d5a10f95..00000000000
--- a/storage/innobase/include/row0uins.ic
+++ /dev/null
@@ -1,8 +0,0 @@
-/******************************************************
-Fresh insert undo
-
-(c) 1996 Innobase Oy
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h
deleted file mode 100644
index f22945e6f12..00000000000
--- a/storage/innobase/include/row0umod.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/******************************************************
-Undo modify of a row
-
-(c) 1997 Innobase Oy
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0umod_h
-#define row0umod_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/***************************************************************
-Undoes a modify operation on a row of a table. */
-
-ulint
-row_undo_mod(
-/*=========*/
- /* out: DB_SUCCESS or error code */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr); /* in: query thread */
-
-
-#ifndef UNIV_NONINL
-#include "row0umod.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0umod.ic b/storage/innobase/include/row0umod.ic
deleted file mode 100644
index fcbf4dbc1f3..00000000000
--- a/storage/innobase/include/row0umod.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Undo modify of a row
-
-(c) 1997 Innobase Oy
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
deleted file mode 100644
index 0be09ed1822..00000000000
--- a/storage/innobase/include/row0undo.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/******************************************************
-Row undo
-
-(c) 1997 Innobase Oy
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0undo_h
-#define row0undo_h
-
-#include "univ.i"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-
-/************************************************************************
-Creates a row undo node to a query graph. */
-
-undo_node_t*
-row_undo_node_create(
-/*=================*/
- /* out, own: undo node */
- trx_t* trx, /* in: transaction */
- que_thr_t* parent, /* in: parent node, i.e., a thr node */
- mem_heap_t* heap); /* in: memory heap where created */
-/***************************************************************
-Looks for the clustered index record when node has the row reference.
-The pcur in node is used in the search. If found, stores the row to node,
-and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case. */
-
-ibool
-row_undo_search_clust_to_pcur(
-/*==========================*/
- /* out: TRUE if found; NOTE the node->pcur
- must be closed by the caller, regardless of
- the return value */
- undo_node_t* node); /* in: row undo node */
-/***************************************************************
-Undoes a row operation in a table. This is a high-level function used
-in SQL execution graphs. */
-
-que_thr_t*
-row_undo_step(
-/*==========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-
-/* A single query thread will try to perform the undo for all successive
-versions of a clustered index record, if the transaction has modified it
-several times during the execution which is rolled back. It may happen
-that the task is transferred to another query thread, if the other thread
-is assigned to handle an undo log record in the chain of different versions
-of the record, and the other thread happens to get the x-latch to the
-clustered index record at the right time.
- If a query thread notices that the clustered index record it is looking
-for is missing, or the roll ptr field in the record does not point to the
-undo log record the thread was assigned to handle, then it gives up the undo
-task for that undo log record, and fetches the next. This situation can occur
-just in the case where the transaction modified the same record several times
-and another thread is currently doing the undo for successive versions of
-that index record. */
-
-/* Undo node structure */
-
-struct undo_node_struct{
- que_common_t common; /* node type: QUE_NODE_UNDO */
- ulint state; /* node execution state */
- trx_t* trx; /* trx for which undo is done */
- dulint roll_ptr;/* roll pointer to undo log record */
- trx_undo_rec_t* undo_rec;/* undo log record */
- dulint undo_no;/* undo number of the record */
- ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
- ... */
- dulint new_roll_ptr; /* roll ptr to restore to clustered index
- record */
- dulint new_trx_id; /* trx id to restore to clustered index
- record */
- btr_pcur_t pcur; /* persistent cursor used in searching the
- clustered index record */
- dict_table_t* table; /* table where undo is done */
- ulint cmpl_info;/* compiler analysis of an update */
- upd_t* update; /* update vector for a clustered index
- record */
- dtuple_t* ref; /* row reference to the next row to handle */
- dtuple_t* row; /* a copy (also fields copied to heap) of the
- row to handle */
- dict_index_t* index; /* the next index whose record should be
- handled */
- mem_heap_t* heap; /* memory heap used as auxiliary storage for
- row; this must be emptied after undo is tried
- on a row */
-};
-
-/* Execution states for an undo node */
-#define UNDO_NODE_FETCH_NEXT 1 /* we should fetch the next undo log
- record */
-#define UNDO_NODE_PREV_VERS 2 /* the roll ptr to previous version of
- a row is stored in node, and undo
- should be done based on it */
-#define UNDO_NODE_INSERT 3
-#define UNDO_NODE_MODIFY 4
-
-
-#ifndef UNIV_NONINL
-#include "row0undo.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0undo.ic b/storage/innobase/include/row0undo.ic
deleted file mode 100644
index e7f89c7de67..00000000000
--- a/storage/innobase/include/row0undo.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Row undo
-
-(c) 1997 Innobase Oy
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
deleted file mode 100644
index efbc6d6facf..00000000000
--- a/storage/innobase/include/row0upd.h
+++ /dev/null
@@ -1,432 +0,0 @@
-/******************************************************
-Update of a row
-
-(c) 1996 Innobase Oy
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0upd_h
-#define row0upd_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "pars0types.h"
-
-/*************************************************************************
-Creates an update vector object. */
-UNIV_INLINE
-upd_t*
-upd_create(
-/*=======*/
- /* out, own: update vector object */
- ulint n, /* in: number of fields */
- mem_heap_t* heap); /* in: heap from which memory allocated */
-/*************************************************************************
-Returns the number of fields in the update vector == number of columns
-to be updated by an update vector. */
-UNIV_INLINE
-ulint
-upd_get_n_fields(
-/*=============*/
- /* out: number of fields */
- upd_t* update); /* in: update vector */
-/*************************************************************************
-Returns the nth field of an update vector. */
-UNIV_INLINE
-upd_field_t*
-upd_get_nth_field(
-/*==============*/
- /* out: update vector field */
- upd_t* update, /* in: update vector */
- ulint n); /* in: field position in update vector */
-/*************************************************************************
-Sets an index field number to be updated by an update vector field. */
-UNIV_INLINE
-void
-upd_field_set_field_no(
-/*===================*/
- upd_field_t* upd_field, /* in: update vector field */
- ulint field_no, /* in: field number in a clustered
- index */
- dict_index_t* index, /* in: index */
- trx_t* trx); /* in: transaction */
-/*************************************************************************
-Writes into the redo log the values of trx id and roll ptr and enough info
-to determine their positions within a clustered index record. */
-
-byte*
-row_upd_write_sys_vals_to_log(
-/*==========================*/
- /* out: new pointer to mlog */
- dict_index_t* index, /* in: clustered index */
- trx_t* trx, /* in: transaction */
- dulint roll_ptr,/* in: roll ptr of the undo log record */
- byte* log_ptr,/* pointer to a buffer of size > 20 opened
- in mlog */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
-Updates the trx id and roll ptr field in a clustered index record when
-a row is updated or marked deleted. */
-UNIV_INLINE
-void
-row_upd_rec_sys_fields(
-/*===================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- trx_t* trx, /* in: transaction */
- dulint roll_ptr);/* in: roll ptr of the undo log record */
-/*************************************************************************
-Sets the trx id or roll ptr field of a clustered index entry. */
-
-void
-row_upd_index_entry_sys_field(
-/*==========================*/
- dtuple_t* entry, /* in: index entry, where the memory buffers
- for sys fields are already allocated:
- the function just copies the new values to
- them */
- dict_index_t* index, /* in: clustered index */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- dulint val); /* in: value to write */
-/*************************************************************************
-Creates an update node for a query graph. */
-
-upd_node_t*
-upd_node_create(
-/*============*/
- /* out, own: update node */
- mem_heap_t* heap); /* in: mem heap where created */
-/***************************************************************
-Writes to the redo log the new values of the fields occurring in the index. */
-
-void
-row_upd_index_write_log(
-/*====================*/
- upd_t* update, /* in: update vector */
- byte* log_ptr,/* in: pointer to mlog buffer: must contain at least
- MLOG_BUF_MARGIN bytes of free space; the buffer is
- closed within this function */
- mtr_t* mtr); /* in: mtr into whose log to write */
-/***************************************************************
-Returns TRUE if row update changes size of some field in index or if some
-field to be updated is stored externally in rec or update. */
-
-ibool
-row_upd_changes_field_size_or_external(
-/*===================================*/
- /* out: TRUE if the update changes the size of
- some field in index or the field is external
- in rec or update */
- dict_index_t* index, /* in: index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- upd_t* update);/* in: update vector */
-/***************************************************************
-Writes the new column values stored in the update vector into the given
-record. No field size changes are allowed. This function is used only for
-a clustered index. */
-
-void
-row_upd_rec_in_place(
-/*=================*/
- rec_t* rec, /* in/out: record where replaced */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- upd_t* update);/* in: update vector */
-/*******************************************************************
-Builds an update vector from those fields which in a secondary index entry
-differ from a record that has the equal ordering fields. NOTE: we compare
-the fields as binary strings! */
-
-upd_t*
-row_upd_build_sec_rec_difference_binary(
-/*====================================*/
- /* out, own: update vector of differing
- fields */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: entry to insert */
- rec_t* rec, /* in: secondary index record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap); /* in: memory heap from which allocated */
-/*******************************************************************
-Builds an update vector from those fields, excluding the roll ptr and
-trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings! */
-
-upd_t*
-row_upd_build_difference_binary(
-/*============================*/
- /* out, own: update vector of differing
- fields, excluding roll ptr and trx id */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* entry, /* in: entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- rec_t* rec, /* in: clustered index record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap); /* in: memory heap from which allocated */
-/***************************************************************
-Writes the new column values stored in the update vector into the given
-index entry. */
-
-void
-row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
- dtuple_t* entry, /* in/out: index entry where replaced */
- dict_index_t* index, /* in: index; NOTE that this may also be a
- non-clustered index */
- upd_t* update, /* in: an update vector built for the index so
- that the field number in an upd_field is the
- index position */
- ibool order_only,
- /* in: if TRUE, limit the replacement to
- ordering fields of index; note that this
- does not work for non-clustered indexes. */
- mem_heap_t* heap); /* in: memory heap to which we allocate and
- copy the new values, set this as NULL if you
- do not want allocation */
-/***************************************************************
-Writes the new column values stored in the update vector into the given
-index entry. */
-
-void
-row_upd_index_replace_new_col_vals(
-/*===============================*/
- dtuple_t* entry, /* in/out: index entry where replaced */
- dict_index_t* index, /* in: index; NOTE that this may also be a
- non-clustered index */
- upd_t* update, /* in: an update vector built for the
- CLUSTERED index so that the field number in
- an upd_field is the clustered index position */
- mem_heap_t* heap); /* in: memory heap to which we allocate and
- copy the new values, set this as NULL if you
- do not want allocation */
-/***************************************************************
-Checks if an update vector changes an ordering field of an index record.
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings! */
-
-ibool
-row_upd_changes_ord_field_binary(
-/*=============================*/
- /* out: TRUE if update vector changes
- an ordering field in the index record;
- NOTE: the fields are compared as binary
- strings */
- dtuple_t* row, /* in: old value of row, or NULL if the
- row and the data values in update are not
- known when this function is called, e.g., at
- compile time */
- dict_index_t* index, /* in: index of the record */
- upd_t* update);/* in: update vector for the row; NOTE: the
- field numbers in this MUST be clustered index
- positions! */
-/***************************************************************
-Checks if an update vector changes an ordering field of an index record.
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings! */
-
-ibool
-row_upd_changes_some_index_ord_field_binary(
-/*========================================*/
- /* out: TRUE if update vector may change
- an ordering field in an index record */
- dict_table_t* table, /* in: table */
- upd_t* update);/* in: update vector for the row */
-/***************************************************************
-Updates a row in a table. This is a high-level function used
-in SQL execution graphs. */
-
-que_thr_t*
-row_upd_step(
-/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr); /* in: query thread */
-/*************************************************************************
-Performs an in-place update for the current clustered index record in
-select. */
-
-void
-row_upd_in_place_in_select(
-/*=======================*/
- sel_node_t* sel_node, /* in: select node */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
-Parses the log data of system field values. */
-
-byte*
-row_upd_parse_sys_vals(
-/*===================*/
- /* out: log data end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- ulint* pos, /* out: TRX_ID position in record */
- dulint* trx_id, /* out: trx id */
- dulint* roll_ptr);/* out: roll ptr */
-/*************************************************************************
-Updates the trx id and roll ptr field in a clustered index record in database
-recovery. */
-
-void
-row_upd_rec_sys_fields_in_recovery(
-/*===============================*/
- rec_t* rec, /* in: record */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint pos, /* in: TRX_ID position in rec */
- dulint trx_id, /* in: transaction id */
- dulint roll_ptr);/* in: roll ptr of the undo log record */
-/*************************************************************************
-Parses the log data written by row_upd_index_write_log. */
-
-byte*
-row_upd_index_parse(
-/*================*/
- /* out: log data end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- mem_heap_t* heap, /* in: memory heap where update vector is
- built */
- upd_t** update_out);/* out: update vector */
-
-
-/* Update vector field: describes the update of one field of an index record */
-struct upd_field_struct{
- ulint field_no; /* field number in an index, usually
- the clustered index, but in updating
- a secondary index record in btr0cur.c
- this is the position in the secondary
- index */
- que_node_t* exp; /* expression for calculating a new
- value: it refers to column values and
- constants in the symbol table of the
- query graph */
- dfield_t new_val; /* new value for the column */
- ibool extern_storage; /* this is set to TRUE if new_val
- actually contains a reference to an externally
- stored field; upd_create() initializes it to 0 */
-};
-
-/* Update vector structure: the new info bits plus an array of update fields */
-struct upd_struct{
- ulint info_bits; /* new value of info bits to record;
- default is 0 (as set by upd_create()) */
- ulint n_fields; /* number of update fields in the fields array */
- upd_field_t* fields; /* array of n_fields update fields */
-};
-
-/* Update node structure of a query graph; it also implements the delete
-operation of a row */
-
-struct upd_node_struct{
- que_common_t common; /* node type: QUE_NODE_UPDATE */
- ibool is_delete;/* TRUE if delete, FALSE if update */
- ibool searched_update;
- /* TRUE if searched update, FALSE if
- positioned */
- ibool select_will_do_update;
- /* TRUE if a searched update where ordering
- fields will not be updated, and the size of
- the fields will not change: in this case the
- select node will take care of the update */
- ibool in_mysql_interface;
- /* TRUE if the update node was created
- for the MySQL interface */
- dict_foreign_t* foreign;/* NULL or pointer to a foreign key
- constraint if this update node is used in
- doing an ON DELETE or ON UPDATE operation */
- upd_node_t* cascade_node;/* NULL or an update node template which
- is used to implement ON DELETE/UPDATE CASCADE
- or ... SET NULL for foreign keys */
- mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade
- node is created */
- sel_node_t* select; /* query graph subtree implementing a base
- table cursor: the rows returned will be
- updated */
- btr_pcur_t* pcur; /* persistent cursor placed on the clustered
- index record which should be updated or
- deleted; the cursor is stored in the graph
- of 'select' field above, except in the case
- of the MySQL interface */
- dict_table_t* table; /* table where updated */
- upd_t* update; /* update vector for the row */
- ulint update_n_fields;
- /* when this struct is used to implement
- a cascade operation for foreign keys, we store
- here the size of the buffer allocated for use
- as the update vector */
- sym_node_list_t columns;/* symbol table nodes for the columns
- to retrieve from the table */
- ibool has_clust_rec_x_lock;
- /* TRUE if the select which retrieves the
- records to update already sets an x-lock on
- the clustered record; note that it must always
- set at least an s-lock */
- ulint cmpl_info;/* information extracted during query
- compilation; speeds up execution:
- UPD_NODE_NO_ORD_CHANGE and
- UPD_NODE_NO_SIZE_CHANGE, ORed */
- /*----------------------*/
- /* Local storage for this graph node */
- ulint state; /* node execution state (see the UPD_NODE_... execution states) */
- dict_index_t* index; /* NULL, or the next index whose record should
- be updated */
- dtuple_t* row; /* NULL, or a copy (also fields copied to
- heap) of the row to update; this must be reset
- to NULL after a successful update */
- ulint* ext_vec;/* array describing which fields are stored
- externally in the clustered index record of
- row */
- ulint n_ext_vec;/* number of fields in ext_vec */
- mem_heap_t* heap; /* memory heap used as auxiliary storage;
- this must be emptied after a successful
- update */
- /*----------------------*/
- sym_node_t* table_sym;/* table node in symbol table */
- que_node_t* col_assign_list;
- /* column assignment list */
- ulint magic_n; /* presumably UPD_NODE_MAGIC_N in a valid node - TODO confirm */
-};
-
-#define UPD_NODE_MAGIC_N 1579975
-
-/* Node execution states */
-#define UPD_NODE_SET_IX_LOCK 1 /* execution came to the node from
- a node above and if the field
- has_clust_rec_x_lock is FALSE, we
- should set an intention x-lock on
- the table */
-#define UPD_NODE_UPDATE_CLUSTERED 2 /* clustered index record should be
- updated */
-#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be
- inserted, old record is already delete
- marked */
-#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered
- index record was changed, or this is
- a delete operation: should update
- all the secondary index records */
-#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be
- looked at and updated if an ordering
- field changed */
-
-/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */
-#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be
- changed in the update and no ordering
- field of the clustered index */
-#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be
- changed in the update */
-
-#ifndef UNIV_NONINL
-#include "row0upd.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
deleted file mode 100644
index 6173849e68f..00000000000
--- a/storage/innobase/include/row0upd.ic
+++ /dev/null
@@ -1,122 +0,0 @@
-/******************************************************
-Update of a row
-
-(c) 1996 Innobase Oy
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0log.h"
-#include "trx0trx.h"
-#include "trx0undo.h"
-#include "row0row.h"
-#include "btr0sea.h"
-
-/*************************************************************************
-Creates an update vector object with n fields; both are allocated from heap. */
-UNIV_INLINE
-upd_t*
-upd_create(
-/*=======*/
- /* out, own: update vector object */
- ulint n, /* in: number of fields */
- mem_heap_t* heap) /* in: heap from which the memory is allocated */
-{
- upd_t* update;
- ulint i;
-
- update = mem_heap_alloc(heap, sizeof(upd_t));
-
- update->info_bits = 0;
- update->n_fields = n;
- update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n);
-
- for (i = 0; i < n; i++) {
- update->fields[i].extern_storage = 0; /* only this member of each field is initialized here */
- }
-
- return(update);
-}
-
-/*************************************************************************
-Returns the number of fields in the update vector, that is, the number of
-columns to be updated by the update vector. */
-UNIV_INLINE
-ulint
-upd_get_n_fields(
-/*=============*/
- /* out: number of fields (update->n_fields) */
- upd_t* update) /* in: update vector; checked with ut_ad() to be non-NULL */
-{
- ut_ad(update);
-
- return(update->n_fields);
-}
-
-/*************************************************************************
-Returns a pointer to the nth field of an update vector. */
-UNIV_INLINE
-upd_field_t*
-upd_get_nth_field(
-/*==============*/
- /* out: update vector field (update->fields + n) */
- upd_t* update, /* in: update vector; checked with ut_ad() to be non-NULL */
- ulint n) /* in: field position in update vector; must be < n_fields */
-{
- ut_ad(update);
- ut_ad(n < update->n_fields);
-
- return(update->fields + n);
-}
-
-/*************************************************************************
-Sets the index field number to be updated by an update vector field. */
-UNIV_INLINE
-void
-upd_field_set_field_no(
-/*===================*/
- upd_field_t* upd_field, /* in: update vector field; its field_no is set */
- ulint field_no, /* in: field number in index; an error is printed
- to stderr if it is not less than the number of fields */
- dict_index_t* index, /* in: index that field_no refers to */
- trx_t* trx) /* in: transaction, passed to dict_index_name_print() */
-{
- upd_field->field_no = field_no;
-
- if (UNIV_UNLIKELY(field_no >= dict_index_get_n_fields(index))) { /* out of range: report it */
- fprintf(stderr,
- "InnoDB: Error: trying to access field %lu in ",
- (ulong) field_no);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, "\n"
- "InnoDB: but index only has %lu fields\n",
- (ulong) dict_index_get_n_fields(index));
- } /* NOTE(review): execution continues below with the out-of-range field_no */
-
- dict_col_copy_type(dict_index_get_nth_col(index, field_no), /* presumably copies the column type */
- dfield_get_type(&(upd_field->new_val))); /* into new_val - confirm */
-}
-
-/*************************************************************************
-Updates the trx id and roll ptr fields in a clustered index record when
-a row is updated or marked deleted. */
-UNIV_INLINE
-void
-row_upd_rec_sys_fields(
-/*===================*/
- rec_t* rec, /* in: record whose trx id and roll ptr are set */
- dict_index_t* index, /* in: clustered index (checked with ut_ad()) */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- trx_t* trx, /* in: transaction; trx->id is stored as the trx id */
- dulint roll_ptr)/* in: roll ptr of the undo log record */
-{
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(rec_offs_validate(rec, index, offsets));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!buf_block_align(rec)->is_hashed
- || rw_lock_own(&btr_search_latch, RW_LOCK_EX)); /* hashed block requires the latch in RW_LOCK_EX mode */
-#endif /* UNIV_SYNC_DEBUG */
-
- row_set_rec_trx_id(rec, index, offsets, trx->id);
- row_set_rec_roll_ptr(rec, index, offsets, roll_ptr);
-}
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
deleted file mode 100644
index e1377112d2a..00000000000
--- a/storage/innobase/include/row0vers.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/******************************************************
-Row versions
-
-(c) 1997 Innobase Oy
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0vers_h
-#define row0vers_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "rem0types.h"
-#include "mtr0mtr.h"
-#include "read0types.h"
-
-/*********************************************************************
-Finds out if an active transaction has inserted or modified a secondary
-index record. NOTE: the kernel mutex is temporarily released in this
-function! */
-
-trx_t*
-row_vers_impl_x_locked_off_kernel(
-/*==============================*/
- /* out: NULL if committed, else the active
- transaction; NOTE that the kernel mutex is
- temporarily released! */
- rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index, /* in: the secondary index */
- const ulint* offsets);/* in: rec_get_offsets(rec, index) */
-/*********************************************************************
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view. */
-
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
- /* out: TRUE if earlier version should be preserved */
- dulint trx_id, /* in: transaction id in the version */
- mtr_t* mtr); /* in: mtr holding the latch on the clustered index
- record; it will also hold the latch on purge_view */
-/*********************************************************************
-Finds out if a version of the record, where the version >= the current
-purge view, should have ientry as its secondary index entry. We check
-if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry == ientry; exactly in
-this case we return TRUE. */
-
-ibool
-row_vers_old_has_index_entry(
-/*=========================*/
- /* out: TRUE if an earlier version should have ientry */
- ibool also_curr,/* in: TRUE if also rec is included in the
- versions to search; otherwise only versions
- prior to it are searched */
- rec_t* rec, /* in: record in the clustered index; the
- caller must have a latch on the page */
- mtr_t* mtr, /* in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /* in: the secondary index */
- dtuple_t* ientry); /* in: the secondary index entry */
-/*********************************************************************
-Constructs the version of a clustered index record which a consistent
-read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version. */
-
-ulint
-row_vers_build_for_consistent_read(
-/*===============================*/
- /* out: DB_SUCCESS or DB_MISSING_HISTORY */
- rec_t* rec, /* in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this record */
- mtr_t* mtr, /* in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /* in: the clustered index */
- ulint** offsets,/* in/out: offsets returned by
- rec_get_offsets(rec, index) */
- read_view_t* view, /* in: the consistent read view */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/* in: memory heap from which the memory for
- old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- rec_t** old_vers);/* out, own: old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
-
-/*********************************************************************
-Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read. */
-
-ulint
-row_vers_build_for_semi_consistent_read(
-/*====================================*/
- /* out: DB_SUCCESS or DB_MISSING_HISTORY */
- rec_t* rec, /* in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this record */
- mtr_t* mtr, /* in: mtr holding the latch on rec */
- dict_index_t* index, /* in: the clustered index */
- ulint** offsets,/* in/out: offsets returned by
- rec_get_offsets(rec, index) */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/* in: memory heap from which the memory for
- old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- rec_t** old_vers);/* out, own: rec, old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
-
-
-#ifndef UNIV_NONINL
-#include "row0vers.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/row0vers.ic b/storage/innobase/include/row0vers.ic
deleted file mode 100644
index ab1e264635b..00000000000
--- a/storage/innobase/include/row0vers.ic
+++ /dev/null
@@ -1,13 +0,0 @@
-/******************************************************
-Row versions
-
-(c) 1997 Innobase Oy
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0row.h"
-#include "dict0dict.h"
-#include "read0read.h"
-#include "page0page.h"
-#include "log0recv.h"
diff --git a/storage/innobase/include/srv0que.h b/storage/innobase/include/srv0que.h
deleted file mode 100644
index 05c339cdd32..00000000000
--- a/storage/innobase/include/srv0que.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/******************************************************
-Server query execution
-
-(c) 1996 Innobase Oy
-
-Created 6/5/1996 Heikki Tuuri
-*******************************************************/
-
-
-#ifndef srv0que_h
-#define srv0que_h
-
-#include "univ.i"
-#include "que0types.h"
-
-/**************************************************************************
-Checks if there is work to do in the server task queue. If there is, the
-thread starts processing a task. Before leaving, it again checks the task
-queue and picks a new task if any exists. This is called by a SRV_WORKER
-thread. */
-
-void
-srv_que_task_queue_check(void);
-/*==========================*/
-/**************************************************************************
-Performs round-robin on the server tasks. This is called by a SRV_WORKER
-thread every second or so. */
-
-que_thr_t*
-srv_que_round_robin(
-/*================*/
- /* out: the new (may be == thr) query thread
- to run */
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if
-there exists one suspended. */
-
-void
-srv_que_task_enqueue(
-/*=================*/
- que_thr_t* thr); /* in: query thread */
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if
-there exists one suspended. */
-
-void
-srv_que_task_enqueue_low(
-/*=====================*/
- que_thr_t* thr); /* in: query thread */
-
-#endif
-
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
deleted file mode 100644
index 05300e38430..00000000000
--- a/storage/innobase/include/srv0srv.h
+++ /dev/null
@@ -1,572 +0,0 @@
-/******************************************************
-The server main program
-
-(c) 1995 Innobase Oy
-
-Created 10/10/1995 Heikki Tuuri
-*******************************************************/
-
-
-#ifndef srv0srv_h
-#define srv0srv_h
-
-#include "univ.i"
-#include "sync0sync.h"
-#include "os0sync.h"
-#include "que0types.h"
-#include "trx0types.h"
-
-extern const char* srv_main_thread_op_info;
-
-/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
-extern const char srv_mysql50_table_name_prefix[9];
-
-/* When this event is set the lock timeout and InnoDB monitor
-thread starts running */
-extern os_event_t srv_lock_timeout_thread_event;
-
-/* If the last data file is auto-extended, we add this many pages to it
-at a time */
-#define SRV_AUTO_EXTEND_INCREMENT \
- (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
-
-/* This is set to TRUE if the MySQL user has set it in MySQL */
-extern ibool srv_lower_case_table_names;
-
-/* Mutex for locking srv_monitor_file */
-extern mutex_t srv_monitor_file_mutex;
-/* Temporary file for innodb monitor output */
-extern FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-extern mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-extern FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-extern mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellaneous diagnostic output */
-extern FILE* srv_misc_tmpfile;
-
-/* Server parameters which are read from the initfile */
-
-extern char* srv_data_home;
-#ifdef UNIV_LOG_ARCHIVE
-extern char* srv_arch_dir;
-#endif /* UNIV_LOG_ARCHIVE */
-
-extern ibool srv_file_per_table;
-extern ibool srv_locks_unsafe_for_binlog;
-
-extern ulint srv_n_data_files;
-extern char** srv_data_file_names;
-extern ulint* srv_data_file_sizes;
-extern ulint* srv_data_file_is_raw_partition;
-
-extern ibool srv_auto_extend_last_data_file;
-extern ulint srv_last_file_size_max;
-extern ulong srv_auto_extend_increment;
-
-extern ibool srv_created_new_raw;
-
-#define SRV_NEW_RAW 1
-#define SRV_OLD_RAW 2
-
-extern char** srv_log_group_home_dirs;
-
-extern ulint srv_n_log_groups;
-extern ulint srv_n_log_files;
-extern ulint srv_log_file_size;
-extern ulint srv_log_buffer_size;
-extern ulong srv_flush_log_at_trx_commit;
-
-extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
- character set */
-extern ulint srv_pool_size;
-extern ulint srv_awe_window_size;
-extern ulint srv_mem_pool_size;
-extern ulint srv_lock_table_size;
-
-extern ibool srv_thread_concurrency_timer_based;
-
-/* Number of background IO threads for read and write. Replaces
- * srv_n_file_io_threads. */
-extern ulint srv_n_read_io_threads;
-extern ulint srv_n_write_io_threads;
-/* Max number of adjacent IO requests to merge into one large request. */
-extern ulint srv_max_merged_io;
-
-/* Number of IO operations per second the server can do */
-extern ulint srv_io_capacity;
-
-/* Flush dirty pages when below max dirty percent */
-extern ibool srv_extra_dirty_writes;
-
-
-
-#ifdef UNIV_LOG_ARCHIVE
-extern ibool srv_log_archive_on;
-extern ibool srv_archive_recovery;
-extern dulint srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-extern ulint srv_lock_wait_timeout;
-
-extern char* srv_file_flush_method_str;
-extern ulint srv_unix_file_flush_method;
-extern ulint srv_win_file_flush_method;
-
-extern ulint srv_max_n_open_files;
-
-extern ulint srv_max_dirty_pages_pct;
-
-extern ulint srv_force_recovery;
-extern ulong srv_thread_concurrency;
-extern ulong srv_commit_concurrency;
-
-extern ulint srv_max_n_threads;
-
-extern lint srv_conc_n_threads;
-
-extern ulint srv_fast_shutdown; /* If this is 1, do not do a
- purge and index buffer merge.
- If this 2, do not even flush the
- buffer pool to data files at the
- shutdown: we effectively 'crash'
- InnoDB (but lose no committed
- transactions). */
-extern ibool srv_innodb_status;
-
-extern ibool srv_use_doublewrite_buf;
-extern ibool srv_use_checksums;
-
-extern ibool srv_set_thread_priorities;
-extern int srv_query_thread_priority;
-
-extern ulong srv_max_buf_pool_modified_pct;
-extern ulong srv_max_purge_lag;
-extern ibool srv_use_awe;
-extern ibool srv_use_adaptive_hash_indexes;
-/*-------------------------------------------*/
-
-extern ulint srv_n_rows_inserted;
-extern ulint srv_n_rows_updated;
-extern ulint srv_n_rows_deleted;
-extern ulint srv_n_rows_read;
-
-extern ibool srv_print_innodb_monitor;
-extern ibool srv_print_innodb_lock_monitor;
-extern ibool srv_print_innodb_tablespace_monitor;
-extern ibool srv_print_verbose_log;
-extern ibool srv_print_innodb_table_monitor;
-
-extern ibool srv_lock_timeout_and_monitor_active;
-extern ibool srv_error_monitor_active;
-
-extern ulong srv_n_spin_wait_rounds;
-extern ulong srv_n_free_tickets_to_enter;
-extern ulong srv_thread_sleep_delay;
-extern ulint srv_spin_wait_delay;
-extern ibool srv_priority_boost;
-
-extern ulint srv_pool_size;
-extern ulint srv_mem_pool_size;
-extern ulint srv_lock_table_size;
-
-extern ibool srv_print_thread_releases;
-extern ibool srv_print_lock_waits;
-extern ibool srv_print_buf_io;
-extern ibool srv_print_log_io;
-extern ibool srv_print_latch_waits;
-
-extern ulint srv_activity_count;
-extern ulint srv_fatal_semaphore_wait_threshold;
-extern ulint srv_dml_needed_delay;
-
-extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
- query threads, and lock table: we allocate
- it from dynamic memory to get it to the
- same DRAM page as other hotspot semaphores */
-#define kernel_mutex (*kernel_mutex_temp)
-
-#define SRV_MAX_N_IO_THREADS 100
-
-/* Array of English strings describing the current state of an
-i/o handler thread */
-extern const char* srv_io_thread_op_info[];
-extern const char* srv_io_thread_function[];
-
-/* the number of the log write requests done */
-extern ulint srv_log_write_requests;
-
-/* the number of physical writes to the log performed */
-extern ulint srv_log_writes;
-
-/* amount of data written to the log files in bytes */
-extern ulint srv_os_log_written;
-
-/* amount of writes being done to the log files */
-extern ulint srv_os_log_pending_writes;
-
-/* we increase this counter when we don't have enough space in the
-log buffer and have to flush it */
-extern ulint srv_log_waits;
-
-/* variable that counts amount of data read in total (in bytes) */
-extern ulint srv_data_read;
-
-/* here we count the amount of data written in total (in bytes) */
-extern ulint srv_data_written;
-
-/* this variable counts the number of times the doublewrite buffer
-was flushed */
-extern ulint srv_dblwr_writes;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-extern ulint srv_dblwr_pages_written;
-
-/* in this variable we store the number of write requests issued */
-extern ulint srv_buf_pool_write_requests;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-extern ulint srv_buf_pool_wait_free;
-
-/* variable to count the number of pages that were written from the
-buffer pool to disk */
-extern ulint srv_buf_pool_flushed;
-
-/* variable to count the number of buffer pool reads that led to the
-reading of a disk page */
-extern ulint srv_buf_pool_reads;
-
-/* variable to count the number of sequential read-aheads that were done */
-extern ulint srv_read_ahead_seq;
-
-/* variable to count the number of random read-aheads that were done */
-extern ulint srv_read_ahead_rnd;
-
-/* Number of threads that may have missed a lock wait wakeup */
-extern ulint sync_wake_ups;
-
-/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does
-NOT update cardinality for indexes of InnoDB table". By default we are
-running with the fix disabled because MySQL 5.1 is frozen for such
-behavioral changes. */
-extern char srv_use_legacy_cardinality_algorithm;
-
-/* In this structure we store status variables to be passed to MySQL */
-typedef struct export_var_struct export_struc;
-
-extern export_struc export_vars;
-
-typedef struct srv_sys_struct srv_sys_t;
-
-/* The server system */
-extern srv_sys_t* srv_sys;
-
-/* Alternatives for the file flush option in Unix; see the InnoDB manual
-about what these mean */
-#define SRV_UNIX_FSYNC 1 /* This is the default */
-#define SRV_UNIX_O_DSYNC 2
-#define SRV_UNIX_LITTLESYNC 3
-#define SRV_UNIX_NOSYNC 4
-#define SRV_UNIX_O_DIRECT 5
-
-/* Alternatives for file i/o in Windows */
-#define SRV_WIN_IO_NORMAL 1
-#define SRV_WIN_IO_UNBUFFERED 2 /* This is the default */
-
-/* Alternatives for srv_force_recovery. Non-zero values are intended
-to help the user get a damaged database up so that he can dump intact
-tables and rows with SELECT INTO OUTFILE. The database must not otherwise
-be used with these options! A bigger number below means that all precautions
-of lower numbers are included. */
-
-#define SRV_FORCE_IGNORE_CORRUPT 1 /* let the server run even if it
- detects a corrupt page */
-#define SRV_FORCE_NO_BACKGROUND 2 /* prevent the main thread from
- running: if a crash would occur
- in purge, this prevents it */
-#define SRV_FORCE_NO_TRX_UNDO 3 /* do not run trx rollback after
- recovery */
-#define SRV_FORCE_NO_IBUF_MERGE 4 /* prevent also ibuf operations:
- if they would cause a crash, better
- not do them */
-#define SRV_FORCE_NO_UNDO_LOG_SCAN 5 /* do not look at undo logs when
- starting the database: InnoDB will
- treat even incomplete transactions
- as committed */
-#define SRV_FORCE_NO_LOG_REDO 6 /* do not do the log roll-forward
- in connection with recovery */
-
-/*************************************************************************
-Boots Innobase server. */
-
-ulint
-srv_boot(void);
-/*==========*/
- /* out: DB_SUCCESS or error code */
-/*************************************************************************
-Initializes the server. */
-
-void
-srv_init(void);
-/*==========*/
-/*************************************************************************
-Frees the OS fast mutex created in srv_boot(). */
-
-void
-srv_free(void);
-/*==========*/
-/*************************************************************************
-Initializes the synchronization primitives, memory system, and the thread
-local storage. */
-
-void
-srv_general_init(void);
-/*==================*/
-/*************************************************************************
-Gets the number of threads in the system. */
-
-ulint
-srv_get_n_threads(void);
-/*===================*/
-/*************************************************************************
-Returns the calling thread type. */
-
-ulint
-srv_get_thread_type(void);
-/*=====================*/
- /* out: SRV_COM, ... */
-/*************************************************************************
-Sets the info describing an i/o thread current state. */
-
-void
-srv_set_io_thread_op_info(
-/*======================*/
- ulint i, /* in: the 'segment' of the i/o thread */
- const char* str); /* in: constant char string describing the
- state */
-/*************************************************************************
-Releases threads of the type given from suspension in the thread table.
-NOTE! The server mutex has to be reserved by the caller! */
-
-ulint
-srv_release_threads(
-/*================*/
- /* out: number of threads released: this may be
- < n if not enough threads were suspended at the
- moment */
- ulint type, /* in: thread type */
- ulint n); /* in: number of threads to release */
-/*************************************************************************
-The master thread controlling the server. */
-
-os_thread_ret_t
-srv_master_thread(
-/*==============*/
- /* out: a dummy parameter */
- void* arg); /* in: a dummy parameter required by
- os_thread_create */
-/***********************************************************************
-Tells the Innobase server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the kernel
-mutex, for performance reasons). */
-
-void
-srv_active_wake_master_thread(void);
-/*===============================*/
-/***********************************************************************
-Wakes up the master thread if it is suspended or being suspended. */
-
-void
-srv_wake_master_thread(void);
-/*========================*/
-/*************************************************************************
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-
-void
-srv_conc_enter_innodb(
-/*==================*/
- trx_t* trx); /* in: transaction object associated with the
- thread */
-/*************************************************************************
-This lets a thread enter InnoDB regardless of the number of threads inside
-InnoDB. This must be called when a thread ends a lock wait. */
-
-void
-srv_conc_force_enter_innodb(
-/*========================*/
- trx_t* trx); /* in: transaction object associated with the
- thread */
-/*************************************************************************
-This must be called when a thread exits InnoDB in a lock wait or at the
-end of an SQL statement. */
-
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
- trx_t* trx); /* in: transaction object associated with the
- thread */
-/*************************************************************************
-This must be called when a thread exits InnoDB. */
-
-void
-srv_conc_exit_innodb(
-/*=================*/
- trx_t* trx); /* in: transaction object associated with the
- thread */
-/*******************************************************************
-Puts a MySQL OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim. */
-
-void
-srv_suspend_mysql_thread(
-/*=====================*/
- que_thr_t* thr); /* in: query thread associated with the MySQL
- OS thread */
-/************************************************************************
-Releases a MySQL OS thread waiting for a lock to be released, if the
-thread is already suspended. */
-
-void
-srv_release_mysql_thread_if_suspended(
-/*==================================*/
- que_thr_t* thr); /* in: query thread associated with the
- MySQL OS thread */
-/*************************************************************************
-A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors. */
-
-os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
- /* out: a dummy parameter */
- void* arg); /* in: a dummy parameter required by
- os_thread_create */
-/*************************************************************************
-A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs. */
-
-os_thread_ret_t
-srv_error_monitor_thread(
-/*=====================*/
- /* out: a dummy parameter */
- void* arg); /* in: a dummy parameter required by
- os_thread_create */
-/**********************************************************************
-Outputs to a file the output of the InnoDB Monitor. */
-
-void
-srv_printf_innodb_monitor(
-/*======================*/
- FILE* file); /* in: output stream */
-
-/**********************************************************************
-Function to pass InnoDB status variables to MySQL */
-
-void
-srv_export_innodb_status(void);
-/*=====================*/
-
-/* Types for the threads existing in the system. Threads of types 4 - 9
-are called utility threads. Note that utility threads are mainly disk
-bound, except that version threads 6 - 7 may also be CPU bound, if
-cleaning versions from the buffer pool. */
-
-#define SRV_COM 1 /* threads serving communication and queries */
-#define SRV_CONSOLE 2 /* thread serving console */
-#define SRV_WORKER 3 /* threads serving parallelized queries and
- queries released from lock wait */
-#define SRV_BUFFER 4 /* thread flushing dirty buffer blocks,
- not currently in use */
-#define SRV_RECOVERY 5 /* threads finishing a recovery,
- not currently in use */
-#define SRV_INSERT 6 /* thread flushing the insert buffer to disk,
- not currently in use */
-#define SRV_MASTER 7 /* the master thread, (whose type number must
- be biggest) */
-
-/* Thread slot in the thread table */
-typedef struct srv_slot_struct srv_slot_t;
-
-/* Thread table is an array of slots */
-typedef srv_slot_t srv_table_t;
-
-/* In this structure we store status variables to be passed to MySQL */
-struct export_var_struct{
- ulint innodb_data_pending_reads;
- ulint innodb_data_pending_writes;
- ulint innodb_data_pending_fsyncs;
- ulint innodb_data_fsyncs;
- ulint innodb_data_read;
- ulint innodb_data_writes;
- ulint innodb_data_written;
- ulint innodb_data_reads;
- ulint innodb_buffer_pool_pages_total;
- ulint innodb_buffer_pool_pages_data;
- ulint innodb_buffer_pool_pages_dirty;
- ulint innodb_buffer_pool_pages_misc;
- ulint innodb_buffer_pool_pages_free;
-#ifdef UNIV_DEBUG
- ulint innodb_buffer_pool_pages_latched;
-#endif /* UNIV_DEBUG */
- ulint innodb_buffer_pool_read_requests;
- ulint innodb_buffer_pool_reads;
- ulint innodb_buffer_pool_wait_free;
- ulint innodb_buffer_pool_pages_flushed;
- ulint innodb_buffer_pool_write_requests;
- ulint innodb_buffer_pool_read_ahead_seq;
- ulint innodb_buffer_pool_read_ahead_rnd;
- ulint innodb_dblwr_pages_written;
- ulint innodb_dblwr_writes;
- ibool innodb_have_sync_atomic;
- ibool innodb_heap_enabled;
- ulint innodb_log_waits;
- ulint innodb_log_write_requests;
- ulint innodb_log_writes;
- ulint innodb_os_log_written;
- ulint innodb_os_log_fsyncs;
- ulint innodb_os_log_pending_writes;
- ulint innodb_os_log_pending_fsyncs;
- ulint innodb_page_size;
- ulint innodb_pages_created;
- ulint innodb_pages_read;
- ulint innodb_pages_written;
- ulint innodb_row_lock_waits;
- ulint innodb_row_lock_current_waits;
- ib_longlong innodb_row_lock_time;
- ulint innodb_row_lock_time_avg;
- ulint innodb_row_lock_time_max;
- ulint innodb_rows_read;
- ulint innodb_rows_inserted;
- ulint innodb_rows_updated;
- ulint innodb_rows_deleted;
- ulint innodb_wake_ups;
-};
-
-/* The server system struct */
-struct srv_sys_struct{
- srv_table_t* threads; /* server thread table */
- UT_LIST_BASE_NODE_T(que_thr_t)
- tasks; /* task queue */
- dict_index_t* dummy_ind1; /* dummy index for old-style
- supremum and infimum records */
- dict_index_t* dummy_ind2; /* dummy index for new-style
- supremum and infimum records */
-};
-
-extern ulint srv_n_threads_active[];
-
-#endif
diff --git a/storage/innobase/include/srv0srv.ic b/storage/innobase/include/srv0srv.ic
deleted file mode 100644
index 73e0729660f..00000000000
--- a/storage/innobase/include/srv0srv.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Server main program
-
-(c) 1995 Innobase Oy
-
-Created 10/4/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
deleted file mode 100644
index a04930d6516..00000000000
--- a/storage/innobase/include/srv0start.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/******************************************************
-Starts the Innobase database server
-
-(c) 1995-2000 Innobase Oy
-
-Created 10/10/1995 Heikki Tuuri
-*******************************************************/
-
-
-#ifndef srv0start_h
-#define srv0start_h
-
-#include "univ.i"
-#include "ut0byte.h"
-
-/*************************************************************************
-Normalizes a directory path for Windows: converts slashes to backslashes. */
-
-void
-srv_normalize_path_for_win(
-/*=======================*/
- char* str); /* in/out: null-terminated character string */
-/*************************************************************************
-Reads the data files and their sizes from a character string given in
-the .cnf file. */
-
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
- /* out: TRUE if ok, FALSE if parsing
- error */
- char* str, /* in: the data file path string */
- char*** data_file_names, /* out, own: array of data file
- names */
- ulint** data_file_sizes, /* out, own: array of data file sizes
- in megabytes */
- ulint** data_file_is_raw_partition,/* out, own: array of flags
- showing which data files are raw
- partitions */
- ulint* n_data_files, /* out: number of data files */
- ibool* is_auto_extending, /* out: TRUE if the last data file is
- auto-extending */
- ulint* max_auto_extend_size); /* out: max auto extend size for the
- last file if specified, 0 if not */
-/*************************************************************************
-Reads log group home directories from a character string given in
-the .cnf file. */
-
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
- /* out: TRUE if ok, FALSE if parsing
- error */
- char* str, /* in: character string */
- char*** log_group_home_dirs); /* out, own: log group home dirs */
-/*************************************************************************
-Adds a slash or a backslash to the end of a string if it is missing
-and the string is not empty. */
-
-char*
-srv_add_path_separator_if_needed(
-/*=============================*/
- /* out: string which has the separator if the
- string is not empty */
- char* str); /* in: null-terminated character string */
-/********************************************************************
-Starts Innobase and creates a new database if database files
-are not found and the user wants. Server parameters are
-read from a file of name "srv_init" in the ib_home directory. */
-
-int
-innobase_start_or_create_for_mysql(void);
-/*====================================*/
- /* out: DB_SUCCESS or error code */
-/********************************************************************
-Shuts down the Innobase database. */
-int
-innobase_shutdown_for_mysql(void);
-/*=============================*/
- /* out: DB_SUCCESS or error code */
-extern dulint srv_shutdown_lsn;
-extern dulint srv_start_lsn;
-
-#ifdef __NETWARE__
-void set_panic_flag_for_netware(void);
-#endif
-
-#ifdef HAVE_DARWIN_THREADS
-extern ibool srv_have_fullfsync;
-#endif
-
-extern ulint srv_sizeof_trx_t_in_ha_innodb_cc;
-
-extern ibool srv_is_being_started;
-extern ibool srv_startup_is_before_trx_rollback_phase;
-extern ibool srv_is_being_shut_down;
-
-extern ibool srv_start_raw_disk_in_use;
-
-/* At a shutdown the value first climbs from 0 to SRV_SHUTDOWN_CLEANUP
-and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-
-extern ulint srv_shutdown_state;
-
-#define SRV_SHUTDOWN_CLEANUP 1
-#define SRV_SHUTDOWN_LAST_PHASE 2
-#define SRV_SHUTDOWN_EXIT_THREADS 3
-
-/* Log 'spaces' have id's >= this */
-#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
-
-#endif
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
deleted file mode 100644
index fae26b7a63e..00000000000
--- a/storage/innobase/include/sync0arr.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/******************************************************
-The wait array used in synchronization primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0arr_h
-#define sync0arr_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "ut0mem.h"
-#include "os0thread.h"
-
-typedef struct sync_cell_struct sync_cell_t;
-typedef struct sync_array_struct sync_array_t;
-
-#define SYNC_ARRAY_OS_MUTEX 1
-#define SYNC_ARRAY_MUTEX 2
-
-/***********************************************************************
-Creates a synchronization wait array. It is protected by a mutex
-which is automatically reserved when the functions operating on it
-are called. */
-
-sync_array_t*
-sync_array_create(
-/*==============*/
- /* out, own: created wait array */
- ulint n_cells, /* in: number of cells in the array
- to create */
- ulint protection); /* in: either SYNC_ARRAY_OS_MUTEX or
- SYNC_ARRAY_MUTEX: determines the type
- of mutex protecting the data structure */
-/**********************************************************************
-Frees the resources in a wait array. */
-
-void
-sync_array_free(
-/*============*/
- sync_array_t* arr); /* in, own: sync wait array */
-/**********************************************************************
-Reserves a wait array cell for waiting for an object.
-The event of the cell is reset to nonsignalled state. */
-
-void
-sync_array_reserve_cell(
-/*====================*/
- sync_array_t* arr, /* in: wait array */
- void* object, /* in: pointer to the object to wait for */
- ulint type, /* in: lock request type */
- const char* file, /* in: file where requested */
- ulint line, /* in: line where requested */
- ulint* index); /* out: index of the reserved cell */
-/**********************************************************************
-This function should be called when a thread starts to wait on
-a wait array cell. In the debug version this function checks
-if the wait for a semaphore will result in a deadlock, in which
-case prints info and asserts. */
-
-void
-sync_array_wait_event(
-/*==================*/
- sync_array_t* arr, /* in: wait array */
- ulint index); /* in: index of the reserved cell */
-/**********************************************************************
-Frees the cell. NOTE! sync_array_wait_event frees the cell
-automatically! */
-
-void
-sync_array_free_cell(
-/*=================*/
- sync_array_t* arr, /* in: wait array */
- ulint index); /* in: index of the cell in array */
-/**************************************************************************
-Note that one of the wait objects was signalled. */
-
-void
-sync_array_object_signalled(
-/*========================*/
- sync_array_t* arr); /* in: wait array */
-/**************************************************************************
-If the wakeup algorithm does not work perfectly at semaphore releases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server. */
-
-void
-sync_arr_wake_threads_if_sema_free(void);
-/*====================================*/
-/**************************************************************************
-Prints warnings of long semaphore waits to stderr. */
-
-ibool
-sync_array_print_long_waits(void);
-/*=============================*/
- /* out: TRUE if fatal semaphore wait threshold
- was exceeded */
-/************************************************************************
-Validates the integrity of the wait array. Checks
-that the number of reserved cells equals the count variable. */
-
-void
-sync_array_validate(
-/*================*/
- sync_array_t* arr); /* in: sync wait array */
-/**************************************************************************
-Prints info of the wait array. */
-
-void
-sync_array_print_info(
-/*==================*/
- FILE* file, /* in: file where to print */
- sync_array_t* arr); /* in: wait array */
-
-
-#ifndef UNIV_NONINL
-#include "sync0arr.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/sync0arr.ic b/storage/innobase/include/sync0arr.ic
deleted file mode 100644
index dbe35c033e5..00000000000
--- a/storage/innobase/include/sync0arr.ic
+++ /dev/null
@@ -1,10 +0,0 @@
-/******************************************************
-The wait array for synchronization primitives
-
-Inline code
-
-(c) 1995 Innobase Oy
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
deleted file mode 100644
index 6de26535689..00000000000
--- a/storage/innobase/include/sync0rw.h
+++ /dev/null
@@ -1,517 +0,0 @@
-/******************************************************
-The read-write lock (for threads, not for database transactions)
-
-(c) 1995 Innobase Oy
-
-Created 9/11/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0rw_h
-#define sync0rw_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "sync0sync.h"
-#include "os0sync.h"
-
-/* The following undef is to prevent a name conflict with a macro
-in MySQL: */
-#undef rw_lock_t
-
-/* Latch types; these are used also in btr0btr.h: keep the numerical values
-smaller than 30 and the order of the numerical values like below! */
-#define RW_S_LATCH 1
-#define RW_X_LATCH 2
-#define RW_NO_LATCH 3
-
-/* We decrement lock_word by this amount for each x_lock. It is also the
-start value for the lock_word, meaning that it limits the maximum number
-of concurrent read locks before the rw_lock breaks. The current value of
-0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
-#define X_LOCK_DECR 0x00100000
-
-typedef struct rw_lock_struct rw_lock_t;
-#ifdef UNIV_SYNC_DEBUG
-typedef struct rw_lock_debug_struct rw_lock_debug_t;
-#endif /* UNIV_SYNC_DEBUG */
-
-typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
-
-extern rw_lock_list_t rw_lock_list;
-extern mutex_t rw_lock_list_mutex;
-
-#ifdef UNIV_SYNC_DEBUG
-/* The global mutex which protects debug info lists of all rw-locks.
-To modify the debug info list of an rw-lock, this mutex has to be
-
-acquired in addition to the mutex protecting the lock. */
-extern mutex_t rw_lock_debug_mutex;
-extern os_event_t rw_lock_debug_event; /* If deadlock detection does
- not get immediately the mutex it
- may wait for this event */
-extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if
- there may be waiters for the event */
-#endif /* UNIV_SYNC_DEBUG */
-
-extern ib_longlong rw_s_spin_wait_count;
-extern ib_longlong rw_s_spin_round_count;
-extern ib_longlong rw_s_exit_count;
-extern ib_longlong rw_s_os_wait_count;
-extern ib_longlong rw_x_spin_wait_count;
-extern ib_longlong rw_x_spin_round_count;
-extern ib_longlong rw_x_os_wait_count;
-extern ib_longlong rw_x_exit_count;
-
-/**********************************************************************
-Creates, or rather, initializes an rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_create(L, level) \
- rw_lock_create_func((L), (level), #L, __FILE__, __LINE__)
-# else /* UNIV_SYNC_DEBUG */
-# define rw_lock_create(L, level) \
- rw_lock_create_func((L), #L, __FILE__, __LINE__)
-# endif /* UNIV_SYNC_DEBUG */
-#else /* UNIV_DEBUG */
-# define rw_lock_create(L, level) \
- rw_lock_create_func((L), __FILE__, __LINE__)
-#endif /* UNIV_DEBUG */
-
-/**********************************************************************
-Creates, or rather, initializes an rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-
-void
-rw_lock_create_func(
-/*================*/
- rw_lock_t* lock, /* in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /* in: level */
-# endif /* UNIV_SYNC_DEBUG */
- const char* cmutex_name, /* in: mutex name */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /* in: file name where created */
- ulint cline); /* in: file line where created */
-/**********************************************************************
-Calling this function is obligatory only if the memory buffer containing
-the rw-lock is freed. Removes an rw-lock object from the global list. The
-rw-lock is checked to be in the non-locked state. */
-
-void
-rw_lock_free(
-/*=========*/
- rw_lock_t* lock); /* in: rw-lock */
-#ifdef UNIV_DEBUG
-/**********************************************************************
-Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks. */
-
-ibool
-rw_lock_validate(
-/*=============*/
- rw_lock_t* lock);
-#endif /* UNIV_DEBUG */
-/**********************************************************************
-Low-level function which tries to lock an rw-lock in s-mode. Performs no
-spinning. */
-UNIV_INLINE
-ibool
-rw_lock_s_lock_low(
-/*===============*/
- /* out: TRUE if success */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass,
- /* in: pass value; != 0, if the lock will be
- passed to another thread to unlock */
- const char* file_name, /* in: file name where lock requested */
- ulint line); /* in: line where requested */
-/******************************************************************
-NOTE! The following macros should be used in rw s-locking, not the
-corresponding function. */
-
-#define rw_lock_s_lock(M) rw_lock_s_lock_func(\
- (M), 0, __FILE__, __LINE__)
-/******************************************************************
-NOTE! The following macros should be used in rw s-locking, not the
-corresponding function. */
-
-#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\
- (M), (P), __FILE__, __LINE__)
-/******************************************************************
-NOTE! The following macros should be used in rw s-locking, not the
-corresponding function. */
-
-#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\
- (M), 0, (F), (L))
-/**********************************************************************
-NOTE! Use the corresponding macro, not directly this function, except if
-you supply the file name and line number. Lock an rw-lock in shared mode
-for the current thread. If the rw-lock is locked in exclusive mode, or
-there is an exclusive lock request waiting, the function spins a preset
-time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before
-suspending the thread. */
-UNIV_INLINE
-void
-rw_lock_s_lock_func(
-/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately. */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_func_nowait(
-/*=======================*/
- /* out: TRUE if success */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name,/* in: file name where lock requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
-Releases a shared mode lock. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_func(
-/*==================*/
- rw_lock_t* lock /* in: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
- ,ulint pass /* in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- );
-/***********************************************************************
-Releases a shared mode lock. */
-
-#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L, 0)
-#else
-#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L)
-#endif
-/***********************************************************************
-Releases a shared mode lock. */
-
-#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L, P)
-#else
-#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
-#endif
-/******************************************************************
-NOTE! The following macro should be used in rw x-locking, not the
-corresponding function. */
-
-#define rw_lock_x_lock(M) rw_lock_x_lock_func(\
- (M), 0, __FILE__, __LINE__)
-/******************************************************************
-NOTE! The following macro should be used in rw x-locking, not the
-corresponding function. */
-
-#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\
- (M), (P), __FILE__, __LINE__)
-/******************************************************************
-NOTE! The following macros should be used in rw x-locking, not the
-corresponding function. */
-
-#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\
- (M), __FILE__, __LINE__)
-/**********************************************************************
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread. If the rw-lock is locked
-in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock, before suspending the thread. If the same thread has an x-lock
-on the rw-lock, locking succeeds, with the following exception: if pass != 0,
-only a single x-lock may be taken on the lock. NOTE: If the same thread has
-an s-lock, locking does not succeed! */
-
-void
-rw_lock_x_lock_func(
-/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
-Releases an exclusive mode lock. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_func(
-/*==================*/
- rw_lock_t* lock /* in: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
- ,ulint pass /* in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- );
-/***********************************************************************
-Releases an exclusive mode lock. */
-
-#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L, 0)
-#else
-#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L)
-#endif
-/***********************************************************************
-Releases an exclusive mode lock. */
-
-#ifdef UNIV_SYNC_DEBUG
-#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L, P)
-#else
-#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
-#endif
-/**********************************************************************
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name, /* in: file name where requested */
- ulint line /* in: line where lock requested */
-);
-/**********************************************************************
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_x_lock_direct(
-/*==================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name, /* in: file name where requested */
- ulint line /* in: line where lock requested */
-);
-/**********************************************************************
-This function is used in the insert buffer to move the ownership of an
-x-latch on a buffer frame to the current thread. The x-latch was set by
-the buffer read operation and it protected the buffer frame while the
-read was done. The ownership is moved because we want that the current
-thread is able to acquire a second x-latch which is stored in an mtr.
-This, in turn, is needed to pass the debug checks of index page
-operations. */
-
-void
-rw_lock_x_lock_move_ownership(
-/*==========================*/
- rw_lock_t* lock); /* in: lock which was x-locked in the
- buffer read */
-/**********************************************************************
-Releases a shared mode lock when we know there are no waiters and none
-else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_direct(
-/*====================*/
- rw_lock_t* lock); /* in: rw-lock */
-/**********************************************************************
-Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_direct(
-/*====================*/
- rw_lock_t* lock); /* in: rw-lock */
-/**********************************************************************
-Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call. */
-UNIV_INLINE
-ulint
-rw_lock_get_x_lock_count(
-/*=====================*/
- /* out: value of writer_count */
- rw_lock_t* lock); /* in: rw-lock */
-/************************************************************************
-Accessor functions for rw lock. */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
- rw_lock_t* lock);
-UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
- rw_lock_t* lock);
-UNIV_INLINE
-ulint
-rw_lock_get_reader_count(
-/*=====================*/
- rw_lock_t* lock);
-/**********************************************************************
-Decrements lock_word the specified amount if it is greater than 0.
-This is used by both s_lock and x_lock operations. */
-UNIV_INLINE
-ibool
-rw_lock_lock_word_decr(
- /* out: TRUE if decr occurs */
- rw_lock_t* lock, /* in: rw-lock */
- ulint amount); /* in: amount to decrement */
-/**********************************************************************
-Increments lock_word the specified amount and returns new value. */
-UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
- /* out: lock->lock_word after increment */
- rw_lock_t* lock, /* in: rw-lock */
- ulint amount); /* in: amount of increment */
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Checks if the thread has locked the rw-lock in the specified mode, with
-the pass value == 0. */
-
-ibool
-rw_lock_own(
-/*========*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint lock_type); /* in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
-Checks if somebody has locked the rw-lock in the specified mode. */
-
-ibool
-rw_lock_is_locked(
-/*==============*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint lock_type); /* in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-#ifdef UNIV_SYNC_DEBUG
-/*******************************************************************
-Prints debug info of an rw-lock. */
-
-void
-rw_lock_print(
-/*==========*/
- rw_lock_t* lock); /* in: rw-lock */
-/*******************************************************************
-Prints debug info of currently locked rw-locks. */
-
-void
-rw_lock_list_print_info(
-/*====================*/
- FILE* file); /* in: file where to print */
-/*******************************************************************
-Returns the number of currently locked rw-locks.
-Works only in the debug version. */
-
-ulint
-rw_lock_n_locked(void);
-/*==================*/
-
-/*#####################################################################*/
-
-/**********************************************************************
-Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
-because the debug mutex is also acquired in sync0arr while holding the OS
-mutex protecting the sync array, and the ordinary mutex_enter might
-recursively call routines in sync0arr, leading to a deadlock on the OS
-mutex. */
-
-void
-rw_lock_debug_mutex_enter(void);
-/*==========================*/
-/**********************************************************************
-Releases the debug mutex. */
-
-void
-rw_lock_debug_mutex_exit(void);
-/*==========================*/
-/*************************************************************************
-Prints info of a debug struct. */
-
-void
-rw_lock_debug_print(
-/*================*/
- rw_lock_debug_t* info); /* in: debug struct */
-#endif /* UNIV_SYNC_DEBUG */
-
-/* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! The structure used in the spin lock
-implementation of a read-write lock. Several threads may have a shared lock
-simultaneously in this lock, but only one writer may have an exclusive lock,
-in which case no shared locks are allowed. To prevent starving of a writer
-blocked by readers, a writer may queue for x-lock by decrementing lock_word:
-no new readers will be let in while the thread waits for readers to exit. */
-
-struct rw_lock_struct {
- volatile lint lock_word;
- /* Holds the state of the lock. */
- volatile ulint waiters;/* 1: there are waiters */
- volatile ulint pass; /* Default value 0. This is set to some
- value != 0 given by the caller of an x-lock
- operation, if the x-lock is to be passed to
- another thread to unlock (which happens in
- asynchronous i/o). */
- volatile os_thread_id_t writer_thread;
- /* Thread id of writer thread */
- os_event_t event; /* Used by sync0arr.c for thread queueing */
- os_event_t wait_ex_event;
- /* Event for next-writer to wait on. A thread
- must decrement lock_word before waiting. */
-#ifndef UNIV_SYNC_ATOMIC
- mutex_t mutex; /* The mutex protecting rw_lock_struct */
-#endif /* UNIV_SYNC_ATOMIC */
-
- UT_LIST_NODE_T(rw_lock_t) list;
- /* All allocated rw locks are put into a
- list */
-#ifdef UNIV_SYNC_DEBUG
- UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
- /* In the debug version: pointer to the debug
- info list of the lock */
- ulint level; /* Level in the global latching order. */
-#endif /* UNIV_SYNC_DEBUG */
- ulint count_os_wait; /* Count of os_waits. May not be accurate */
- const char* cfile_name;/* File name where lock created */
- /* last s-lock file/line is not guaranteed to be correct */
- const char* last_s_file_name;/* File name where last s-locked */
- const char* last_x_file_name;/* File name where last x-locked */
- ibool writer_is_wait_ex;
- /* This is TRUE if the writer field is
- RW_LOCK_WAIT_EX; this field is located far
- from the memory update hotspot fields which
- are at the start of this struct, thus we can
- peek this field without causing much memory
- bus traffic */
- unsigned cline:14; /* Line where created */
- unsigned last_s_line:14; /* Line number where last time s-locked */
- unsigned last_x_line:14; /* Line number where last time x-locked */
- ulint magic_n;
-};
-
-#define RW_LOCK_MAGIC_N 22643
-
-#ifdef UNIV_SYNC_DEBUG
-/* The structure for storing debug info of an rw-lock */
-struct rw_lock_debug_struct {
-
- os_thread_id_t thread_id; /* The thread id of the thread which
- locked the rw-lock */
- ulint pass; /* Pass value given in the lock operation */
- ulint lock_type; /* Type of the lock: RW_LOCK_EX,
- RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
- const char* file_name;/* File name where the lock was obtained */
- ulint line; /* Line where the rw-lock was locked */
- UT_LIST_NODE_T(rw_lock_debug_t) list;
- /* Debug structs are linked in a two-way
- list */
-};
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifndef UNIV_NONINL
-#include "sync0rw.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
deleted file mode 100644
index e3f1d881cb4..00000000000
--- a/storage/innobase/include/sync0rw.ic
+++ /dev/null
@@ -1,559 +0,0 @@
-/******************************************************
-The read-write lock (for threads)
-
-(c) 1995 Innobase Oy
-
-Created 9/11/1995 Heikki Tuuri
-*******************************************************/
-
-/**********************************************************************
-Lock an rw-lock in shared mode for the current thread. If the rw-lock is
-locked in exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
-waiting for the lock before suspending the thread. */
-
-void
-rw_lock_s_lock_spin(
-/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line); /* in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Inserts the debug information for an rw-lock. */
-
-void
-rw_lock_add_debug_info(
-/*===================*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint pass, /* in: pass value */
- ulint lock_type, /* in: lock type */
- const char* file_name, /* in: file where requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
-Removes a debug information struct for an rw-lock. */
-
-void
-rw_lock_remove_debug_info(
-/*======================*/
- rw_lock_t* lock, /* in: rw-lock */
- ulint pass, /* in: pass value */
- ulint lock_type); /* in: lock type */
-#endif /* UNIV_SYNC_DEBUG */
-
-/************************************************************************
-Accessor functions for rw lock. */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
- rw_lock_t* lock)
-{
- return(lock->waiters);
-}
-UNIV_INLINE
-void
-rw_lock_set_waiters(
-/*================*/
- rw_lock_t* lock)
-{
-#ifdef UNIV_SYNC_ATOMIC
- os_compare_and_swap(&(lock->waiters), 0, 1);
-#else /* UNIV_SYNC_ATOMIC */
- lock->waiters = 1;
-#endif /* UNIV_SYNC_ATOMIC */
-}
-UNIV_INLINE
-void
-rw_lock_reset_waiters(
-/*================*/
- rw_lock_t* lock)
-{
-#ifdef UNIV_SYNC_ATOMIC
- os_compare_and_swap(&(lock->waiters), 1, 0);
-#else /* UNIV_SYNC_ATOMIC */
- lock->waiters = 0;
-#endif /* UNIV_SYNC_ATOMIC */
-}
-
-/**********************************************************************
-Returns the write-status of the lock - this function made more sense
-with the old rw_lock implementation.
- */
-UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
- rw_lock_t* lock)
-{
- lint lock_word = lock->lock_word;
- if(lock_word > 0) {
- /* return NOT_LOCKED in s-lock state, like the writer
- member of the old lock implementation. */
- return RW_LOCK_NOT_LOCKED;
- } else if (((-lock_word) % X_LOCK_DECR) == 0) {
- return RW_LOCK_EX;
- } else {
- ut_ad(lock_word > -X_LOCK_DECR);
- return RW_LOCK_WAIT_EX;
- }
-}
-
-UNIV_INLINE
-ulint
-rw_lock_get_reader_count(
-/*=====================*/
- rw_lock_t* lock)
-{
- lint lock_word = lock->lock_word;
- if(lock_word > 0) {
- /* s-locked, no x-waiters */
- return(X_LOCK_DECR - lock_word);
- } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
- /* s-locked, with x-waiters */
- return (ulint)(-lock_word);
- }
- return 0;
-}
-
-#ifndef UNIV_SYNC_ATOMIC
-UNIV_INLINE
-mutex_t*
-rw_lock_get_mutex(
-/*==============*/
- rw_lock_t* lock)
-{
- return(&(lock->mutex));
-}
-#endif
-
-/**********************************************************************
-Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call. */
-UNIV_INLINE
-ulint
-rw_lock_get_x_lock_count(
-/*=====================*/
- /* out: value of writer_count */
- rw_lock_t* lock) /* in: rw-lock */
-{
- lint lock_copy = lock->lock_word;
- /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
- if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
- return 0;
- }
- return ((-lock_copy) / X_LOCK_DECR) + 1;
-}
-
-/**********************************************************************
-Two different implementations for decrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others. This does
-not support recursive x-locks: they should be handled by the caller and
-need not be atomic since they are performed by the current lock holder.
-Returns true if the decrement was made, false if not. */
-UNIV_INLINE
-ibool
-rw_lock_lock_word_decr(
- /* out: TRUE if decr occurs */
- rw_lock_t* lock, /* in: rw-lock */
- ulint amount) /* in: amount of decrement */
-{
-
-#ifdef UNIV_SYNC_ATOMIC
-
- lint local_lock_word = lock->lock_word;
- while (local_lock_word > 0) {
- if(os_compare_and_swap(&(lock->lock_word),
- local_lock_word,
- local_lock_word - amount)) {
- return TRUE;
- }
- local_lock_word = lock->lock_word;
- }
- return(FALSE);
-
-#else /* UNIV_SYNC_ATOMIC */
-
- ibool success = FALSE;
- mutex_enter(&(lock->mutex));
- if(lock->lock_word > 0) {
- lock->lock_word -= amount;
- success = TRUE;
- }
- mutex_exit(&(lock->mutex));
- return success;
-
-#endif /* UNIV_SYNC_ATOMIC */
-
-}
-
-/**********************************************************************
-Two different implementations for incrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others.
-Returns the value of lock_word after increment. */
-UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
- /* out: lock->lock_word after increment */
- rw_lock_t* lock, /* in: rw-lock */
- ulint amount) /* in: amount of increment */
-{
-
-#ifdef UNIV_SYNC_ATOMIC
-
- return(os_atomic_increment(&(lock->lock_word), amount));
-
-#else /* UNIV_SYNC_ATOMIC */
-
- lint local_lock_word;
-
- mutex_enter(&(lock->mutex));
-
- lock->lock_word += amount;
- local_lock_word = lock->lock_word;
-
- mutex_exit(&(lock->mutex));
-
- return local_lock_word;
-
-#endif /* UNIV_SYNC_ATOMIC */
-
-}
-
-/**********************************************************************
-Low-level function which tries to lock an rw-lock in s-mode. Performs no
-spinning. */
-UNIV_INLINE
-ibool
-rw_lock_s_lock_low(
-/*===============*/
- /* out: TRUE if success */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass __attribute__((unused)),
- /* in: pass value; != 0, if the lock will be
- passed to another thread to unlock */
- const char* file_name, /* in: file name where lock requested */
- ulint line) /* in: line where requested */
-{
- /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
- if (!rw_lock_lock_word_decr(lock, 1)) {
- /* Locking did not succeed */
- return(FALSE);
- }
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
-#endif
- /* These debugging values are not set safely: they may be incorrect
- or even refer to a line that is invalid for the file name. */
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
- return(TRUE); /* locking succeeded */
-}
-
-/* TODO: The "direct" functions are not used. Remove them? */
-/**********************************************************************
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name, /* in: file name where requested */
- ulint line) /* in: line where lock requested */
-{
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- /* Indicate there is a new reader by decrementing lock_word */
- lock->lock_word--;
-
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line);
-#endif
-}
-
-/* TODO: The "direct" functions are not used. Remove them? */
-/**********************************************************************
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_x_lock_direct(
-/*==================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name, /* in: file name where requested */
- ulint line) /* in: line where lock requested */
-{
- ut_ad(rw_lock_validate(lock));
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- lock->lock_word -= X_LOCK_DECR;
- lock->writer_thread = os_thread_get_curr_id();
- lock->pass = 0;
-
- lock->last_x_file_name = file_name;
- lock->last_x_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
-}
-
-/**********************************************************************
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in shared mode for the current thread. If the rw-lock is locked
-in exclusive mode, or there is an exclusive lock request waiting, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for
-the lock, before suspending the thread. */
-UNIV_INLINE
-void
-rw_lock_s_lock_func(
-/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
-{
- /* NOTE: As we do not know the thread ids for threads which have
- s-locked a latch, and s-lockers will be served only after waiting
- x-lock requests have been fulfilled, then if this thread already
- owns an s-lock here, it may end up in a deadlock with another thread
- which requests an x-lock here. Therefore, we will forbid recursive
- s-locking of a latch: the following assert will warn the programmer
- of the possibility of this kind of a deadlock. If we want to implement
- safe recursive s-locking, we should keep in a list the thread ids of
- the threads which have s-locked a latch. This would use some CPU
- time. */
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
-#endif /* UNIV_SYNC_DEBUG */
-
- /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
- if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
-
- return; /* Success */
- } else {
- /* Did not succeed, try spin wait */
-
- rw_lock_s_lock_spin(lock, pass, file_name, line);
-
- return;
- }
-}
-
-/**********************************************************************
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately. */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_func_nowait(
-/*=======================*/
- /* out: TRUE if success */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
-{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
- ibool success;
-
-#ifdef UNIV_SYNC_ATOMIC
- success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0);
-#else
-
- success = FALSE;
- mutex_enter(&(lock->mutex));
- if(lock->lock_word == X_LOCK_DECR) {
- lock->lock_word = 0;
- success = TRUE;
- }
- mutex_exit(&(lock->mutex));
-
-#endif
- if(success) {
- lock->writer_thread = curr_thread;
- lock->pass = 0;
-
- } else if (!(lock->pass) &&
- os_thread_eq(lock->writer_thread, curr_thread)) {
 - /* Must verify pass first: otherwise another thread could
 - call move_ownership after we have verified that our
 - thread_id matches, suddenly allowing a recursive lock
 - even though move_ownership has since changed the owner. */
-
- /* Relock: this lock_word modification is safe since no other
- threads can modify (lock, unlock, or reserve) lock_word while
- there is an exclusive writer and this is the writer thread. */
- lock->lock_word -= X_LOCK_DECR;
-
- ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
-
- } else {
- /* Failure */
- return(FALSE);
- }
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
-
- lock->last_x_file_name = file_name;
- lock->last_x_line = line;
-
- ut_ad(rw_lock_validate(lock));
-
- return(TRUE);
-}
-
-/**********************************************************************
-Releases a shared mode lock. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_func(
-/*==================*/
- rw_lock_t* lock /* in: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
- ,ulint pass /* in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- )
-{
- ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
-#endif
-
- /* Increment lock_word to indicate 1 less reader */
- if(rw_lock_lock_word_incr(lock, 1) == 0) {
-
- /* wait_ex waiter exists. It may not be asleep, but we signal
- anyway. We do not wake other waiters, because they can't
- exist without wait_ex waiter and wait_ex waiter goes first.*/
- os_event_set(lock->wait_ex_event);
- sync_array_object_signalled(sync_primary_wait_array);
-
- }
-
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_s_exit_count++;
-#endif
-}
-
-/* TODO: The "direct" functions are not used. Remove them? */
-/**********************************************************************
-Releases a shared mode lock when we know there are no waiters and none
-else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_direct(
-/*====================*/
- rw_lock_t* lock) /* in: rw-lock */
-{
- ut_ad(lock->lock_word < X_LOCK_DECR);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
-#endif
-
- /* Decrease reader count by incrementing lock_word */
- lock->lock_word++;
-
- ut_ad(!rw_lock_get_waiters(lock));
- ut_ad(rw_lock_validate(lock));
-#ifdef UNIV_SYNC_PERF_STAT
- rw_s_exit_count++;
-#endif
-}
-
-/**********************************************************************
-Releases an exclusive mode lock. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_func(
-/*==================*/
- rw_lock_t* lock /* in: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
- ,ulint pass /* in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- )
-{
- uint local_pass;
- ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
-
- /*
- Must reset pass while we still have the lock.
- If we are not the last unlocker, we correct it later in the function,
- which is harmless since we still hold the lock.
- */
- local_pass = lock->pass;
- lock->pass = 1;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
-#endif
-
- if(rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
- /* Lock is now free. May have to signal read/write waiters.
- We do not need to signal wait_ex waiters, since they cannot
- exist when there is a writer. */
- if(rw_lock_get_waiters(lock)) {
- rw_lock_reset_waiters(lock);
- os_event_set(lock->event);
- sync_array_object_signalled(sync_primary_wait_array);
- }
-
- } else {
- /* We still hold x-lock, so we correct pass. */
- lock->pass = local_pass;
- }
-
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_x_exit_count++;
-#endif
-}
-
-/* TODO: The "direct" functions are not used. Remove them? */
-/**********************************************************************
-Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_direct(
-/*====================*/
- rw_lock_t* lock) /* in: rw-lock */
-{
- /* Reset the exclusive lock if this thread no longer has an x-mode
- lock */
-
- ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
-#endif
- lock->pass = 1;
- lock->lock_word += X_LOCK_DECR;
-
- ut_ad(!rw_lock_get_waiters(lock));
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_x_exit_count++;
-#endif
-}
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
deleted file mode 100644
index ae6c72bcd15..00000000000
--- a/storage/innobase/include/sync0sync.h
+++ /dev/null
@@ -1,561 +0,0 @@
-/******************************************************
-Mutex, the basic synchronization primitive
-
-(c) 1995 Innobase Oy
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0sync_h
-#define sync0sync_h
-
-#include "univ.i"
-#include "sync0types.h"
-#include "ut0lst.h"
-#include "ut0mem.h"
-#include "os0thread.h"
-#include "os0sync.h"
-#include "sync0arr.h"
-#ifndef WIN32
-#include "my_atomic.h"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-extern my_bool timed_mutexes;
-#endif /* UNIV_HOTBACKUP */
-
-/**********************************************************************
-Initializes the synchronization data structures. */
-
-void
-sync_init(void);
-/*===========*/
-/**********************************************************************
-Frees the resources in synchronization data structures. */
-
-void
-sync_close(void);
-/*===========*/
-/**********************************************************************
-Creates, or rather, initializes a mutex object to a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define mutex_create(M, level) \
- mutex_create_func((M), #M, (level), __FILE__, __LINE__)
-# else
-# define mutex_create(M, level) \
- mutex_create_func((M), #M, __FILE__, __LINE__)
-# endif
-#else
-# define mutex_create(M, level) \
- mutex_create_func((M), __FILE__, __LINE__)
-#endif
-
-/**********************************************************************
-Creates, or rather, initializes a mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-
-void
-mutex_create_func(
-/*==============*/
- mutex_t* mutex, /* in: pointer to memory */
-#ifdef UNIV_DEBUG
- const char* cmutex_name, /* in: mutex name */
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /* in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /* in: file name where created */
- ulint cline); /* in: file line where created */
-/**********************************************************************
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a mutex object from the mutex list. The mutex
-is checked to be in the reset state. */
-
-#undef mutex_free /* Fix for MacOS X */
-void
-mutex_free(
-/*=======*/
- mutex_t* mutex); /* in: mutex */
-/******************************************************************
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
-/**********************************************************************
-A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled
-inlining of InnoDB functions, and no inlined functions should be called from
-there. That is why we need to duplicate the inlined function here. */
-
-void
-mutex_enter_noninline(
-/*==================*/
- mutex_t* mutex); /* in: mutex */
-/******************************************************************
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-/* NOTE! currently same as mutex_enter! */
-
-#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__)
-#define mutex_enter_fast_func mutex_enter_func;
-/**********************************************************************
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Locks a mutex for the current thread. If the mutex is reserved
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where locked */
- ulint line); /* in: line where locked */
-/******************************************************************
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-#define mutex_enter_nowait(M) \
- mutex_enter_nowait_func((M), __FILE__, __LINE__)
-/************************************************************************
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1. */
-
-ulint
-mutex_enter_nowait_func(
-/*====================*/
- /* out: 0 if succeed, 1 if not */
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where mutex
- requested */
- ulint line); /* in: line where requested */
-/**********************************************************************
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit(
-/*=======*/
- mutex_t* mutex); /* in: pointer to mutex */
-/**********************************************************************
-Releases a mutex. */
-
-void
-mutex_exit_noninline(
-/*=================*/
- mutex_t* mutex); /* in: mutex */
-/**********************************************************************
-Returns TRUE if no mutex or rw-lock is currently locked.
-Works only in the debug version. */
-
-ibool
-sync_all_freed(void);
-/*================*/
-/*#####################################################################
-FUNCTION PROTOTYPES FOR DEBUGGING */
-/***********************************************************************
-Prints wait info of the sync system. */
-
-void
-sync_print_wait_info(
-/*=================*/
- FILE* file); /* in: file where to print */
-/***********************************************************************
-Prints info of the sync system. */
-
-void
-sync_print(
-/*=======*/
- FILE* file); /* in: file where to print */
-#ifdef UNIV_DEBUG
-/**********************************************************************
-Checks that the mutex has been initialized. */
-
-ibool
-mutex_validate(
-/*===========*/
- const mutex_t* mutex);
-/**********************************************************************
-Checks that the current thread owns the mutex. Works only
-in the debug version. */
-
-ibool
-mutex_own(
-/*======*/
- /* out: TRUE if owns */
- const mutex_t* mutex); /* in: mutex */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-
-void
-sync_thread_add_level(
-/*==================*/
- void* latch, /* in: pointer to a mutex or an rw-lock */
- ulint level); /* in: level in the latching order; if
- SYNC_LEVEL_VARYING, nothing is done */
-/**********************************************************************
-Removes a latch from the thread level array if it is found there. */
-
-ibool
-sync_thread_reset_level(
-/*====================*/
- /* out: TRUE if found from the array; it is no error
- if the latch is not found, as we presently are not
- able to determine the level for every latch
- reservation the program does */
- void* latch); /* in: pointer to a mutex or an rw-lock */
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
-ibool
-sync_thread_levels_empty(void);
-/*==========================*/
- /* out: TRUE if empty */
-/**********************************************************************
-Checks that the level array for the current thread is empty (with exceptions). */
-
-ibool
-sync_thread_levels_empty_gen(
-/*=========================*/
- /* out: TRUE if empty except the
- exceptions specified below */
- ibool dict_mutex_allowed); /* in: TRUE if dictionary mutex is
- allowed to be owned by the thread,
- also purge_is_running mutex is
- allowed */
-/**********************************************************************
-Gets the debug information for a reserved mutex. */
-
-void
-mutex_get_debug_info(
-/*=================*/
- mutex_t* mutex, /* in: mutex */
- const char** file_name, /* out: file where requested */
- ulint* line, /* out: line where requested */
- os_thread_id_t* thread_id); /* out: id of the thread which owns
- the mutex */
-/**********************************************************************
-Counts currently reserved mutexes. Works only in the debug version. */
-
-ulint
-mutex_n_reserved(void);
-/*==================*/
-#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
-NOT to be used outside this module except in debugging! Gets the value
-of the lock word. */
-UNIV_INLINE
-byte
-mutex_get_lock_word(
-/*================*/
- const mutex_t* mutex); /* in: mutex */
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-NOT to be used outside this module except in debugging! Gets the waiters
-field in a mutex. */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
- /* out: value of the waiters field */
- const mutex_t* mutex); /* in: mutex */
-#endif /* UNIV_SYNC_DEBUG */
-
-/*
- LATCHING ORDER WITHIN THE DATABASE
- ==================================
-
-The mutex or latch in the central memory object, for instance, a rollback
-segment object, must be acquired before acquiring the latch or latches to
-the corresponding file data structure. In the latching order below, these
-file page object latches are placed immediately below the corresponding
-central memory object latch or mutex.
-
-Synchronization object Notes
----------------------- -----
-
-Dictionary mutex If we have a pointer to a dictionary
-| object, e.g., a table, it can be
-| accessed without reserving the
-| dictionary mutex. We must have a
-| reservation, a memoryfix, to the
-| appropriate table object in this case,
-| and the table must be explicitly
-| released later.
-V
-Dictionary header
-|
-V
-Secondary index tree latch The tree latch protects also all
-| the B-tree non-leaf pages. These
-V can be read with the page only
-Secondary index non-leaf bufferfixed to save CPU time,
-| no s-latch is needed on the page.
-| Modification of a page requires an
-| x-latch on the page, however. If a
-| thread owns an x-latch to the tree,
-| it is allowed to latch non-leaf pages
-| even after it has acquired the fsp
-| latch.
-V
-Secondary index leaf The latch on the secondary index leaf
-| can be kept while accessing the
-| clustered index, to save CPU time.
-V
-Clustered index tree latch To increase concurrency, the tree
-| latch is usually released when the
-| leaf page latch has been acquired.
-V
-Clustered index non-leaf
-|
-V
-Clustered index leaf
-|
-V
-Transaction system header
-|
-V
-Transaction undo mutex The undo log entry must be written
-| before any index page is modified.
-| Transaction undo mutex is for the undo
-| logs the analogue of the tree latch
-| for a B-tree. If a thread has the
-| trx undo mutex reserved, it is allowed
-| to latch the undo log pages in any
-| order, and also after it has acquired
-| the fsp latch.
-V
-Rollback segment mutex The rollback segment mutex must be
-| reserved, if, e.g., a new page must
-| be added to an undo log. The rollback
-| segment and the undo logs in its
-| history list can be seen as an
-| analogue of a B-tree, and the latches
-| reserved similarly, using a version of
-| lock-coupling. If an undo log must be
-| extended by a page when inserting an
-| undo log record, this corresponds to
-| a pessimistic insert in a B-tree.
-V
-Rollback segment header
-|
-V
-Purge system latch
-|
-V
-Undo log pages If a thread owns the trx undo mutex,
-| or for a log in the history list, the
-| rseg mutex, it is allowed to latch
-| undo log pages in any order, and even
-| after it has acquired the fsp latch.
-| If a thread does not have the
-| appropriate mutex, it is allowed to
-| latch only a single undo log page in
-| a mini-transaction.
-V
-File space management latch If a mini-transaction must allocate
-| several file pages, it can do that,
-| because it keeps the x-latch to the
-| file space management in its memo.
-V
-File system pages
-|
-V
-Kernel mutex If a kernel operation needs a file
-| page allocation, it must reserve the
-| fsp x-latch before acquiring the kernel
-| mutex.
-V
-Search system mutex
-|
-V
-Buffer pool mutex
-|
-V
-Log mutex
-|
-Any other latch
-|
-V
-Memory pool mutex */
-
-/* Latching order levels */
-
-/* User transaction locks are higher than any of the latch levels below:
-no latches are allowed when a thread goes to wait for a normal table
-or row lock! */
-#define SYNC_USER_TRX_LOCK 9999
-#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress
- latching order checking */
-#define SYNC_LEVEL_VARYING 2000 /* Level is varying. Only used with
- buffer pool page locks, which do not
- have a fixed level, but instead have
- their level set after the page is
- locked; see e.g.
- ibuf_bitmap_get_map_page(). */
-#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve
- this in X-mode, implicit or background
- operations purge, rollback, foreign
- key checks reserve this in S-mode */
-#define SYNC_DICT 1000
-#define SYNC_DICT_AUTOINC_MUTEX 999
-#define SYNC_DICT_HEADER 995
-#define SYNC_IBUF_HEADER 914
-#define SYNC_IBUF_PESS_INSERT_MUTEX 912
-#define SYNC_IBUF_MUTEX 910 /* ibuf mutex is really below
- SYNC_FSP_PAGE: we assign a value this
- high only to make the program pass
- the debug checks */
-/*-------------------------------*/
-#define SYNC_INDEX_TREE 900
-#define SYNC_TREE_NODE_NEW 892
-#define SYNC_TREE_NODE_FROM_HASH 891
-#define SYNC_TREE_NODE 890
-#define SYNC_PURGE_SYS 810
-#define SYNC_PURGE_LATCH 800
-#define SYNC_TRX_UNDO 700
-#define SYNC_RSEG 600
-#define SYNC_RSEG_HEADER_NEW 591
-#define SYNC_RSEG_HEADER 590
-#define SYNC_TRX_UNDO_PAGE 570
-#define SYNC_EXTERN_STORAGE 500
-#define SYNC_FSP 400
-#define SYNC_FSP_PAGE 395
-/*------------------------------------- Insert buffer headers */
-/*------------------------------------- ibuf_mutex */
-/*------------------------------------- Insert buffer tree */
-#define SYNC_IBUF_BITMAP_MUTEX 351
-#define SYNC_IBUF_BITMAP 350
-/*------------------------------------- MySQL query cache mutex */
-/*------------------------------------- MySQL binlog mutex */
-/*-------------------------------*/
-#define SYNC_KERNEL 300
-#define SYNC_REC_LOCK 299
-#define SYNC_TRX_LOCK_HEAP 298
-#define SYNC_TRX_SYS_HEADER 290
-#define SYNC_LOG 170
-#define SYNC_RECV 168
-#define SYNC_WORK_QUEUE 161
-#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
- heap that can be extended to the
- buffer pool, its logical level is
- SYNC_SEARCH_SYS, as memory allocation
- can call routines there! Otherwise
- the level is SYNC_MEM_HASH. */
-#define SYNC_BUF_POOL 150
-#define SYNC_BUF_BLOCK 149
-#define SYNC_DOUBLEWRITE 140
-#define SYNC_ANY_LATCH 135
-#define SYNC_THR_LOCAL 133
-#define SYNC_MEM_HASH 131
-#define SYNC_MEM_POOL 130
-
-/* Codes used to designate lock operations */
-#define RW_LOCK_NOT_LOCKED 350
-#define RW_LOCK_EX 351
-#define RW_LOCK_EXCLUSIVE 351
-#define RW_LOCK_SHARED 352
-#define RW_LOCK_WAIT_EX 353
-#define SYNC_MUTEX 354
-
-/* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! The structure used in the spin lock
-implementation of a mutual exclusion semaphore. */
-
-struct mutex_struct {
- os_event_t event; /* Used by sync0arr.c for the wait queue */
-
- byte lock_word; /* This byte is the target of the atomic
- test-and-set instruction in Win32 and
- x86 32/64 with GCC 4.1.0 or later version */
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
-#elif defined(MY_ATOMIC_NOLOCK)
- /* We have my_atomic_* routines that are
- intrinsically atomic, so no need for the
- mutex. */
-#else
- os_fast_mutex_t
- os_fast_mutex; /* In other systems we use this OS mutex
- in place of lock_word */
-#endif
- ulint waiters; /* This ulint is set to 1 if there are (or
- may be) threads waiting in the global wait
- array for this mutex to be released.
- Otherwise, this is 0. */
- UT_LIST_NODE_T(mutex_t) list; /* All allocated mutexes are put into
- a list. Pointers to the next and prev. */
-#ifdef UNIV_SYNC_DEBUG
- const char* file_name; /* File where the mutex was locked */
- ulint line; /* Line where the mutex was locked */
- ulint level; /* Level in the global latching order */
-#endif /* UNIV_SYNC_DEBUG */
- const char* cfile_name;/* File name where mutex created */
- ulint cline; /* Line where created */
-#ifdef UNIV_DEBUG
- os_thread_id_t thread_id; /* The thread id of the thread
- which locked the mutex. */
- ulint magic_n;
-# define MUTEX_MAGIC_N (ulint)979585
-#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
- ulong count_os_wait; /* count of os_wait */
-# ifdef UNIV_DEBUG
- ulong count_using; /* count of times mutex used */
- ulong count_spin_loop; /* count of spin loops */
- ulong count_spin_rounds; /* count of spin rounds */
- ulong count_os_yield; /* count of os_yield */
- ulonglong lspent_time; /* mutex os_wait timer msec */
- ulonglong lmax_spent_time; /* mutex os_wait timer msec */
- const char* cmutex_name;/* mutex name */
- ulint mutex_type;/* 0 - usual mutex 1 - rw_lock mutex */
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-};
-
-/* The global array of wait cells for implementation of the database's own
-mutexes and read-write locks. Appears here for debugging purposes only! */
-
-extern sync_array_t* sync_primary_wait_array;
-
-/* Constant determining how long spin wait is continued before suspending
-the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
-to 20 microseconds. */
-
-#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
-
-/* The number of system calls made in this module. Intended for performance
-monitoring. */
-
-extern ib_longlong mutex_exit_count;
-
-#ifdef UNIV_SYNC_DEBUG
-/* Latching order checks start when this is set TRUE */
-extern ibool sync_order_checks_on;
-#endif /* UNIV_SYNC_DEBUG */
-
-/* This variable is set to TRUE when sync_init is called */
-extern ibool sync_initialized;
-
-/* Global list of database mutexes (not OS mutexes) created. */
-typedef UT_LIST_BASE_NODE_T(mutex_t) ut_list_base_node_t;
-extern ut_list_base_node_t mutex_list;
-
-/* Mutex protecting the mutex_list variable */
-extern mutex_t mutex_list_mutex;
-
-
-#ifndef UNIV_NONINL
-#include "sync0sync.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
deleted file mode 100644
index f5a85e0e7fb..00000000000
--- a/storage/innobase/include/sync0sync.ic
+++ /dev/null
@@ -1,248 +0,0 @@
-/******************************************************
-Mutex, the basic synchronization primitive
-
-(c) 1995 Innobase Oy
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-/**********************************************************************
-Sets the waiters field in a mutex. */
-
-void
-mutex_set_waiters(
-/*==============*/
- mutex_t* mutex, /* in: mutex */
- ulint n); /* in: value to set */
-/**********************************************************************
-Reserves a mutex for the current thread. If the mutex is reserved, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
-
-void
-mutex_spin_wait(
-/*============*/
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where mutex
- requested */
- ulint line); /* in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Sets the debug information for a reserved mutex. */
-
-void
-mutex_set_debug_info(
-/*=================*/
- mutex_t* mutex, /* in: mutex */
- const char* file_name, /* in: file where requested */
- ulint line); /* in: line where requested */
-#endif /* UNIV_SYNC_DEBUG */
-/**********************************************************************
-Releases the threads waiting in the primary wait array for this mutex. */
-
-void
-mutex_signal_object(
-/*================*/
- mutex_t* mutex); /* in: mutex */
-
-/**********************************************************************
-Performs an atomic test-and-set instruction to the lock_word field of a
-mutex. */
-UNIV_INLINE
-byte
-mutex_test_and_set(
-/*===============*/
- /* out: the previous value of lock_word: 0 or
- 1 */
- mutex_t* mutex) /* in: mutex */
-{
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
- byte res;
- byte* lw; /* assembler code is used to ensure that
- lock_word is loaded from memory */
- ut_ad(mutex);
- ut_ad(sizeof(byte) == 1);
-
- lw = &(mutex->lock_word);
-
- __asm MOV ECX, lw
- __asm MOV EDX, 1
- __asm XCHG DL, BYTE PTR [ECX]
- __asm MOV res, DL
-
- /* The fence below would prevent this thread from
- reading the data structure protected by the mutex
- before the test-and-set operation is committed, but
- the fence is apparently not needed:
-
- In a posting to comp.arch newsgroup (August 10, 1997)
- Andy Glew said that in P6 a LOCKed instruction like
- XCHG establishes a fence with respect to memory reads
- and writes and thus an explicit fence is not
- needed. In P5 he seemed to agree with a previous
- newsgroup poster that LOCKed instructions serialize
- all instruction execution, and, consequently, also
- memory operations. This is confirmed in Intel Software
- Dev. Manual, Vol. 3. */
-
- /* mutex_fence(); */
-
- return(res);
-#elif defined(MY_ATOMIC_NOLOCK)
- return ((byte)my_atomic_swap8(
- (int8 volatile *)&(mutex->lock_word), 1));
-#else
- ibool ret;
-
- ret = os_fast_mutex_trylock(&(mutex->os_fast_mutex));
-
- if (ret == 0) {
- /* We check that os_fast_mutex_trylock does not leak
- and allow race conditions */
- ut_a(mutex->lock_word == 0);
-
- mutex->lock_word = 1;
- }
-
- return((byte)ret);
-#endif
-}
-
-/**********************************************************************
-Performs a reset instruction to the lock_word field of a mutex. This
-instruction also serializes memory operations to the program order. */
-UNIV_INLINE
-void
-mutex_reset_lock_word(
-/*==================*/
- mutex_t* mutex) /* in: mutex */
-{
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
- byte* lw; /* assembler code is used to ensure that
- lock_word is loaded from memory */
- ut_ad(mutex);
-
- lw = &(mutex->lock_word);
-
- __asm MOV EDX, 0
- __asm MOV ECX, lw
- __asm XCHG DL, BYTE PTR [ECX]
-#elif defined(MY_ATOMIC_NOLOCK)
- /* In theory __sync_lock_release should be used to release the lock.
- Unfortunately, it does not work properly alone. The workaround is
- that more conservative __sync_lock_test_and_set is used instead. */
- (void)my_atomic_swap8((int8 volatile *)&(mutex->lock_word), 0);
-#else
- mutex->lock_word = 0;
-
- os_fast_mutex_unlock(&(mutex->os_fast_mutex));
-#endif
-}
-
-/**********************************************************************
-Gets the value of the lock word. */
-UNIV_INLINE
-byte
-mutex_get_lock_word(
-/*================*/
- const mutex_t* mutex) /* in: mutex */
-{
- const volatile byte* ptr; /* declared volatile to ensure that
- lock_word is loaded from memory */
- ut_ad(mutex);
-
- ptr = &(mutex->lock_word);
-
- return(*ptr);
-}
-
-/**********************************************************************
-Gets the waiters field in a mutex. */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
- /* out: value of the waiters field */
- const mutex_t* mutex) /* in: mutex */
-{
- const volatile ulint* ptr; /* declared volatile to ensure that
- the value is read from memory */
- ut_ad(mutex);
-
- ptr = &(mutex->waiters);
-
- return(*ptr); /* Here we assume that the read of a single
- word from memory is atomic */
-}
-
-/**********************************************************************
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit(
-/*=======*/
- mutex_t* mutex) /* in: pointer to mutex */
-{
- ut_ad(mutex_own(mutex));
-
- ut_d(mutex->thread_id = (os_thread_id_t) ULINT_UNDEFINED);
-
-#ifdef UNIV_SYNC_DEBUG
- sync_thread_reset_level(mutex);
-#endif
- mutex_reset_lock_word(mutex);
-
- /* A problem: we assume that mutex_reset_lock_word
- is a memory barrier, that is when we read the waiters
- field next, the read must be serialized in memory
- after the reset. A speculative processor might
- perform the read first, which could leave a waiting
- thread hanging indefinitely.
-
- Our current solution is to call
- sync_arr_wake_threads_if_sema_free() every second
- to wake up possible hanging threads if
- they are missed in mutex_signal_object. */
-
- if (mutex_get_waiters(mutex) != 0) {
-
- mutex_signal_object(mutex);
- }
-
-#ifdef UNIV_SYNC_PERF_STAT
- mutex_exit_count++;
-#endif
-}
-
-/**********************************************************************
-Locks a mutex for the current thread. If the mutex is reserved, the function
-spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
-before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where locked */
- ulint line) /* in: line where locked */
-{
- ut_ad(mutex_validate(mutex));
- ut_ad(!mutex_own(mutex));
-
- /* Note that we do not peek at the value of lock_word before trying
- the atomic test_and_set; we could peek, and possibly save time. */
-
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- mutex->count_using++;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-
- if (!mutex_test_and_set(mutex)) {
- ut_d(mutex->thread_id = os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
- return; /* Succeeded! */
- }
-
- mutex_spin_wait(mutex, file_name, line);
-}
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
deleted file mode 100644
index 57478426f25..00000000000
--- a/storage/innobase/include/sync0types.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/******************************************************
-Global types for sync
-
-(c) 1995 Innobase Oy
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0types_h
-#define sync0types_h
-
-#define mutex_t ib_mutex_t
-typedef struct mutex_struct mutex_t;
-
-
-#endif
diff --git a/storage/innobase/include/thr0loc.h b/storage/innobase/include/thr0loc.h
deleted file mode 100644
index 32e2dc3ae93..00000000000
--- a/storage/innobase/include/thr0loc.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/******************************************************
-The thread local storage
-
-(c) 1995 Innobase Oy
-
-Created 10/5/1995 Heikki Tuuri
-*******************************************************/
-
-/* This module implements storage private to each thread,
-a capability useful in some situations like storing the
-OS handle to the current thread, or its priority. */
-
-#ifndef thr0loc_h
-#define thr0loc_h
-
-#include "univ.i"
-#include "os0thread.h"
-
-/********************************************************************
-Initializes the thread local storage module. */
-
-void
-thr_local_init(void);
-/*================*/
-/***********************************************************************
-Creates a local storage struct for the calling new thread. */
-
-void
-thr_local_create(void);
-/*==================*/
-/***********************************************************************
-Frees the local storage struct for the specified thread. */
-
-void
-thr_local_free(
-/*===========*/
- os_thread_id_t id); /* in: thread id */
-/***********************************************************************
-Gets the slot number in the thread table of a thread. */
-
-ulint
-thr_local_get_slot_no(
-/*==================*/
- /* out: slot number */
- os_thread_id_t id); /* in: thread id of the thread */
-/***********************************************************************
-Sets in the local storage the slot number in the thread table of a thread. */
-
-void
-thr_local_set_slot_no(
-/*==================*/
- os_thread_id_t id, /* in: thread id of the thread */
- ulint slot_no);/* in: slot number */
-/***********************************************************************
-Returns pointer to the 'in_ibuf' field within the current thread local
-storage. */
-
-ibool*
-thr_local_get_in_ibuf_field(void);
-/*=============================*/
- /* out: pointer to the in_ibuf field */
-
-#ifndef UNIV_NONINL
-#include "thr0loc.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/thr0loc.ic b/storage/innobase/include/thr0loc.ic
deleted file mode 100644
index b8b8136180c..00000000000
--- a/storage/innobase/include/thr0loc.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Thread local storage
-
-(c) 1995 Innobase Oy
-
-Created 10/4/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
deleted file mode 100644
index c4aab91a93a..00000000000
--- a/storage/innobase/include/trx0purge.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/******************************************************
-Purge old versions
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0purge_h
-#define trx0purge_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "que0types.h"
-#include "page0page.h"
-#include "usr0sess.h"
-#include "fil0fil.h"
-
-/* The global data structure coordinating a purge */
-extern trx_purge_t* purge_sys;
-
-/* A dummy undo record used as a return value when we have a whole undo log
-which needs no purge */
-extern trx_undo_rec_t trx_purge_dummy_rec;
-
-/************************************************************************
-Calculates the file address of an undo log header when we have the file
-address of its history list node. */
-UNIV_INLINE
-fil_addr_t
-trx_purge_get_log_from_hist(
-/*========================*/
- /* out: file address of the log */
- fil_addr_t node_addr); /* in: file address of the history
- list node of the log */
-/*********************************************************************
-Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system. */
-
-ibool
-trx_purge_update_undo_must_exist(
-/*=============================*/
- /* out: TRUE if it is sure that it is preserved; also,
- if the function returns FALSE, it is possible that
- the undo log still exists in the system */
- dulint trx_id);/* in: transaction id */
-/************************************************************************
-Creates the global purge system control structure and inits the history
-mutex. */
-
-void
-trx_purge_sys_create(void);
-/*======================*/
-/************************************************************************
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
-
-void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /* in: transaction */
- page_t* undo_page, /* in: update undo log header page,
- x-latched */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
-Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function. */
-
-trx_undo_rec_t*
-trx_purge_fetch_next_rec(
-/*=====================*/
- /* out: copy of an undo log record, or
- pointer to the dummy undo log record
- &trx_purge_dummy_rec if the whole undo log
- can be skipped in purge; NULL if none left */
- dulint* roll_ptr,/* out: roll pointer to undo record */
- trx_undo_inf_t** cell, /* out: storage cell for the record in the
- purge array */
- mem_heap_t* heap); /* in: memory heap where copied */
-/***********************************************************************
-Releases a reserved purge undo record. */
-
-void
-trx_purge_rec_release(
-/*==================*/
- trx_undo_inf_t* cell); /* in: storage cell */
-/***********************************************************************
-This function runs a purge batch. */
-
-ulint
-trx_purge(void);
-/*===========*/
- /* out: number of undo log pages handled in
- the batch */
-/**********************************************************************
-Prints information of the purge system to stderr. */
-
-void
-trx_purge_sys_print(void);
-/*======================*/
-
-/* The control structure used in the purge operation */
-struct trx_purge_struct{
- ulint state; /* Purge system state: presumably TRX_PURGE_ON or TRX_STOP_PURGE */
- sess_t* sess; /* System session running the purge
- query */
- trx_t* trx; /* System transaction running the purge
- query: this trx is not in the trx list
- of the trx system and it never ends */
- que_t* query; /* The query graph which will do the
- parallelized purge operation */
- rw_lock_t latch; /* The latch protecting the purge view.
- A purge operation must acquire an
- x-latch here for the instant at which
- it changes the purge view: an undo
- log operation can prevent this by
- obtaining an s-latch here. */
- read_view_t* view; /* The purge will not remove undo logs
- which are >= this view (purge view) */
- mutex_t mutex; /* Mutex protecting the fields below */
- ulint n_pages_handled;/* Approximate number of undo log
- pages processed in purge */
- ulint handle_limit; /* Target of how many pages to get
- processed in the current purge */
- /*------------------------------*/
- /* The following two fields form the 'purge pointer' which advances
- during a purge, and which is used in history list truncation */
-
- dulint purge_trx_no; /* Purge has advanced past all
- transactions whose number is less
- than this */
- dulint purge_undo_no; /* Purge has advanced past all records
- whose undo number is less than this */
- /*-----------------------------*/
- ibool next_stored; /* TRUE if the info of the next record
- to purge is stored below: if yes, then
- the transaction number and the undo
- number of the record are stored in
- purge_trx_no and purge_undo_no above */
- trx_rseg_t* rseg; /* Rollback segment for the next undo
- record to purge */
- ulint page_no; /* Page number for the next undo
- record to purge, page number of the
- log header, if dummy record */
- ulint offset; /* Page offset for the next undo
- record to purge, 0 if the dummy
- record */
- ulint hdr_page_no; /* Header page of the undo log where
- the next record to purge belongs */
- ulint hdr_offset; /* Header byte offset on the page */
- /*-----------------------------*/
- trx_undo_arr_t* arr; /* Array of transaction numbers and
- undo numbers of the undo records
- currently under processing in purge */
- mem_heap_t* heap; /* Temporary storage used during a
- purge: can be emptied after purge
- completes */
-};
-
-#define TRX_PURGE_ON 1 /* purge operation is running */
-#define TRX_STOP_PURGE 2 /* purge operation is stopped, or
- it should be stopped */
-#ifndef UNIV_NONINL
-#include "trx0purge.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic
deleted file mode 100644
index 9f1c0ed96f8..00000000000
--- a/storage/innobase/include/trx0purge.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/******************************************************
-Purge old versions
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0undo.h"
-
-/************************************************************************
-Given the file address of the history list node of an undo log, computes
-the file address of the header of that undo log. */
-UNIV_INLINE
-fil_addr_t
-trx_purge_get_log_from_hist(
-/*========================*/
- /* out: file address of the log header */
- fil_addr_t node_addr) /* in: file address of the history
- list node of the log; passed by value */
-{
- /* The list node lies TRX_UNDO_HISTORY_NODE bytes past the header */
- node_addr.boffset = node_addr.boffset - TRX_UNDO_HISTORY_NODE;
- return(node_addr);
-}
-
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
deleted file mode 100644
index 6447b6a2e35..00000000000
--- a/storage/innobase/include/trx0rec.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/******************************************************
-Transaction undo log record
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0rec_h
-#define trx0rec_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "dict0types.h"
-#include "que0types.h"
-#include "data0data.h"
-#include "rem0types.h"
-
-/***************************************************************************
-Copies the undo record to the heap. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_rec_copy(
-/*==============*/
- /* out, own: copy of undo log record */
- trx_undo_rec_t* undo_rec, /* in: undo log record */
- mem_heap_t* heap); /* in: heap where copied */
-/**************************************************************************
-Reads the undo log record type. */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_type(
-/*==================*/
- /* out: record type */
- trx_undo_rec_t* undo_rec); /* in: undo log record */
-/**************************************************************************
-Reads from an undo log record the record compiler info. */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- /* out: compiler info */
- trx_undo_rec_t* undo_rec); /* in: undo log record */
-/**************************************************************************
-Returns TRUE if an undo log record contains an extern storage field. */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- /* out: TRUE if extern */
- trx_undo_rec_t* undo_rec); /* in: undo log record */
-/**************************************************************************
-Reads the undo log record number. */
-UNIV_INLINE
-dulint
-trx_undo_rec_get_undo_no(
-/*=====================*/
- /* out: undo no */
- trx_undo_rec_t* undo_rec); /* in: undo log record */
-/**************************************************************************
-Reads from an undo log record the general parameters. */
-
-byte*
-trx_undo_rec_get_pars(
-/*==================*/
- /* out: remaining part of undo log
- record after reading these values */
- trx_undo_rec_t* undo_rec, /* in: undo log record */
- ulint* type, /* out: undo record type:
- TRX_UNDO_INSERT_REC, ... */
- ulint* cmpl_info, /* out: compiler info, relevant only
- for update type records */
- ibool* updated_extern, /* out: TRUE if we updated an
- externally stored field */
- dulint* undo_no, /* out: undo log record number */
- dulint* table_id); /* out: table id */
-/***********************************************************************
-Builds a row reference from an undo log record. */
-
-byte*
-trx_undo_rec_get_row_ref(
-/*=====================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part of a copy of an undo log
- record, at the start of the row reference;
- NOTE that this copy of the undo log record must
- be preserved as long as the row reference is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /* in: clustered index */
- dtuple_t** ref, /* out, own: row reference */
- mem_heap_t* heap); /* in: memory heap from which the memory
- needed is allocated */
-/***********************************************************************
-Skips a row reference from an undo log record. */
-
-byte*
-trx_undo_rec_skip_row_ref(
-/*======================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part in update undo log
- record, at the start of the row reference */
- dict_index_t* index); /* in: clustered index */
-/**************************************************************************
-Reads from an undo log update record the system field values of the old
-version. */
-
-byte*
-trx_undo_update_rec_get_sys_cols(
-/*=============================*/
- /* out: remaining part of undo log
- record after reading these values */
- byte* ptr, /* in: remaining part of undo log
- record after reading general
- parameters */
- dulint* trx_id, /* out: trx id */
- dulint* roll_ptr, /* out: roll ptr */
- ulint* info_bits); /* out: info bits state */
-/***********************************************************************
-Builds an update vector based on a remaining part of an undo log record. */
-
-byte*
-trx_undo_update_rec_get_update(
-/*===========================*/
- /* out: remaining part of the record,
- NULL if an error detected, which means that
- the record is corrupted */
- byte* ptr, /* in: remaining part in update undo log
- record, after reading the row reference
- NOTE that this copy of the undo log record must
- be preserved as long as the update vector is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /* in: clustered index */
- ulint type, /* in: TRX_UNDO_UPD_EXIST_REC,
- TRX_UNDO_UPD_DEL_REC, or
- TRX_UNDO_DEL_MARK_REC; in the last case,
- only trx id and roll ptr fields are added to
- the update vector */
- dulint trx_id, /* in: transaction id from this undo record */
- dulint roll_ptr,/* in: roll pointer from this undo record */
- ulint info_bits,/* in: info bits from this undo record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap, /* in: memory heap from which the memory
- needed is allocated */
- upd_t** upd); /* out, own: update vector */
-/***********************************************************************
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table. */
-
-byte*
-trx_undo_rec_get_partial_row(
-/*=========================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part in update undo log
- record of a suitable type, at the start of
- the stored index columns;
- NOTE that this copy of the undo log record must
- be preserved as long as the partial row is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /* in: clustered index */
- dtuple_t** row, /* out, own: partial row */
- mem_heap_t* heap); /* in: memory heap from which the memory
- needed is allocated */
-/***************************************************************************
-Writes information to an undo log about an insert, update, or a delete marking
-of a clustered index record. This information is used in a rollback of the
-transaction and in consistent reads that must look to the history of this
-transaction. */
-
-ulint
-trx_undo_report_row_operation(
-/*==========================*/
- /* out: DB_SUCCESS or error code */
- ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is
- set, does nothing */
- ulint op_type, /* in: TRX_UNDO_INSERT_OP or
- TRX_UNDO_MODIFY_OP */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* clust_entry, /* in: in the case of an insert,
- index entry to insert into the
- clustered index, otherwise NULL */
- upd_t* update, /* in: in the case of an update,
- the update vector, otherwise NULL */
- ulint cmpl_info, /* in: compiler info on secondary
- index updates */
- rec_t* rec, /* in: case of an update or delete
- marking, the record in the clustered
- index, otherwise NULL */
- dulint* roll_ptr); /* out: rollback pointer to the
- inserted undo log record,
- ut_dulint_zero if BTR_NO_UNDO_LOG
- flag was specified */
-/**********************************************************************
-Copies an undo record to heap. This function can be called if we know that
-the undo log record exists. */
-
-trx_undo_rec_t*
-trx_undo_get_undo_rec_low(
-/*======================*/
- /* out, own: copy of the record */
- dulint roll_ptr, /* in: roll pointer to record */
- mem_heap_t* heap); /* in: memory heap where copied */
-/**********************************************************************
-Copies an undo record to heap. */
-
-ulint
-trx_undo_get_undo_rec(
-/*==================*/
- /* out: DB_SUCCESS, or
- DB_MISSING_HISTORY if the undo log
- has been truncated and we cannot
- fetch the old version; NOTE: the
- caller must have latches on the
- clustered index page and purge_view */
- dulint roll_ptr, /* in: roll pointer to record */
- dulint trx_id, /* in: id of the trx that generated
- the roll pointer: it points to an
- undo log of this transaction */
- trx_undo_rec_t** undo_rec, /* out, own: copy of the record */
- mem_heap_t* heap); /* in: memory heap where copied */
-/***********************************************************************
-Build a previous version of a clustered index record. This function checks
-that the caller has a latch on the index page of the clustered index record
-and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked. */
-
-ulint
-trx_undo_prev_version_build(
-/*========================*/
- /* out: DB_SUCCESS, or DB_MISSING_HISTORY if
- the previous version is not >= purge_view,
- which means that it may have been removed,
- DB_ERROR if corrupted record */
- rec_t* index_rec,/* in: clustered index record in the
- index tree */
- mtr_t* index_mtr,/* in: mtr which contains the latch to
- index_rec page and purge_view */
- rec_t* rec, /* in: version of a clustered index record */
- dict_index_t* index, /* in: clustered index */
- ulint* offsets,/* in: rec_get_offsets(rec, index) */
- mem_heap_t* heap, /* in: memory heap from which the memory
- needed is allocated */
- rec_t** old_vers);/* out, own: previous version, or NULL if
- rec is the first inserted version, or if
- history data has been deleted */
-/***************************************************************
-Parses a redo log record of adding an undo log record. */
-
-byte*
-trx_undo_parse_add_undo_rec(
-/*========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
-/***************************************************************
-Parses a redo log record of erasing of an undo page end. */
-
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-
-/* Types of an undo log record: these have to be smaller than 16, as the
-compilation info multiplied by 16 is ORed to this value in an undo log
-record */
-#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
-#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
- record */
-#define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to
- a not delete marked record; also the
- fields of the record can change */
-#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields
- do not change */
-#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
- this and ORed to the type above */
-#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl
- to denote that we updated external
- storage fields: used by purge to
- free the external storage */
-
-/* Operation type flags used in trx_undo_report_row_operation */
-#define TRX_UNDO_INSERT_OP 1
-#define TRX_UNDO_MODIFY_OP 2
-
-#ifndef UNIV_NONINL
-#include "trx0rec.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
deleted file mode 100644
index a1ddc127ec7..00000000000
--- a/storage/innobase/include/trx0rec.ic
+++ /dev/null
@@ -1,86 +0,0 @@
-/******************************************************
-Transaction undo log record
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/**************************************************************************
-Extracts the record type from the type byte of an undo log record. */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_type(
-/*==================*/
- /* out: record type: the bits below TRX_UNDO_CMPL_INFO_MULT */
- trx_undo_rec_t* undo_rec) /* in: undo log record */
-{
- return((TRX_UNDO_CMPL_INFO_MULT - 1) & mach_read_from_1(&undo_rec[2]));
-}
-
-/**************************************************************************
-Extracts the compiler info from the type byte of an undo log record. */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- /* out: compiler info; a set TRX_UNDO_UPD_EXTERN bit is not masked off */
- trx_undo_rec_t* undo_rec) /* in: undo log record */
-{
- return(mach_read_from_1(&undo_rec[2]) / TRX_UNDO_CMPL_INFO_MULT);
-}
-
-/**************************************************************************
-Tells whether the TRX_UNDO_UPD_EXTERN flag is set in an undo log record. */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- /* out: TRUE if the flag is set */
- trx_undo_rec_t* undo_rec) /* in: undo log record */
-{
- ulint type_cmpl;
-
- type_cmpl = mach_read_from_1(undo_rec + 2);
-
- /* Normalize the masked flag bit to TRUE/FALSE */
- return((type_cmpl & TRX_UNDO_UPD_EXTERN) ? TRUE : FALSE);
-}
-
-/**************************************************************************
-Returns the undo number stored in an undo log record. */
-UNIV_INLINE
-dulint
-trx_undo_rec_get_undo_no(
-/*=====================*/
- /* out: undo number of the record */
- trx_undo_rec_t* undo_rec) /* in: undo log record */
-{
- /* The undo number is read starting at byte 3 of the record, i.e.
- right after the two-byte field at the start and the type byte at
- offset 2; the address is passed directly, without a local pointer */
-
- return(mach_dulint_read_much_compressed(undo_rec + 3));
-}
-
-/***************************************************************************
-Makes a copy of an undo log record in the given memory heap. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_rec_copy(
-/*==============*/
- /* out, own: the copy, allocated from heap */
- trx_undo_rec_t* undo_rec, /* in: undo log record to copy */
- mem_heap_t* heap) /* in: heap where the copy is allocated */
-{
- trx_undo_rec_t* rec_copy;
- ulint len;
-
- /* The first two bytes of the record hold an offset that is applied to
- buf_frame_align(undo_rec); the copy runs from undo_rec up to there */
- len = buf_frame_align(undo_rec) + mach_read_from_2(undo_rec) - undo_rec;
- rec_copy = mem_heap_alloc(heap, len);
- ut_memcpy(rec_copy, undo_rec, len);
-
- return(rec_copy);
-}
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
deleted file mode 100644
index c1eca3d5753..00000000000
--- a/storage/innobase/include/trx0roll.h
+++ /dev/null
@@ -1,314 +0,0 @@
-/******************************************************
-Transaction rollback
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0roll_h
-#define trx0roll_h
-
-#include "univ.i"
-#include "trx0trx.h"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-
-#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL)
-
-/***********************************************************************
-Returns a transaction savepoint taken at this point in time. */
-
-trx_savept_t
-trx_savept_take(
-/*============*/
- /* out: savepoint */
- trx_t* trx); /* in: transaction */
-/***********************************************************************
-Creates an undo number array. */
-
-trx_undo_arr_t*
-trx_undo_arr_create(void);
-/*=====================*/
-/***********************************************************************
-Frees an undo number array. */
-
-void
-trx_undo_arr_free(
-/*==============*/
- trx_undo_arr_t* arr); /* in: undo number array */
-/***********************************************************************
-Returns pointer to nth element in an undo number array. */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
- /* out: pointer to the nth element */
- trx_undo_arr_t* arr, /* in: undo number array */
- ulint n); /* in: position */
-/***************************************************************************
-Tries to truncate the undo logs. */
-
-void
-trx_roll_try_truncate(
-/*==================*/
- trx_t* trx); /* in: transaction */
-/************************************************************************
-Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release. */
-
-trx_undo_rec_t*
-trx_roll_pop_top_rec_of_trx(
-/*========================*/
- /* out: undo log record copied to heap, NULL
- if none left, or if the undo number of the
- top record would be less than the limit */
- trx_t* trx, /* in: transaction */
- dulint limit, /* in: least undo number we need */
- dulint* roll_ptr,/* out: roll pointer to undo record */
- mem_heap_t* heap); /* in: memory heap where copied */
-/************************************************************************
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above. */
-
-ibool
-trx_undo_rec_reserve(
-/*=================*/
- /* out: TRUE if succeeded */
- trx_t* trx, /* in: transaction */
- dulint undo_no);/* in: undo number of the record */
-/***********************************************************************
-Releases a reserved undo record. */
-
-void
-trx_undo_rec_release(
-/*=================*/
- trx_t* trx, /* in: transaction */
- dulint undo_no);/* in: undo number */
-/*************************************************************************
-Starts a rollback operation. */
-
-void
-trx_rollback(
-/*=========*/
- trx_t* trx, /* in: transaction */
- trx_sig_t* sig, /* in: signal starting the rollback */
- que_thr_t** next_thr);/* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/***********************************************************************
-Rollback or clean up transactions which have no user session. If the
-transaction already was committed, then we clean up a possible insert
-undo log. If the transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread. */
-
-os_thread_ret_t
-trx_rollback_or_clean_all_without_sess(
-/*===================================*/
- /* out: a dummy parameter */
- void* arg __attribute__((unused)));
- /* in: a dummy parameter required by
- os_thread_create */
-/********************************************************************
-Finishes a transaction rollback. */
-
-void
-trx_finish_rollback_off_kernel(
-/*===========================*/
- que_t* graph, /* in: undo graph which can now be freed */
- trx_t* trx, /* in: transaction */
- que_thr_t** next_thr);/* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if this parameter is
- NULL, it is ignored */
-/********************************************************************
-Builds an undo 'query' graph for a transaction. The actual rollback is
-performed by executing this query graph like a query subprocedure call.
-The reply about the completion of the rollback will be sent by this
-graph. */
-
-que_t*
-trx_roll_graph_build(
-/*=================*/
- /* out, own: the query graph */
- trx_t* trx); /* in: trx handle */
-/*************************************************************************
-Creates a rollback command node struct. */
-
-roll_node_t*
-roll_node_create(
-/*=============*/
- /* out, own: rollback node struct */
- mem_heap_t* heap); /* in: mem heap where created */
-/***************************************************************
-Performs an execution step for a rollback command node in a query graph. */
-
-que_thr_t*
-trx_rollback_step(
-/*==============*/
- /* out: query thread to run next, or NULL */
- que_thr_t* thr); /* in: query thread */
-/***********************************************************************
-Rollback a transaction used in MySQL. */
-
-int
-trx_rollback_for_mysql(
-/*===================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx); /* in: transaction handle */
-/***********************************************************************
-Rollback the latest SQL statement for MySQL. */
-
-int
-trx_rollback_last_sql_stat_for_mysql(
-/*=================================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx); /* in: transaction handle */
-/***********************************************************************
-Rolls back a transaction used in MySQL, either fully or to a savepoint. */
-
-int
-trx_general_rollback_for_mysql(
-/*===========================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- ibool partial,/* in: TRUE if partial rollback requested */
- trx_savept_t* savept);/* in: pointer to savepoint undo number, if
- partial rollback requested */
-/***********************************************************************
-Rolls a transaction back to a named savepoint. Modifications after the
-savepoint are undone but InnoDB does NOT release the corresponding locks
-which are stored in memory. If a lock is 'implicit', that is, a new inserted
-row holds a lock where the lock information is carried by the trx id stored in
-the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted. */
-
-ulint
-trx_rollback_to_savepoint_for_mysql(
-/*================================*/
- /* out: if no savepoint
- of the name found then
- DB_NO_SAVEPOINT,
- otherwise DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name, /* in: savepoint name */
- ib_longlong* mysql_binlog_cache_pos);/* out: the MySQL binlog cache
- position corresponding to this
- savepoint; MySQL needs this
- information to remove the
- binlog entries of the queries
- executed after the savepoint */
-/***********************************************************************
-Creates a named savepoint. If the transaction is not yet started, starts it.
-If there is already a savepoint of the same name, this call erases that old
-savepoint and replaces it with a new one. Savepoints are deleted in a transaction
-commit or rollback. */
-
-ulint
-trx_savepoint_for_mysql(
-/*====================*/
- /* out: always DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name, /* in: savepoint name */
- ib_longlong binlog_cache_pos); /* in: MySQL binlog cache
- position corresponding to this
- connection at the time of the
- savepoint */
-
-/***********************************************************************
-Releases a named savepoint. Savepoints which
-were set after this savepoint are deleted. */
-
-ulint
-trx_release_savepoint_for_mysql(
-/*============================*/
- /* out: if no savepoint
- of the name found then
- DB_NO_SAVEPOINT,
- otherwise DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name); /* in: savepoint name */
-
-/***********************************************************************
-Frees a single savepoint struct. */
-
-void
-trx_roll_savepoint_free(
-/*=====================*/
- trx_t* trx, /* in: transaction handle */
- trx_named_savept_t* savep); /* in: savepoint to free */
-
-/***********************************************************************
-Frees savepoint structs starting from savep, if savep == NULL then
-free all savepoints. */
-
-void
-trx_roll_savepoints_free(
-/*=====================*/
- trx_t* trx, /* in: transaction handle */
- trx_named_savept_t* savep); /* in: free all savepoints > this one;
- if this is NULL, free all savepoints
- of trx */
-
-extern sess_t* trx_dummy_sess;
-
-/* A cell in the array used during a rollback and a purge */
-struct trx_undo_inf_struct{
- dulint trx_no; /* transaction number: not defined during
- a rollback */
- dulint undo_no; /* undo number of the undo record being processed */
- ibool in_use; /* TRUE if the cell is in use */
-};
-
-/* During a rollback and a purge, undo numbers of undo records currently being
-processed are stored in this array */
-
-struct trx_undo_arr_struct{
- ulint n_cells; /* number of cells in the array; valid positions are below this */
- ulint n_used; /* number of cells currently in use */
- trx_undo_inf_t* infos; /* the array of n_cells undo info cells */
- mem_heap_t* heap; /* memory heap from which allocated */
-};
-
-/* Rollback command node in a query graph */
-struct roll_node_struct{
- que_common_t common; /* node type: QUE_NODE_ROLLBACK */
- ulint state; /* node execution state: ROLL_NODE_SEND or ROLL_NODE_WAIT */
- ibool partial;/* TRUE if we want a partial rollback */
- trx_savept_t savept; /* savepoint to which to roll back, in the
- case of a partial rollback */
-};
-
-/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
-struct trx_named_savept_struct{
- char* name; /* savepoint name */
- trx_savept_t savept; /* the undo number corresponding to
- the savepoint */
- ib_longlong mysql_binlog_cache_pos;
- /* the MySQL binlog cache position
- corresponding to this savepoint; not
- defined if MySQL binlogging is not
- enabled */
- UT_LIST_NODE_T(trx_named_savept_t)
- trx_savepoints; /* list node for the list of savepoints
- of a transaction */
-};
-
-/* Rollback node states */
-#define ROLL_NODE_SEND 1
-#define ROLL_NODE_WAIT 2
-
-#ifndef UNIV_NONINL
-#include "trx0roll.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic
deleted file mode 100644
index dfde83ac478..00000000000
--- a/storage/innobase/include/trx0roll.ic
+++ /dev/null
@@ -1,23 +0,0 @@
-/******************************************************
-Transaction rollback
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/***********************************************************************
-Returns pointer to nth element in an undo number array. */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
- /* out: pointer to the nth element */
- trx_undo_arr_t* arr, /* in: undo number array */
- ulint n) /* in: position */
-{
- ut_ad(arr);
- ut_ad(n < arr->n_cells);
-
- return(arr->infos + n);
-}
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
deleted file mode 100644
index 46ba010bd1d..00000000000
--- a/storage/innobase/include/trx0rseg.h
+++ /dev/null
@@ -1,193 +0,0 @@
-/******************************************************
-Rollback segment
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0rseg_h
-#define trx0rseg_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "trx0sys.h"
-
-/**********************************************************************
-Gets a rollback segment header. */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get(
-/*==========*/
- /* out: rollback segment header, page
- x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number of the header */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Gets a newly created rollback segment header. */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get_new(
-/*==============*/
- /* out: rollback segment header, page
- x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number of the header */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
-Gets the file page number of the nth undo log slot. */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- /* out: page number of the undo log segment */
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- ulint n, /* in: index of slot */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
-Sets the file page number of the nth undo log slot. */
-UNIV_INLINE
-void
-trx_rsegf_set_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- ulint n, /* in: index of slot */
- ulint page_no,/* in: page number of the undo log segment */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
-Looks for a free slot for an undo log segment. */
-UNIV_INLINE
-ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- /* out: slot index or ULINT_UNDEFINED if not
- found */
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Looks for a rollback segment, based on the rollback segment id. */
-
-trx_rseg_t*
-trx_rseg_get_on_id(
-/*===============*/
- /* out: rollback segment */
- ulint id); /* in: rollback segment id */
-/********************************************************************
-Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database. */
-
-ulint
-trx_rseg_header_create(
-/*===================*/
- /* out: page number of the created segment,
- FIL_NULL if fail */
- ulint space, /* in: space id */
- ulint max_size, /* in: max size in pages */
- ulint* slot_no, /* out: rseg id == slot number in trx sys */
- mtr_t* mtr); /* in: mtr */
-/*************************************************************************
-Creates the memory copies for rollback segments and initializes the
-rseg list and array in trx_sys at a database startup. */
-
-void
-trx_rseg_list_and_array_init(
-/*=========================*/
- trx_sysf_t* sys_header, /* in: trx system header */
- mtr_t* mtr); /* in: mtr */
-/********************************************************************
-Creates a new rollback segment to the database. */
-
-trx_rseg_t*
-trx_rseg_create(
-/*============*/
- /* out: the created segment object, NULL if
- fail */
- ulint space, /* in: space id */
- ulint max_size, /* in: max size in pages */
- ulint* id, /* out: rseg id */
- mtr_t* mtr); /* in: mtr */
-
-
-/* Number of undo log slots in a rollback segment file copy */
-#define TRX_RSEG_N_SLOTS 1024
-
-/* Maximum number of transactions supported by a single rollback segment */
-#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
-
-/* The rollback segment memory object */
-struct trx_rseg_struct{
- /*--------------------------------------------------------*/
- ulint id; /* rollback segment id == the index of
- its slot in the trx system file copy */
- mutex_t mutex; /* mutex protecting the fields in this
- struct except id; NOTE that the latching
- order must always be kernel mutex ->
- rseg mutex */
- ulint space; /* space where the rollback segment
- header is placed */
- ulint page_no;/* page number of the rollback segment
- header */
- ulint max_size;/* maximum allowed size in pages */
- ulint curr_size;/* current size in pages */
- /*--------------------------------------------------------*/
- /* Fields for update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
- /* List of update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
- /* List of update undo log segments
- cached for fast reuse */
- /*--------------------------------------------------------*/
- /* Fields for insert undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
- /* List of insert undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
- /* List of insert undo log segments
- cached for fast reuse */
- /*--------------------------------------------------------*/
- ulint last_page_no; /* Page number of the last not yet
- purged log header in the history list;
- FIL_NULL if all list purged */
- ulint last_offset; /* Byte offset of the last not yet
- purged log header */
- dulint last_trx_no; /* Transaction number of the last not
- yet purged log */
- ibool last_del_marks; /* TRUE if the last not yet purged log
- needs purging */
- /*--------------------------------------------------------*/
- UT_LIST_NODE_T(trx_rseg_t) rseg_list;
- /* the list of the rollback segment
- memory objects */
-};
-
-/* Undo log segment slot in a rollback segment header */
-/*-------------------------------------------------------------*/
-#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
- an undo log segment */
-/*-------------------------------------------------------------*/
-/* Slot size */
-#define TRX_RSEG_SLOT_SIZE 4
-
-/* The offset of the rollback segment header on its page */
-#define TRX_RSEG FSEG_PAGE_DATA
-
-/* Transaction rollback segment header */
-/*-------------------------------------------------------------*/
-#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback
- segment in pages */
-#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied
- by the logs in the history list */
-#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed
- transactions */
-#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE)
- /* Header for the file segment where
- this page is placed */
-#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
- /* Undo log segment slots */
-/*-------------------------------------------------------------*/
-
-#ifndef UNIV_NONINL
-#include "trx0rseg.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic
deleted file mode 100644
index eb1893587a6..00000000000
--- a/storage/innobase/include/trx0rseg.ic
+++ /dev/null
@@ -1,126 +0,0 @@
-/******************************************************
-Rollback segment
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "srv0srv.h"
-
-/**********************************************************************
-Gets a rollback segment header. */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get(
-/*==========*/
- /* out: rollback segment header, page
- x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number of the header */
- mtr_t* mtr) /* in: mtr */
-{
- trx_rsegf_t* header;
-
- header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_RSEG_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(header);
-}
-
-/**********************************************************************
-Gets a newly created rollback segment header. */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get_new(
-/*==============*/
- /* out: rollback segment header, page
- x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number of the header */
- mtr_t* mtr) /* in: mtr */
-{
- trx_rsegf_t* header;
-
- header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_RSEG_HEADER_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(header);
-}
-
-/*******************************************************************
-Gets the file page number of the nth undo log slot. */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- /* out: page number of the undo log segment */
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- ulint n, /* in: index of slot */
- mtr_t* mtr) /* in: mtr */
-{
- if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr,
- "InnoDB: Error: trying to get slot %lu of rseg\n",
- (ulong) n);
- ut_error;
- }
-
- return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
- + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
-}
-
-/*******************************************************************
-Sets the file page number of the nth undo log slot. */
-UNIV_INLINE
-void
-trx_rsegf_set_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- ulint n, /* in: index of slot */
- ulint page_no,/* in: page number of the undo log segment */
- mtr_t* mtr) /* in: mtr */
-{
- if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr,
- "InnoDB: Error: trying to set slot %lu of rseg\n",
- (ulong) n);
- ut_error;
- }
-
- mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
- page_no, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************
-Looks for a free slot for an undo log segment. */
-UNIV_INLINE
-ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- /* out: slot index or ULINT_UNDEFINED if not
- found */
- trx_rsegf_t* rsegf, /* in: rollback segment header */
- mtr_t* mtr) /* in: mtr */
-{
- ulint i;
- ulint page_no;
-
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
-
- page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
deleted file mode 100644
index a8da5cd51a3..00000000000
--- a/storage/innobase/include/trx0sys.h
+++ /dev/null
@@ -1,453 +0,0 @@
-/******************************************************
-Transaction system
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0sys_h
-#define trx0sys_h
-
-#include "univ.i"
-
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-#include "sync0sync.h"
-#include "ut0lst.h"
-#include "buf0buf.h"
-#include "fil0fil.h"
-#include "fut0lst.h"
-#include "fsp0fsp.h"
-#include "read0types.h"
-
-/* In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. We have successfully got the updates to InnoDB
-up to this position. If .._pos is -1, it means no crash recovery was needed,
-or there was no master log position info inside InnoDB. */
-
-extern char trx_sys_mysql_master_log_name[];
-extern ib_longlong trx_sys_mysql_master_log_pos;
-
-/* If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. If .._pos is -1, it means there was no binlog position info inside
-InnoDB. */
-
-extern char trx_sys_mysql_bin_log_name[];
-extern ib_longlong trx_sys_mysql_bin_log_pos;
-
-/* The transaction system */
-extern trx_sys_t* trx_sys;
-
-/* Doublewrite system */
-extern trx_doublewrite_t* trx_doublewrite;
-extern ibool trx_doublewrite_must_reset_space_ids;
-extern ibool trx_sys_multiple_tablespace_format;
-
-/********************************************************************
-Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-
-void
-trx_sys_create_doublewrite_buf(void);
-/*================================*/
-/********************************************************************
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function uses a possible doublewrite buffer to restore
-half-written pages in the data files. */
-
-void
-trx_sys_doublewrite_init_or_restore_pages(
-/*======================================*/
- ibool restore_corrupt_pages);
-/********************************************************************
-Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
-multiple tablespace format. */
-
-void
-trx_sys_mark_upgraded_to_multiple_tablespaces(void);
-/*===============================================*/
-/********************************************************************
-Determines if a page number is located inside the doublewrite buffer. */
-
-ibool
-trx_doublewrite_page_inside(
-/*========================*/
- /* out: TRUE if the location is inside
- the two blocks of the doublewrite buffer */
- ulint page_no); /* in: page number */
-/*******************************************************************
-Checks if a page address is the trx sys header page. */
-UNIV_INLINE
-ibool
-trx_sys_hdr_page(
-/*=============*/
- /* out: TRUE if trx sys header page */
- ulint space, /* in: space */
- ulint page_no);/* in: page number */
-/*********************************************************************
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started. */
-
-void
-trx_sys_init_at_db_start(void);
-/*==========================*/
-/*********************************************************************
-Creates and initializes the transaction system at the database creation. */
-
-void
-trx_sys_create(void);
-/*================*/
-/********************************************************************
-Looks for a free slot for a rollback segment in the trx system file copy. */
-
-ulint
-trx_sysf_rseg_find_free(
-/*====================*/
- /* out: slot index or ULINT_UNDEFINED
- if not found */
- mtr_t* mtr); /* in: mtr */
-/*******************************************************************
-Gets the pointer in the nth slot of the rseg array. */
-UNIV_INLINE
-trx_rseg_t*
-trx_sys_get_nth_rseg(
-/*=================*/
- /* out: pointer to rseg object, NULL if slot
- not in use */
- trx_sys_t* sys, /* in: trx system */
- ulint n); /* in: index of slot */
-/*******************************************************************
-Sets the pointer in the nth slot of the rseg array. */
-UNIV_INLINE
-void
-trx_sys_set_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /* in: trx system */
- ulint n, /* in: index of slot */
- trx_rseg_t* rseg); /* in: pointer to rseg object, NULL if slot
- not in use */
-/**************************************************************************
-Gets a pointer to the transaction system file copy and x-locks its page. */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- /* out: pointer to system file copy, page x-locked */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
-Gets the space of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- /* out: space id */
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
-Gets the page number of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- /* out: page number, FIL_NULL
- if slot unused */
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- ulint space, /* in: space id */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
-Sets the page number of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- ulint page_no, /* in: page number, FIL_NULL if
- the slot is reset to unused */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
-Allocates a new transaction id. */
-UNIV_INLINE
-dulint
-trx_sys_get_new_trx_id(void);
-/*========================*/
- /* out: new, allocated trx id */
-/*********************************************************************
-Allocates a new transaction number. */
-UNIV_INLINE
-dulint
-trx_sys_get_new_trx_no(void);
-/*========================*/
- /* out: new, allocated trx number */
-/*********************************************************************
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /* in: pointer to memory where written */
- dulint id); /* in: id */
-/*********************************************************************
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_... */
-UNIV_INLINE
-dulint
-trx_read_trx_id(
-/*============*/
- /* out: id */
- byte* ptr); /* in: pointer to memory from where to read */
-/********************************************************************
-Looks for the trx handle with the given id in trx_list. */
-UNIV_INLINE
-trx_t*
-trx_get_on_id(
-/*==========*/
- /* out: the trx handle or NULL if not found */
- dulint trx_id); /* in: trx id to search for */
-/********************************************************************
-Returns the minimum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->conc_state to
-find out if the minimum trx id transaction itself is active, or already
-committed.) */
-UNIV_INLINE
-dulint
-trx_list_get_min_trx_id(void);
-/*=========================*/
- /* out: the minimum trx id, or trx_sys->max_trx_id
- if the trx list is empty */
-/********************************************************************
-Checks if a transaction with the given id is active. */
-UNIV_INLINE
-ibool
-trx_is_active(
-/*==========*/
- /* out: TRUE if active */
- dulint trx_id);/* in: trx id of the transaction */
-/********************************************************************
-Checks that trx is in the trx list. */
-
-ibool
-trx_in_trx_list(
-/*============*/
- /* out: TRUE if is in */
- trx_t* in_trx);/* in: trx */
-/*********************************************************************
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
- const char* file_name,/* in: MySQL log file name */
- ib_longlong offset, /* in: position in that log file */
- ulint field, /* in: offset of the MySQL log info field in
- the trx sys header */
- mtr_t* mtr); /* in: mtr */
-/*********************************************************************
-Prints to stderr the MySQL binlog offset info in the trx system header if
-the magic number shows it valid. */
-
-void
-trx_sys_print_mysql_binlog_offset(void);
-/*===================================*/
-#ifdef UNIV_HOTBACKUP
-/*********************************************************************
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- byte* page); /* in: buffer containing the trx system header page,
- i.e., page number TRX_SYS_PAGE_NO in the tablespace */
-#endif /* UNIV_HOTBACKUP */
-/*********************************************************************
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid. */
-
-void
-trx_sys_print_mysql_master_log_pos(void);
-/*====================================*/
-
-/* The automatically created system rollback segment has this id */
-#define TRX_SYS_SYSTEM_RSEG_ID 0
-
-/* Space id and page no where the trx system file copy resides */
-#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
-#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
-
-/* The offset of the transaction system header on the page */
-#define TRX_SYS FSEG_PAGE_DATA
-
-/* Transaction system header */
-/*-------------------------------------------------------------*/
-#define TRX_SYS_TRX_ID_STORE 0 /* the maximum trx id or trx number
- modulo TRX_SYS_TRX_ID_WRITE_MARGIN
- written to a file page by any
- transaction; the assignment of
- transaction ids continues from this
- number rounded up by .._MARGIN plus
- .._MARGIN when the database is
- started */
-#define TRX_SYS_FSEG_HEADER 8 /* segment header for the tablespace
- segment the trx system is created
- into */
-#define TRX_SYS_RSEGS (8 + FSEG_HEADER_SIZE)
- /* the start of the array of rollback
- segment specification slots */
-/*-------------------------------------------------------------*/
-
-/* Max number of rollback segments: the number of segment specification slots
-in the transaction system array; rollback segment id must fit in one byte,
-therefore 256; each slot is currently 8 bytes in size */
-#define TRX_SYS_N_RSEGS 256
-
-#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
-#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
-
-/* The offset of the MySQL replication info in the trx system header;
-this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
-#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
-
-/* The offset of the MySQL binlog offset info in the trx system header */
-#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
-#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /* magic number which shows
- if we have valid data in the
- MySQL binlog info; the value
- is ..._MAGIC_N if yes */
-#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /* high 4 bytes of the offset
- within that file */
-#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /* low 4 bytes of the offset
- within that file */
-#define TRX_SYS_MYSQL_LOG_NAME 12 /* MySQL log file name */
-
-/* The offset of the doublewrite buffer header on the trx system header page */
-#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
-/*-------------------------------------------------------------*/
-#define TRX_SYS_DOUBLEWRITE_FSEG 0 /* fseg header of the fseg
- containing the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE
- /* 4-byte magic number which
- shows if we already have
- created the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE)
- /* page number of the
- first page in the first
- sequence of 64
- (= FSP_EXTENT_SIZE) consecutive
- pages in the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE)
- /* page number of the
- first page in the second
- sequence of 64 consecutive
- pages in the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /* we repeat the above 3
- numbers so that if the trx
- sys header is half-written
- to disk, we still may be able
- to recover the information */
-#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
- /* If this is not yet set to
- .._N, we must reset the
- doublewrite buffer, because
- starting from 4.1.x the space
- id of a data page is stored to
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO */
-/*-------------------------------------------------------------*/
-#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855
-#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386
-
-
-#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
-
-/* Doublewrite control struct */
-struct trx_doublewrite_struct{
- mutex_t mutex; /* mutex protecting the first_free field and
- write_buf */
- ulint block1; /* the page number of the first
- doublewrite block (64 pages) */
- ulint block2; /* page number of the second block */
- ulint first_free; /* first free position in write_buf measured
- in units of UNIV_PAGE_SIZE */
- byte* write_buf; /* write buffer used in writing to the
- doublewrite buffer, aligned to an
- address divisible by UNIV_PAGE_SIZE
- (which is required by Windows aio) */
- byte* write_buf_unaligned; /* pointer to write_buf, but unaligned */
- buf_block_t**
- buf_block_arr; /* array to store pointers to the buffer
- blocks which have been cached to write_buf */
-};
-
-/* The transaction system central memory data structure; protected by the
-kernel mutex */
-struct trx_sys_struct{
- dulint max_trx_id; /* The smallest number not yet
- assigned as a transaction id or
- transaction number */
- UT_LIST_BASE_NODE_T(trx_t) trx_list;
- /* List of active and committed in
- memory transactions, sorted on trx id,
- biggest first */
- UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list;
- /* List of transactions created
- for MySQL */
- UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list;
- /* List of rollback segment objects */
- trx_rseg_t* latest_rseg; /* Latest rollback segment in the
- round-robin assignment of rollback
- segments to transactions */
- trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
- /* Pointer array to rollback segments;
- NULL if slot not in use */
- ulint rseg_history_len;/* Length of the TRX_RSEG_HISTORY
- list (update undo logs for committed
- transactions), protected by
- rseg->mutex */
- UT_LIST_BASE_NODE_T(read_view_t) view_list;
- /* List of read views sorted on trx no,
- biggest first */
-};
-
-/* When a trx id which is zero modulo this number (which must be a power of
-two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
-page is updated */
-#define TRX_SYS_TRX_ID_WRITE_MARGIN 256
-
-#ifndef UNIV_NONINL
-#include "trx0sys.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
deleted file mode 100644
index 86b71df08d6..00000000000
--- a/storage/innobase/include/trx0sys.ic
+++ /dev/null
@@ -1,366 +0,0 @@
-/******************************************************
-Transaction system
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "srv0srv.h"
-#include "trx0trx.h"
-#include "data0type.h"
-
-/* The typedef for rseg slot in the file copy */
-typedef byte trx_sysf_rseg_t;
-
-/* Rollback segment specification slot offsets */
-/*-------------------------------------------------------------*/
-#define TRX_SYS_RSEG_SPACE 0 /* space where the the segment
- header is placed; starting with
- MySQL/InnoDB 5.1.7, this is
- UNIV_UNDEFINED if the slot is unused */
-#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the the segment
- header is placed; this is FIL_NULL
- if the slot is unused */
-/*-------------------------------------------------------------*/
-/* Size of a rollback segment specification slot */
-#define TRX_SYS_RSEG_SLOT_SIZE 8
-
-/*********************************************************************
-Writes the value of max_trx_id to the file based trx system header. */
-
-void
-trx_sys_flush_max_trx_id(void);
-/*==========================*/
-
-/*******************************************************************
-Checks if a page address is the trx sys header page. */
-UNIV_INLINE
-ibool
-trx_sys_hdr_page(
-/*=============*/
- /* out: TRUE if trx sys header page */
- ulint space, /* in: space */
- ulint page_no)/* in: page number */
-{
- if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*******************************************************************
-Gets the pointer in the nth slot of the rseg array. */
-UNIV_INLINE
-trx_rseg_t*
-trx_sys_get_nth_rseg(
-/*=================*/
- /* out: pointer to rseg object, NULL if slot
- not in use */
- trx_sys_t* sys, /* in: trx system */
- ulint n) /* in: index of slot */
-{
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(n < TRX_SYS_N_RSEGS);
-
- return(sys->rseg_array[n]);
-}
-
-/*******************************************************************
-Sets the pointer in the nth slot of the rseg array. */
-UNIV_INLINE
-void
-trx_sys_set_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /* in: trx system */
- ulint n, /* in: index of slot */
- trx_rseg_t* rseg) /* in: pointer to rseg object, NULL if slot
- not in use */
-{
- ut_ad(n < TRX_SYS_N_RSEGS);
-
- sys->rseg_array[n] = rseg;
-}
-
-/**************************************************************************
-Gets a pointer to the transaction system header and x-latches its page. */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- /* out: pointer to system header, page x-latched. */
- mtr_t* mtr) /* in: mtr */
-{
- trx_sysf_t* header;
-
- ut_ad(mtr);
-
- header = TRX_SYS + buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
- RW_X_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(header, SYNC_TRX_SYS_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(header);
-}
-
-/*********************************************************************
-Gets the space of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- /* out: space id */
- trx_sysf_t* sys_header, /* in: trx sys header */
- ulint i, /* in: slot index == rseg id */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
-}
-
-/*********************************************************************
-Gets the page number of the nth rollback segment slot in the trx system
-header. */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- /* out: page number, FIL_NULL
- if slot unused */
- trx_sysf_t* sys_header, /* in: trx system header */
- ulint i, /* in: slot index == rseg id */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr));
-}
-
-/*********************************************************************
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /* in: trx sys file copy */
- ulint i, /* in: slot index == rseg id */
- ulint space, /* in: space id */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE,
- space,
- MLOG_4BYTES, mtr);
-}
-
-/*********************************************************************
-Sets the page number of the nth rollback segment slot in the trx system
-header. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /* in: trx sys header */
- ulint i, /* in: slot index == rseg id */
- ulint page_no, /* in: page number, FIL_NULL if the
- slot is reset to unused */
- mtr_t* mtr) /* in: mtr */
-{
- ut_ad(mutex_own(&(kernel_mutex)));
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO,
- page_no,
- MLOG_4BYTES, mtr);
-}
-
-/*********************************************************************
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /* in: pointer to memory where written */
- dulint id) /* in: id */
-{
- ut_ad(DATA_TRX_ID_LEN == 6);
-
- mach_write_to_6(ptr, id);
-}
-
-/*********************************************************************
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_... */
-UNIV_INLINE
-dulint
-trx_read_trx_id(
-/*============*/
- /* out: id */
- byte* ptr) /* in: pointer to memory from where to read */
-{
- ut_ad(DATA_TRX_ID_LEN == 6);
-
- return(mach_read_from_6(ptr));
-}
-
-/********************************************************************
-Looks for the trx handle with the given id in trx_list. */
-UNIV_INLINE
-trx_t*
-trx_get_on_id(
-/*==========*/
- /* out: the trx handle or NULL if not found */
- dulint trx_id) /* in: trx id to search for */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx != NULL) {
- if (0 == ut_dulint_cmp(trx_id, trx->id)) {
-
- return(trx);
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- return(NULL);
-}
-
-/********************************************************************
-Returns the minumum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->conc_state to
-find out if the minimum trx id transaction itself is active, or already
-committed.) */
-UNIV_INLINE
-dulint
-trx_list_get_min_trx_id(void)
-/*=========================*/
- /* out: the minimum trx id, or trx_sys->max_trx_id
- if the trx list is empty */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- trx = UT_LIST_GET_LAST(trx_sys->trx_list);
-
- if (trx == NULL) {
-
- return(trx_sys->max_trx_id);
- }
-
- return(trx->id);
-}
-
-/********************************************************************
-Checks if a transaction with the given id is active. */
-UNIV_INLINE
-ibool
-trx_is_active(
-/*==========*/
- /* out: TRUE if active */
- dulint trx_id) /* in: trx id of the transaction */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- if (ut_dulint_cmp(trx_id, trx_list_get_min_trx_id()) < 0) {
-
- return(FALSE);
- }
-
- if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
-
- /* There must be corruption: we return TRUE because this
- function is only called by lock_clust_rec_some_has_impl()
- and row_vers_impl_x_locked_off_kernel() and they have
- diagnostic prints in this case */
-
- return(TRUE);
- }
-
- trx = trx_get_on_id(trx_id);
- if (trx && (trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************
-Allocates a new transaction id. */
-UNIV_INLINE
-dulint
-trx_sys_get_new_trx_id(void)
-/*========================*/
- /* out: new, allocated trx id */
-{
- dulint id;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
- will evaluate to TRUE when this function is first time called,
- and the value for trx id will be written to disk-based header!
- Thus trx id values will not overlap when the database is
- repeatedly started! */
-
- if (ut_dulint_get_low(trx_sys->max_trx_id)
- % TRX_SYS_TRX_ID_WRITE_MARGIN == 0) {
-
- trx_sys_flush_max_trx_id();
- }
-
- id = trx_sys->max_trx_id;
-
- UT_DULINT_INC(trx_sys->max_trx_id);
-
- return(id);
-}
-
-/*********************************************************************
-Allocates a new transaction number. */
-UNIV_INLINE
-dulint
-trx_sys_get_new_trx_no(void)
-/*========================*/
- /* out: new, allocated trx number */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- return(trx_sys_get_new_trx_id());
-}
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
deleted file mode 100644
index f0833bc6f21..00000000000
--- a/storage/innobase/include/trx0trx.h
+++ /dev/null
@@ -1,713 +0,0 @@
-/******************************************************
-The transaction
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0trx_h
-#define trx0trx_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "lock0types.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "mem0mem.h"
-#include "read0types.h"
-#include "dict0types.h"
-#include "trx0xa.h"
-
-extern ulint trx_n_mysql_transactions;
-
-/************************************************************************
-Releases the search latch if trx has reserved it. */
-
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx); /* in: transaction */
-/**********************************************************************
-Set detailed error message for the transaction. */
-void
-trx_set_detailed_error(
-/*===================*/
- trx_t* trx, /* in: transaction struct */
- const char* msg); /* in: detailed error message */
-/*****************************************************************
-Set detailed error message for the transaction from a file. Note that the
-file is rewinded before reading from it. */
-
-void
-trx_set_detailed_error_from_file(
-/*=============================*/
- trx_t* trx, /* in: transaction struct */
- FILE* file); /* in: file to read message from */
-/********************************************************************
-Retrieves the error_info field from a trx. */
-
-void*
-trx_get_error_info(
-/*===============*/
- /* out: the error info */
- trx_t* trx); /* in: trx object */
-/********************************************************************
-Creates and initializes a transaction object. */
-
-trx_t*
-trx_create(
-/*=======*/
- /* out, own: the transaction */
- sess_t* sess); /* in: session or NULL */
-/************************************************************************
-Creates a transaction object for MySQL. */
-
-trx_t*
-trx_allocate_for_mysql(void);
-/*========================*/
- /* out, own: transaction object */
-/************************************************************************
-Creates a transaction object for background operations by the master thread. */
-
-trx_t*
-trx_allocate_for_background(void);
-/*=============================*/
- /* out, own: transaction object */
-/************************************************************************
-Frees a transaction object. */
-
-void
-trx_free(
-/*=====*/
- trx_t* trx); /* in, own: trx object */
-/************************************************************************
-Frees a transaction object for MySQL. */
-
-void
-trx_free_for_mysql(
-/*===============*/
- trx_t* trx); /* in, own: trx object */
-/************************************************************************
-Frees a transaction object of a background operation of the master thread. */
-
-void
-trx_free_for_background(
-/*====================*/
- trx_t* trx); /* in, own: trx object */
-/********************************************************************
-Creates trx objects for transactions and initializes the trx list of
-trx_sys at database start. Rollback segment and undo log lists must
-already exist when this function is called, because the lists of
-transactions to be rolled back or cleaned up are built based on the
-undo log lists. */
-
-void
-trx_lists_init_at_db_start(void);
-/*============================*/
-/********************************************************************
-Starts a new transaction. */
-
-ibool
-trx_start(
-/*======*/
- /* out: TRUE if success, FALSE if the rollback
- segment could not support this many transactions */
- trx_t* trx, /* in: transaction */
- ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-/********************************************************************
-Starts a new transaction. */
-
-ibool
-trx_start_low(
-/*==========*/
- /* out: TRUE */
- trx_t* trx, /* in: transaction */
- ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-/*****************************************************************
-Starts the transaction if it is not yet started. */
-UNIV_INLINE
-void
-trx_start_if_not_started(
-/*=====================*/
- trx_t* trx); /* in: transaction */
-/*****************************************************************
-Starts the transaction if it is not yet started. Assumes we have reserved
-the kernel mutex! */
-UNIV_INLINE
-void
-trx_start_if_not_started_low(
-/*=========================*/
- trx_t* trx); /* in: transaction */
-/*****************************************************************
-Starts the transaction if it is not yet started. */
-
-void
-trx_start_if_not_started_noninline(
-/*===============================*/
- trx_t* trx); /* in: transaction */
-/********************************************************************
-Commits a transaction. */
-
-void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx); /* in: transaction */
-/********************************************************************
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, andf we cannot roll it back. */
-
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx); /* in: transaction */
-/**************************************************************************
-Does the transaction commit for MySQL. */
-
-ulint
-trx_commit_for_mysql(
-/*=================*/
- /* out: 0 or error number */
- trx_t* trx); /* in: trx handle */
-/**************************************************************************
-Does the transaction prepare for MySQL. */
-
-ulint
-trx_prepare_for_mysql(
-/*==================*/
- /* out: 0 or error number */
- trx_t* trx); /* in: trx handle */
-/**************************************************************************
-This function is used to find number of prepared transactions and
-their transaction objects for a recovery. */
-
-int
-trx_recover_for_mysql(
-/*==================*/
- /* out: number of prepared transactions */
- XID* xid_list, /* in/out: prepared transactions */
- ulint len); /* in: number of slots in xid_list */
-/***********************************************************************
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state */
-trx_t *
-trx_get_trx_by_xid(
-/*===============*/
- /* out: trx or NULL */
- XID* xid); /* in: X/Open XA transaction identification */
-/**************************************************************************
-If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE. */
-
-ulint
-trx_commit_complete_for_mysql(
-/*==========================*/
- /* out: 0 or error number */
- trx_t* trx); /* in: trx handle */
-/**************************************************************************
-Marks the latest SQL statement ended. */
-
-void
-trx_mark_sql_stat_end(
-/*==================*/
- trx_t* trx); /* in: trx handle */
-/************************************************************************
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction. */
-
-read_view_t*
-trx_assign_read_view(
-/*=================*/
- /* out: consistent read view */
- trx_t* trx); /* in: active transaction */
-/***************************************************************
-The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
-the TRX_QUE_RUNNING state and releases query threads which were
-waiting for a lock in the wait_thrs list. */
-
-void
-trx_end_lock_wait(
-/*==============*/
- trx_t* trx); /* in: transaction */
-/********************************************************************
-Sends a signal to a trx object. */
-
-void
-trx_sig_send(
-/*=========*/
- trx_t* trx, /* in: trx handle */
- ulint type, /* in: signal type */
- ulint sender, /* in: TRX_SIG_SELF or
- TRX_SIG_OTHER_SESS */
- que_thr_t* receiver_thr, /* in: query thread which wants the
- reply, or NULL; if type is
- TRX_SIG_END_WAIT, this must be NULL */
- trx_savept_t* savept, /* in: possible rollback savepoint, or
- NULL */
- que_thr_t** next_thr); /* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the parameter
- is NULL, it is ignored */
-/********************************************************************
-Send the reply message when a signal in the queue of the trx has
-been handled. */
-
-void
-trx_sig_reply(
-/*==========*/
- trx_sig_t* sig, /* in: signal */
- que_thr_t** next_thr); /* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/********************************************************************
-Removes the signal object from a trx signal queue. */
-
-void
-trx_sig_remove(
-/*===========*/
- trx_t* trx, /* in: trx handle */
- trx_sig_t* sig); /* in, own: signal */
-/********************************************************************
-Starts handling of a trx signal. */
-
-void
-trx_sig_start_handle(
-/*=================*/
- trx_t* trx, /* in: trx handle */
- que_thr_t** next_thr); /* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-/********************************************************************
-Ends signal handling. If the session is in the error state, and
-trx->graph_before_signal_handling != NULL, returns control to the error
-handling routine of the graph (currently only returns the control to the
-graph root which then sends an error message to the client). */
-
-void
-trx_end_signal_handling(
-/*====================*/
- trx_t* trx); /* in: trx */
-/*************************************************************************
-Creates a commit command node struct. */
-
-commit_node_t*
-commit_node_create(
-/*===============*/
- /* out, own: commit node struct */
- mem_heap_t* heap); /* in: mem heap where created */
-/***************************************************************
-Performs an execution step for a commit type node in a query graph. */
-
-que_thr_t*
-trx_commit_step(
-/*============*/
- /* out: query thread to run next, or NULL */
- que_thr_t* thr); /* in: query thread */
-
-/**************************************************************************
-Prints info about a transaction to the given file. The caller must own the
-kernel mutex and must have called
-innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL
-or InnoDB cannot meanwhile change the info printed here. */
-
-void
-trx_print(
-/*======*/
- FILE* f, /* in: output stream */
- trx_t* trx, /* in: transaction */
- ulint max_query_len); /* in: max query length to print, or 0 to
- use the default max length */
-
-#ifndef UNIV_HOTBACKUP
-/**************************************************************************
-Determines if the currently running transaction has been interrupted. */
-
-ibool
-trx_is_interrupted(
-/*===============*/
- /* out: TRUE if interrupted */
- trx_t* trx); /* in: transaction */
-#else /* !UNIV_HOTBACKUP */
-#define trx_is_interrupted(trx) FALSE
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************
-Compares the "weight" (or size) of two transactions. The weight of one
-transaction is estimated as the number of altered rows + the number of
-locked rows. Transactions that have edited non-transactional tables are
-considered heavier than ones that have not. */
-
-int
-trx_weight_cmp(
-/*===========*/
- /* out: <0, 0 or >0; similar to strcmp(3) */
- trx_t* a, /* in: the first transaction to be compared */
- trx_t* b); /* in: the second transaction to be compared */
-
-/* Signal to a transaction */
-struct trx_sig_struct{
- ulint type; /* signal type */
- ulint sender; /* TRX_SIG_SELF or
- TRX_SIG_OTHER_SESS */
- que_thr_t* receiver; /* non-NULL if the sender of the signal
- wants reply after the operation induced
- by the signal is completed */
- trx_savept_t savept; /* possible rollback savepoint */
- UT_LIST_NODE_T(trx_sig_t)
- signals; /* queue of pending signals to the
- transaction */
- UT_LIST_NODE_T(trx_sig_t)
- reply_signals; /* list of signals for which the sender
- transaction is waiting a reply */
-};
-
-#define TRX_MAGIC_N 91118598
-
-/* The transaction handle; every session has a trx object which is freed only
-when the session is freed; in addition there may be session-less transactions
-rolling back after a database recovery */
-
-struct trx_struct{
- ulint magic_n;
- /* All the next fields are protected by the kernel mutex, except the
- undo logs which are protected by undo_mutex */
- const char* op_info; /* English text describing the
- current operation, or an empty
- string */
- unsigned is_purge:1; /* 0=user transaction, 1=purge */
- ulint conc_state; /* state of the trx from the point
- of view of concurrency control:
- TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
- ... */
- time_t start_time; /* time the trx object was created
- or the state last time became
- TRX_ACTIVE */
- ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
- ibool check_foreigns; /* normally TRUE, but if the user
- wants to suppress foreign key checks,
- (in table imports, for example) we
- set this FALSE */
- ibool check_unique_secondary;
- /* normally TRUE, but if the user
- wants to speed up inserts by
- suppressing unique key checks
- for secondary indexes when we decide
- if we can use the insert buffer for
- them, we set this FALSE */
- dulint id; /* transaction id */
- XID xid; /* X/Open XA transaction
- identification to identify a
- transaction branch */
- ibool support_xa; /* normally we do the XA two-phase
- commit steps, but by setting this to
- FALSE, one can save CPU time and about
- 150 bytes in the undo log size as then
- we skip XA steps */
- dulint no; /* transaction serialization number ==
- max trx id when the transaction is
- moved to COMMITTED_IN_MEMORY state */
- ibool flush_log_later;/* when we commit the transaction
- in MySQL's binlog write, we will
- flush the log to disk later in
- a separate call */
- ibool must_flush_log_later;/* this flag is set to TRUE in
- trx_commit_off_kernel() if
- flush_log_later was TRUE, and there
- were modifications by the transaction;
- in that case we must flush the log
- in trx_commit_complete_for_mysql() */
- dulint commit_lsn; /* lsn at the time of the commit */
- ibool dict_operation; /* TRUE if the trx is used to create
- a table, create an index, or drop a
- table. This is a hint that the table
- may need to be dropped in crash
- recovery. */
- dulint table_id; /* table id if the preceding field is
- TRUE */
- /*------------------------------*/
- unsigned duplicates:2; /* TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- unsigned active_trans:2; /* 1 - if a transaction in MySQL
- is active. 2 - if prepare_commit_mutex
- was taken */
- void* mysql_thd; /* MySQL thread handle corresponding
- to this trx, or NULL */
- char** mysql_query_str;/* pointer to the field in mysqld_thd
- which contains the pointer to the
- current SQL query string */
- const char* mysql_log_file_name;
- /* if MySQL binlog is used, this field
- contains a pointer to the latest file
- name; this is NULL if binlog is not
- used */
- ib_longlong mysql_log_offset;/* if MySQL binlog is used, this field
- contains the end offset of the binlog
- entry */
- os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
- with this transaction object */
- ulint mysql_process_no;/* since in Linux, 'top' reports
- process id's and not thread id's, we
- store the process number too */
- /*------------------------------*/
- ulint n_mysql_tables_in_use; /* number of Innobase tables
- used in the processing of the current
- SQL statement in MySQL */
- ulint mysql_n_tables_locked;
- /* how many tables the current SQL
- statement uses, except those
- in consistent read */
- ibool dict_operation_lock_mode;
- /* 0, RW_S_LATCH, or RW_X_LATCH:
- the latch mode trx currently holds
- on dict_operation_lock */
- ibool has_search_latch;
- /* TRUE if this trx has latched the
- search system latch in S-mode */
- ulint search_latch_timeout;
- /* If we notice that someone is
- waiting for our S-lock on the search
- latch to be released, we wait in
- row0sel.c for BTR_SEA_TIMEOUT new
- searches until we try to keep
- the search latch again over
- calls from MySQL; this is intended
- to reduce contention on the search
- latch */
- /*------------------------------*/
- ibool declared_to_be_inside_innodb;
- /* this is TRUE if we have declared
- this transaction in
- srv_conc_enter_innodb to be inside the
- InnoDB engine */
- ulint n_tickets_to_enter_innodb;
- /* this can be > 0 only when
- declared_to_... is TRUE; when we come
- to srv_conc_innodb_enter, if the value
- here is > 0, we decrement this by 1 */
- /*------------------------------*/
- lock_t* auto_inc_lock; /* possible auto-inc lock reserved by
- the transaction; note that it is also
- in the lock list trx_locks */
- UT_LIST_NODE_T(trx_t)
- trx_list; /* list of transactions */
- UT_LIST_NODE_T(trx_t)
- mysql_trx_list; /* list of transactions created for
- MySQL */
- /*------------------------------*/
- ulint error_state; /* 0 if no error, otherwise error
- number; NOTE That ONLY the thread
- doing the transaction is allowed to
- set this field: this is NOT protected
- by the kernel mutex */
- void* error_info; /* if the error number indicates a
- duplicate key error, a pointer to
- the problematic index is stored here */
- sess_t* sess; /* session of the trx, NULL if none */
- ulint que_state; /* TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
- ... */
- que_t* graph; /* query currently run in the session,
- or NULL if none; NOTE that the query
- belongs to the session, and it can
- survive over a transaction commit, if
- it is a stored procedure with a COMMIT
- WORK statement, for instance */
- ulint n_active_thrs; /* number of active query threads */
- ibool handling_signals;/* this is TRUE as long as the trx
- is handling signals */
- que_t* graph_before_signal_handling;
- /* value of graph when signal handling
- for this trx started: this is used to
- return control to the original query
- graph for error processing */
- trx_sig_t sig; /* one signal object can be allocated
- in this space, avoiding mem_alloc */
- UT_LIST_BASE_NODE_T(trx_sig_t)
- signals; /* queue of processed or pending
- signals to the trx */
- UT_LIST_BASE_NODE_T(trx_sig_t)
- reply_signals; /* list of signals sent by the query
- threads of this trx for which a thread
- is waiting for a reply; if this trx is
- killed, the reply requests in the list
- must be canceled */
- /*------------------------------*/
- lock_t* wait_lock; /* if trx execution state is
- TRX_QUE_LOCK_WAIT, this points to
- the lock request, otherwise this is
- NULL */
- ibool was_chosen_as_deadlock_victim;
- /* when the transaction decides to wait
- for a lock, it sets this to FALSE;
- if another transaction chooses this
- transaction as a victim in deadlock
- resolution, it sets this to TRUE */
- time_t wait_started; /* lock wait started at this time */
- UT_LIST_BASE_NODE_T(que_thr_t)
- wait_thrs; /* query threads belonging to this
- trx that are in the QUE_THR_LOCK_WAIT
- state */
- ulint deadlock_mark; /* a mark field used in deadlock
- checking algorithm */
- /*------------------------------*/
- mem_heap_t* lock_heap; /* memory heap for the locks of the
- transaction */
- UT_LIST_BASE_NODE_T(lock_t)
- trx_locks; /* locks reserved by the transaction */
- /*------------------------------*/
- mem_heap_t* global_read_view_heap;
- /* memory heap for the global read
- view */
- read_view_t* global_read_view;
- /* consistent read view associated
- to a transaction or NULL */
- read_view_t* read_view; /* consistent read view used in the
- transaction or NULL, this read view
- if defined can be normal read view
- associated to a transaction (i.e.
- same as global_read_view) or read view
- associated to a cursor */
- /*------------------------------*/
- UT_LIST_BASE_NODE_T(trx_named_savept_t)
- trx_savepoints; /* savepoints set with SAVEPOINT ...,
- oldest first */
- /*------------------------------*/
- mutex_t undo_mutex; /* mutex protecting the fields in this
- section (down to undo_no_arr), EXCEPT
- last_sql_stat_start, which can be
- accessed only when we know that there
- cannot be any activity in the undo
- logs! */
- dulint undo_no; /* next undo log record number to
- assign; since the undo log is
- private for a transaction, this
- is a simple ascending sequence
- with no gaps; thus it represents
- the number of modified/inserted
- rows in a transaction */
- trx_savept_t last_sql_stat_start;
- /* undo_no when the last sql statement
- was started: in case of an error, trx
- is rolled back down to this undo
- number; see note at undo_mutex! */
- trx_rseg_t* rseg; /* rollback segment assigned to the
- transaction, or NULL if not assigned
- yet */
- trx_undo_t* insert_undo; /* pointer to the insert undo log, or
- NULL if no inserts performed yet */
- trx_undo_t* update_undo; /* pointer to the update undo log, or
- NULL if no update performed yet */
- dulint roll_limit; /* least undo number to undo during
- a rollback */
- ulint pages_undone; /* number of undo log pages undone
- since the last undo log truncation */
- trx_undo_arr_t* undo_no_arr; /* array of undo numbers of undo log
- records which are currently processed
- by a rollback operation */
- ulint n_autoinc_rows; /* no. of AUTO-INC rows required for
- an SQL statement. This is useful for
- multi-row INSERTs */
- /*------------------------------*/
- char detailed_error[256]; /* detailed error message for last
- error, or empty. */
-};
-
-#define TRX_MAX_N_THREADS 32 /* maximum number of
- concurrent threads running a
- single operation of a
- transaction, e.g., a parallel
- query */
-/* Transaction concurrency states (trx->conc_state) */
-#define TRX_NOT_STARTED 1
-#define TRX_ACTIVE 2
-#define TRX_COMMITTED_IN_MEMORY 3
-#define TRX_PREPARED 4 /* Support for 2PC/XA */
-
-/* Transaction execution states when trx->conc_state == TRX_ACTIVE */
-#define TRX_QUE_RUNNING 1 /* transaction is running */
-#define TRX_QUE_LOCK_WAIT 2 /* transaction is waiting for a lock */
-#define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */
-#define TRX_QUE_COMMITTING 4 /* transaction is committing */
-
-/* Transaction isolation levels (trx->isolation_level) */
-#define TRX_ISO_READ_UNCOMMITTED 1 /* dirty read: non-locking
- SELECTs are performed so that
- we do not look at a possible
- earlier version of a record;
- thus they are not 'consistent'
- reads under this isolation
- level; otherwise like level
- 2 */
-
-#define TRX_ISO_READ_COMMITTED 2 /* somewhat Oracle-like
- isolation, except that in
- range UPDATE and DELETE we
- must block phantom rows
- with next-key locks;
- SELECT ... FOR UPDATE and ...
- LOCK IN SHARE MODE only lock
- the index records, NOT the
- gaps before them, and thus
- allow free inserting;
- each consistent read reads its
- own snapshot */
-
-#define TRX_ISO_REPEATABLE_READ 3 /* this is the default;
- all consistent reads in the
- same trx read the same
- snapshot;
- full next-key locking used
- in locking reads to block
- insertions into gaps */
-
-#define TRX_ISO_SERIALIZABLE 4 /* all plain SELECTs are
- converted to LOCK IN SHARE
- MODE reads */
-
-/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
-Multiple flags can be combined with bitwise OR. */
-#define TRX_DUP_IGNORE 1 /* duplicate rows are to be updated */
-#define TRX_DUP_REPLACE 2 /* duplicate rows are to be replaced */
-
-
-/* Types of a trx signal */
-#define TRX_SIG_NO_SIGNAL 100
-#define TRX_SIG_TOTAL_ROLLBACK 1
-#define TRX_SIG_ROLLBACK_TO_SAVEPT 2
-#define TRX_SIG_COMMIT 3
-#define TRX_SIG_ERROR_OCCURRED 4
-#define TRX_SIG_BREAK_EXECUTION 5
-
-/* Sender types of a signal */
-#define TRX_SIG_SELF 1 /* sent by the session itself, or
- by an error occurring within this
- session */
-#define TRX_SIG_OTHER_SESS 2 /* sent by another session (which
- must hold rights to this) */
-
-/* Commit command node in a query graph */
-struct commit_node_struct{
- que_common_t common; /* node type: QUE_NODE_COMMIT */
- ulint state; /* node execution state */
-};
-
-/* Commit node states */
-#define COMMIT_NODE_SEND 1
-#define COMMIT_NODE_WAIT 2
-
-
-#ifndef UNIV_NONINL
-#include "trx0trx.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
deleted file mode 100644
index 09b2f822ff7..00000000000
--- a/storage/innobase/include/trx0trx.ic
+++ /dev/null
@@ -1,40 +0,0 @@
-/******************************************************
-The transaction
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/*****************************************************************
-Starts the transaction if it is not yet started. */
-UNIV_INLINE
-void
-trx_start_if_not_started(
-/*=====================*/
- trx_t* trx) /* in: transaction */
-{
- ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
-
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- trx_start(trx, ULINT_UNDEFINED);
- }
-}
-
-/*****************************************************************
-Starts the transaction if it is not yet started. Assumes we have reserved
-the kernel mutex! */
-UNIV_INLINE
-void
-trx_start_if_not_started_low(
-/*=========================*/
- trx_t* trx) /* in: transaction */
-{
- ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
-
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- trx_start_low(trx, ULINT_UNDEFINED);
- }
-}
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
deleted file mode 100644
index 0e6ee79498c..00000000000
--- a/storage/innobase/include/trx0types.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/******************************************************
-Transaction system global type definitions
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0types_h
-#define trx0types_h
-
-#include "lock0types.h"
-#include "ut0byte.h"
-
-/* Memory objects */
-typedef struct trx_struct trx_t;
-typedef struct trx_sys_struct trx_sys_t;
-typedef struct trx_doublewrite_struct trx_doublewrite_t;
-typedef struct trx_sig_struct trx_sig_t;
-typedef struct trx_rseg_struct trx_rseg_t;
-typedef struct trx_undo_struct trx_undo_t;
-typedef struct trx_undo_arr_struct trx_undo_arr_t;
-typedef struct trx_undo_inf_struct trx_undo_inf_t;
-typedef struct trx_purge_struct trx_purge_t;
-typedef struct roll_node_struct roll_node_t;
-typedef struct commit_node_struct commit_node_t;
-typedef struct trx_named_savept_struct trx_named_savept_t;
-
-/* Transaction savepoint */
-typedef struct trx_savept_struct trx_savept_t;
-struct trx_savept_struct{
- dulint least_undo_no; /* least undo number to undo */
-};
-
-/* File objects */
-typedef byte trx_sysf_t;
-typedef byte trx_rsegf_t;
-typedef byte trx_usegf_t;
-typedef byte trx_ulogf_t;
-typedef byte trx_upagef_t;
-
-/* Undo log record */
-typedef byte trx_undo_rec_t;
-
-#endif
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
deleted file mode 100644
index 7f10e407746..00000000000
--- a/storage/innobase/include/trx0undo.h
+++ /dev/null
@@ -1,503 +0,0 @@
-/******************************************************
-Transaction undo log
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0undo_h
-#define trx0undo_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "page0types.h"
-#include "trx0xa.h"
-
-/***************************************************************************
-Builds a roll pointer dulint. */
-UNIV_INLINE
-dulint
-trx_undo_build_roll_ptr(
-/*====================*/
- /* out: roll pointer */
- ibool is_insert, /* in: TRUE if insert undo log */
- ulint rseg_id, /* in: rollback segment id */
- ulint page_no, /* in: page number */
- ulint offset); /* in: offset of the undo entry within page */
-/***************************************************************************
-Decodes a roll pointer dulint. */
-UNIV_INLINE
-void
-trx_undo_decode_roll_ptr(
-/*=====================*/
- dulint roll_ptr, /* in: roll pointer */
- ibool* is_insert, /* out: TRUE if insert undo log */
- ulint* rseg_id, /* out: rollback segment id */
- ulint* page_no, /* out: page number */
- ulint* offset); /* out: offset of the undo entry within page */
-/***************************************************************************
-Returns TRUE if the roll pointer is of the insert type. */
-UNIV_INLINE
-ibool
-trx_undo_roll_ptr_is_insert(
-/*========================*/
- /* out: TRUE if insert undo log */
- dulint roll_ptr); /* in: roll pointer */
-/*********************************************************************
-Writes a roll ptr to an index page. In case that the size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_roll_ptr(
-/*===============*/
- byte* ptr, /* in: pointer to memory where written */
- dulint roll_ptr); /* in: roll ptr */
-/*********************************************************************
-Reads a roll ptr from an index page. In case that the roll ptr size
-changes in some future version, this function should be used instead of
-mach_read_... */
-UNIV_INLINE
-dulint
-trx_read_roll_ptr(
-/*==============*/
- /* out: roll ptr */
- byte* ptr); /* in: pointer to memory from where to read */
-/**********************************************************************
-Gets an undo log page and x-latches it. */
-UNIV_INLINE
-page_t*
-trx_undo_page_get(
-/*==============*/
- /* out: pointer to page x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Gets an undo log page and s-latches it. */
-UNIV_INLINE
-page_t*
-trx_undo_page_get_s_latched(
-/*========================*/
- /* out: pointer to page s-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- /* out: pointer to record, NULL if none */
- trx_undo_rec_t* rec, /* in: undo log record */
- ulint page_no,/* in: undo log header page number */
- ulint offset);/* in: undo log header offset on page */
-/**********************************************************************
-Returns the next undo log record on the page in the specified log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_next_rec(
-/*=======================*/
- /* out: pointer to record, NULL if none */
- trx_undo_rec_t* rec, /* in: undo log record */
- ulint page_no,/* in: undo log header page number */
- ulint offset);/* in: undo log header offset on page */
-/**********************************************************************
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- /* out: pointer to record, NULL if none */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset); /* in: undo log header offset on page */
-/**********************************************************************
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- /* out: pointer to record, NULL if none */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset);/* in: undo log header offset on page */
-/***************************************************************************
-Gets the previous record in an undo log. */
-
-trx_undo_rec_t*
-trx_undo_get_prev_rec(
-/*==================*/
- /* out: undo log record, the page s-latched,
- NULL if none */
- trx_undo_rec_t* rec, /* in: undo record */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************************
-Gets the next record in an undo log. */
-
-trx_undo_rec_t*
-trx_undo_get_next_rec(
-/*==================*/
- /* out: undo log record, the page s-latched,
- NULL if none */
- trx_undo_rec_t* rec, /* in: undo record */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- mtr_t* mtr); /* in: mtr */
-/***************************************************************************
-Gets the first record in an undo log. */
-
-trx_undo_rec_t*
-trx_undo_get_first_rec(
-/*===================*/
- /* out: undo log record, the page latched, NULL if
- none */
- ulint space, /* in: undo log header space */
- ulint page_no,/* in: undo log header page number */
- ulint offset, /* in: undo log header offset on page */
- ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
-Tries to add a page to the undo log segment where the undo log is placed. */
-
-ulint
-trx_undo_add_page(
-/*==============*/
- /* out: page number if success, else
- FIL_NULL */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory object */
- mtr_t* mtr); /* in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-/***************************************************************************
-Truncates an undo log from the end. This function is used during a rollback
-to free space from an undo log. */
-
-void
-trx_undo_truncate_end(
-/*==================*/
- trx_t* trx, /* in: transaction whose undo log it is */
- trx_undo_t* undo, /* in: undo log */
- dulint limit); /* in: all undo records with undo number
- >= this value should be truncated */
-/***************************************************************************
-Truncates an undo log from the start. This function is used during a purge
-operation. */
-
-void
-trx_undo_truncate_start(
-/*====================*/
- trx_rseg_t* rseg, /* in: rollback segment */
- ulint space, /* in: space id of the log */
- ulint hdr_page_no, /* in: header page number */
- ulint hdr_offset, /* in: header offset on the page */
- dulint limit); /* in: all undo pages with undo numbers <
- this value should be truncated; NOTE that
- the function only frees whole pages; the
- header page is not freed, but emptied, if
- all the records there are < limit */
-/************************************************************************
-Initializes the undo log lists for a rollback segment memory copy.
-This function is only called when the database is started or a new
-rollback segment created. */
-
-ulint
-trx_undo_lists_init(
-/*================*/
- /* out: the combined size of undo log segments
- in pages */
- trx_rseg_t* rseg); /* in: rollback segment memory object */
-/**************************************************************************
-Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused. */
-
-ulint
-trx_undo_assign_undo(
-/*=================*/
- /* out: DB_SUCCESS if undo log assign
- * successful, possible error codes are:
- * ER_TOO_MANY_CONCURRENT_TRXS
- * DB_OUT_OF_FILE_SPAC
- * DB_OUT_OF_MEMORY */
- trx_t* trx, /* in: transaction */
- ulint type); /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
-/**********************************************************************
-Sets the state of the undo log segment at a transaction finish. */
-
-page_t*
-trx_undo_set_state_at_finish(
-/*=========================*/
- /* out: undo log segment header page,
- x-latched */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Sets the state of the undo log segment at a transaction prepare. */
-
-page_t*
-trx_undo_set_state_at_prepare(
-/*==========================*/
- /* out: undo log segment header page,
- x-latched */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- mtr_t* mtr); /* in: mtr */
-
-/**************************************************************************
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /* in: trx owning the update undo log */
- page_t* undo_page, /* in: update undo log header page,
- x-latched */
- mtr_t* mtr); /* in: mtr */
-/**********************************************************************
-Frees or caches an insert undo log after a transaction commit or rollback.
-Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-
-void
-trx_undo_insert_cleanup(
-/*====================*/
- trx_t* trx); /* in: transaction handle */
-/***************************************************************
-Parses the redo log entry of an undo log page initialization. */
-
-byte*
-trx_undo_parse_page_init(
-/*=====================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses the redo log entry of an undo log page header create or reuse. */
-
-byte*
-trx_undo_parse_page_header(
-/*=======================*/
- /* out: end of log record or NULL */
- ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-/***************************************************************
-Parses the redo log entry of an undo log page header discard. */
-
-byte*
-trx_undo_parse_discard_latest(
-/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
-
-/* Types of an undo log segment */
-#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */
-#define TRX_UNDO_UPDATE 2 /* contains undo entries for updates
- and delete markings: in short,
- modifys (the name 'UPDATE' is a
- historical relic) */
-/* States of an undo log segment */
-#define TRX_UNDO_ACTIVE 1 /* contains an undo log of an active
- transaction */
-#define TRX_UNDO_CACHED 2 /* cached for quick reuse */
-#define TRX_UNDO_TO_FREE 3 /* insert undo segment can be freed */
-#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be
- reused: it can be freed in purge when
- all undo data in it is removed */
-#define TRX_UNDO_PREPARED 5 /* contains an undo log of an
- prepared transaction */
-
-/* Transaction undo log memory object; this is protected by the undo_mutex
-in the corresponding transaction object */
-
-struct trx_undo_struct{
- /*-----------------------------*/
- ulint id; /* undo log slot number within the
- rollback segment */
- ulint type; /* TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- ulint state; /* state of the corresponding undo log
- segment */
- ibool del_marks; /* relevant only in an update undo log:
- this is TRUE if the transaction may
- have delete marked records, because of
- a delete of a row or an update of an
- indexed field; purge is then
- necessary; also TRUE if the transaction
- has updated an externally stored
- field */
- dulint trx_id; /* id of the trx assigned to the undo
- log */
- XID xid; /* X/Open XA transaction
- identification */
- ibool dict_operation; /* TRUE if a dict operation trx */
- dulint table_id; /* if a dict operation, then the table
- id */
- trx_rseg_t* rseg; /* rseg where the undo log belongs */
- /*-----------------------------*/
- ulint space; /* space id where the undo log
- placed */
- ulint hdr_page_no; /* page number of the header page in
- the undo log */
- ulint hdr_offset; /* header offset of the undo log on the
- page */
- ulint last_page_no; /* page number of the last page in the
- undo log; this may differ from
- top_page_no during a rollback */
- ulint size; /* current size in pages */
- /*-----------------------------*/
- ulint empty; /* TRUE if the stack of undo log
- records is currently empty */
- ulint top_page_no; /* page number where the latest undo
- log record was catenated; during
- rollback the page from which the latest
- undo record was chosen */
- ulint top_offset; /* offset of the latest undo record,
- i.e., the topmost element in the undo
- log if we think of it as a stack */
- dulint top_undo_no; /* undo number of the latest record */
- page_t* guess_page; /* guess for the buffer frame where
- the top page might reside */
- /*-----------------------------*/
- UT_LIST_NODE_T(trx_undo_t) undo_list;
- /* undo log objects in the rollback
- segment are chained into lists */
-};
-
-/* The offset of the undo log page header on pages of the undo log */
-#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA
-/*-------------------------------------------------------------*/
-/* Transaction undo log page header offsets */
-#define TRX_UNDO_PAGE_TYPE 0 /* TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
-#define TRX_UNDO_PAGE_START 2 /* Byte offset where the undo log
- records for the LATEST transaction
- start on this page (remember that
- in an update undo log, the first page
- can contain several undo logs) */
-#define TRX_UNDO_PAGE_FREE 4 /* On each page of the undo log this
- field contains the byte offset of the
- first free byte on the page */
-#define TRX_UNDO_PAGE_NODE 6 /* The file list node in the chain
- of undo log pages */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE)
-
-/* An update undo segment with just one page can be reused if it has
-< this number bytes used; we must leave space at least for one new undo
-log header on the page */
-
-#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4)
-
-/* An update undo log segment may contain several undo logs on its first page
-if the undo logs took so little space that the segment could be cached and
-reused. All the undo log headers are then on the first page, and the last one
-owns the undo log records on subsequent pages if the segment is bigger than
-one page. If an undo log is stored in a segment, then on the first page it is
-allowed to have zero undo records, but if the segment extends to several
-pages, then all the rest of the pages must contain at least one undo log
-record. */
-
-/* The offset of the undo log segment header on the first page of the undo
-log segment */
-
-#define TRX_UNDO_SEG_HDR (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE)
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_STATE 0 /* TRX_UNDO_ACTIVE, ... */
-#define TRX_UNDO_LAST_LOG 2 /* Offset of the last undo log header
- on the segment header page, 0 if
- none */
-#define TRX_UNDO_FSEG_HEADER 4 /* Header for the file segment which
- the undo log segment occupies */
-#define TRX_UNDO_PAGE_LIST (4 + FSEG_HEADER_SIZE)
- /* Base node for the list of pages in
- the undo log segment; defined only on
- the undo log segment's first page */
-/*-------------------------------------------------------------*/
-/* Size of the undo log segment header */
-#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
-
-
-/* The undo log header. There can be several undo log headers on the first
-page of an update undo log segment. */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_TRX_ID 0 /* Transaction id */
-#define TRX_UNDO_TRX_NO 8 /* Transaction number of the
- transaction; defined only if the log
- is in a history list */
-#define TRX_UNDO_DEL_MARKS 16 /* Defined only in an update undo
- log: TRUE if the transaction may have
- done delete markings of records, and
- thus purge is necessary */
-#define TRX_UNDO_LOG_START 18 /* Offset of the first undo log record
- of this log on the header page; purge
- may remove undo log record from the
- log start, and therefore this is not
- necessarily the same as this log
- header end offset */
-#define TRX_UNDO_XID_EXISTS 20 /* TRUE if undo log header includes
- X/Open XA transaction identification
- XID */
-#define TRX_UNDO_DICT_TRANS 21 /* TRUE if the transaction is a table
- create, index create, or drop
- transaction: in recovery
- the transaction cannot be rolled back
- in the usual way: a 'rollback' rather
- means dropping the created or dropped
- table, if it still exists */
-#define TRX_UNDO_TABLE_ID 22 /* Id of the table if the preceding
- field is TRUE */
-#define TRX_UNDO_NEXT_LOG 30 /* Offset of the next undo log header
- on this page, 0 if none */
-#define TRX_UNDO_PREV_LOG 32 /* Offset of the previous undo log
- header on this page, 0 if none */
-#define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history
- list, the file list node is here */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
-
-/* Note: the writing of the undo log old header is coded by a log record
-MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
-header is logged separately. In this sense, the XID is not really a member
-of the undo log header. TODO: do not append the XID to the log header if XA
-is not needed by the user. The XID wastes about 150 bytes of space in every
-undo log. In the history list we may have millions of undo logs, which means
-quite a large overhead. */
-
-/* X/Open XA Transaction Identification (XID) */
-
-#define TRX_UNDO_XA_FORMAT (TRX_UNDO_LOG_OLD_HDR_SIZE)
-#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4)
-#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4)
-#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4)
-/*--------------------------------------------------------------*/
-#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE)
- /* Total size of the header with the XA XID */
-
-#ifndef UNIV_NONINL
-#include "trx0undo.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
deleted file mode 100644
index f28f36ade03..00000000000
--- a/storage/innobase/include/trx0undo.ic
+++ /dev/null
@@ -1,330 +0,0 @@
-/******************************************************
-Transaction undo log
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "data0type.h"
-
-/***************************************************************************
-Builds a roll pointer dulint. */
-UNIV_INLINE
-dulint
-trx_undo_build_roll_ptr(
-/*====================*/
- /* out: roll pointer */
- ibool is_insert, /* in: TRUE if insert undo log */
- ulint rseg_id, /* in: rollback segment id */
- ulint page_no, /* in: page number */
- ulint offset) /* in: offset of the undo entry within page */
-{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
- ut_ad(rseg_id < 128);
-
- return(ut_dulint_create(is_insert * 128 * 256 * 256
- + rseg_id * 256 * 256
- + (page_no / 256) / 256,
- (page_no % (256 * 256)) * 256 * 256
- + offset));
-}
-
-/***************************************************************************
-Decodes a roll pointer dulint. */
-UNIV_INLINE
-void
-trx_undo_decode_roll_ptr(
-/*=====================*/
- dulint roll_ptr, /* in: roll pointer */
- ibool* is_insert, /* out: TRUE if insert undo log */
- ulint* rseg_id, /* out: rollback segment id */
- ulint* page_no, /* out: page number */
- ulint* offset) /* out: offset of the undo entry within page */
-{
- ulint low;
- ulint high;
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- high = ut_dulint_get_high(roll_ptr);
- low = ut_dulint_get_low(roll_ptr);
-
- *offset = low % (256 * 256);
-
- *is_insert = high / (256 * 256 * 128); /* TRUE == 1 */
- *rseg_id = (high / (256 * 256)) % 128;
-
- *page_no = (high % (256 * 256)) * 256 * 256
- + (low / 256) / 256;
-}
-
-/***************************************************************************
-Returns TRUE if the roll pointer is of the insert type. */
-UNIV_INLINE
-ibool
-trx_undo_roll_ptr_is_insert(
-/*========================*/
- /* out: TRUE if insert undo log */
- dulint roll_ptr) /* in: roll pointer */
-{
- ulint high;
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- high = ut_dulint_get_high(roll_ptr);
-
- return(high / (256 * 256 * 128));
-}
-
-/*********************************************************************
-Writes a roll ptr to an index page. In case that the size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_roll_ptr(
-/*===============*/
- byte* ptr, /* in: pointer to memory where written */
- dulint roll_ptr) /* in: roll ptr */
-{
- ut_ad(DATA_ROLL_PTR_LEN == 7);
-
- mach_write_to_7(ptr, roll_ptr);
-}
-
-/*********************************************************************
-Reads a roll ptr from an index page. In case that the roll ptr size
-changes in some future version, this function should be used instead of
-mach_read_... */
-UNIV_INLINE
-dulint
-trx_read_roll_ptr(
-/*==============*/
- /* out: roll ptr */
- byte* ptr) /* in: pointer to memory from where to read */
-{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
- return(mach_read_from_7(ptr));
-}
-
-/**********************************************************************
-Gets an undo log page and x-latches it. */
-UNIV_INLINE
-page_t*
-trx_undo_page_get(
-/*==============*/
- /* out: pointer to page x-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page;
-
- page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(page);
-}
-
-/**********************************************************************
-Gets an undo log page and s-latches it. */
-UNIV_INLINE
-page_t*
-trx_undo_page_get_s_latched(
-/*========================*/
- /* out: pointer to page s-latched */
- ulint space, /* in: space where placed */
- ulint page_no, /* in: page number */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* page;
-
- page = buf_page_get(space, page_no, RW_S_LATCH, mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- return(page);
-}
-
-/**********************************************************************
-Returns the start offset of the undo log records of the specified undo
-log on the page. */
-UNIV_INLINE
-ulint
-trx_undo_page_get_start(
-/*====================*/
- /* out: start offset */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
-{
- ulint start;
-
- if (page_no == buf_frame_get_page_no(undo_page)) {
-
- start = mach_read_from_2(offset + undo_page
- + TRX_UNDO_LOG_START);
- } else {
- start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
- }
-
- return(start);
-}
-
-/**********************************************************************
-Returns the end offset of the undo log records of the specified undo
-log on the page. */
-UNIV_INLINE
-ulint
-trx_undo_page_get_end(
-/*==================*/
- /* out: end offset */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
-{
- trx_ulogf_t* log_hdr;
- ulint end;
-
- if (page_no == buf_frame_get_page_no(undo_page)) {
-
- log_hdr = undo_page + offset;
-
- end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
-
- if (end == 0) {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- }
- } else {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- }
-
- return(end);
-}
-
-/**********************************************************************
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- /* out: pointer to record, NULL if none */
- trx_undo_rec_t* rec, /* in: undo log record */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
-{
- page_t* undo_page;
- ulint start;
-
- undo_page = buf_frame_align(rec);
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
-
- if (start + undo_page == rec) {
-
- return(NULL);
- }
-
- return(undo_page + mach_read_from_2(rec - 2));
-}
-
-/**********************************************************************
-Returns the next undo log record on the page in the specified log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_next_rec(
-/*=======================*/
- /* out: pointer to record, NULL if none */
- trx_undo_rec_t* rec, /* in: undo log record */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
-{
- page_t* undo_page;
- ulint end;
- ulint next;
-
- undo_page = buf_frame_align(rec);
-
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- next = mach_read_from_2(rec);
-
- if (next == end) {
-
- return(NULL);
- }
-
- return(undo_page + next);
-}
-
-/**********************************************************************
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- /* out: pointer to record, NULL if none */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + mach_read_from_2(undo_page + end - 2));
-}
-
-/**********************************************************************
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists. */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- /* out: pointer to record, NULL if none */
- page_t* undo_page,/* in: undo log page */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + start);
-}
diff --git a/storage/innobase/include/trx0xa.h b/storage/innobase/include/trx0xa.h
deleted file mode 100644
index df85cd663cb..00000000000
--- a/storage/innobase/include/trx0xa.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Start of xa.h header
- *
- * Define a symbol to prevent multiple inclusions of this header file
- */
-#ifndef XA_H
-#define XA_H
-
-/*
- * Transaction branch identification: XID and NULLXID:
- */
-#ifndef XIDDATASIZE
-
-#define XIDDATASIZE 128 /* size in bytes */
-#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */
-#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */
-
-struct xid_t {
- long formatID; /* format identifier */
- long gtrid_length; /* value from 1 through 64 */
- long bqual_length; /* value from 1 through 64 */
- char data[XIDDATASIZE];
-};
-typedef struct xid_t XID;
-#endif
-/*
- * A value of -1 in formatID means that the XID is null.
- */
-
-
-#ifdef NOTDEFINED
-/* Let us comment this out to remove compiler errors!!!!!!!!!!!! */
-
-/*
- * Declarations of routines by which RMs call TMs:
- */
-extern int ax_reg __P((int, XID *, long));
-extern int ax_unreg __P((int, long));
-
-/*
- * XA Switch Data Structure
- */
-#define RMNAMESZ 32 /* length of resource manager name, */
- /* including the null terminator */
-#define MAXINFOSIZE 256 /* maximum size in bytes of xa_info */
- /* strings, including the null
- terminator */
-
-
-struct xa_switch_t {
- char name[RMNAMESZ]; /* name of resource manager */
- long flags; /* resource manager specific options */
- long version; /* must be 0 */
- int (*xa_open_entry) /* xa_open function pointer */
- __P((char *, int, long));
- int (*xa_close_entry) /* xa_close function pointer */
- __P((char *, int, long));
- int (*xa_start_entry) /* xa_start function pointer */
- __P((XID *, int, long));
- int (*xa_end_entry) /* xa_end function pointer */
- __P((XID *, int, long));
- int (*xa_rollback_entry) /* xa_rollback function pointer */
- __P((XID *, int, long));
- int (*xa_prepare_entry) /* xa_prepare function pointer */
- __P((XID *, int, long));
- int (*xa_commit_entry) /* xa_commit function pointer */
- __P((XID *, int, long));
- int (*xa_recover_entry) /* xa_recover function pointer */
- __P((XID *, long, int, long));
- int (*xa_forget_entry) /* xa_forget function pointer */
- __P((XID *, int, long));
- int (*xa_complete_entry) /* xa_complete function pointer */
- __P((int *, int *, int, long));
-};
-#endif /* NOTDEFINED */
-
-
-/*
- * Flag definitions for the RM switch
- */
-#define TMNOFLAGS 0x00000000L /* no resource manager features
- selected */
-#define TMREGISTER 0x00000001L /* resource manager dynamically
- registers */
-#define TMNOMIGRATE 0x00000002L /* resource manager does not support
- association migration */
-#define TMUSEASYNC 0x00000004L /* resource manager supports
- asynchronous operations */
-/*
- * Flag definitions for xa_ and ax_ routines
- */
-/* use TMNOFLAGGS, defined above, when not specifying other flags */
-#define TMASYNC 0x80000000L /* perform routine asynchronously */
-#define TMONEPHASE 0x40000000L /* caller is using one-phase commit
- optimisation */
-#define TMFAIL 0x20000000L /* dissociates caller and marks
- transaction branch rollback-only */
-#define TMNOWAIT 0x10000000L /* return if blocking condition
- exists */
-#define TMRESUME 0x08000000L /* caller is resuming association with
- suspended transaction branch */
-#define TMSUCCESS 0x04000000L /* dissociate caller from transaction
- branch */
-#define TMSUSPEND 0x02000000L /* caller is suspending, not ending,
- association */
-#define TMSTARTRSCAN 0x01000000L /* start a recovery scan */
-#define TMENDRSCAN 0x00800000L /* end a recovery scan */
-#define TMMULTIPLE 0x00400000L /* wait for any asynchronous
- operation */
-#define TMJOIN 0x00200000L /* caller is joining existing
- transaction branch */
-#define TMMIGRATE 0x00100000L /* caller intends to perform
- migration */
-
-/*
- * ax_() return codes (transaction manager reports to resource manager)
- */
-#define TM_JOIN 2 /* caller is joining existing
- transaction branch */
-#define TM_RESUME 1 /* caller is resuming association with
- suspended transaction branch */
-#define TM_OK 0 /* normal execution */
-#define TMER_TMERR -1 /* an error occurred in the transaction
- manager */
-#define TMER_INVAL -2 /* invalid arguments were given */
-#define TMER_PROTO -3 /* routine invoked in an improper
- context */
-
-/*
- * xa_() return codes (resource manager reports to transaction manager)
- */
-#define XA_RBBASE 100 /* The inclusive lower bound of the
- rollback codes */
-#define XA_RBROLLBACK XA_RBBASE /* The rollback was caused by an
- unspecified reason */
-#define XA_RBCOMMFAIL XA_RBBASE+1 /* The rollback was caused by a
- communication failure */
-#define XA_RBDEADLOCK XA_RBBASE+2 /* A deadlock was detected */
-#define XA_RBINTEGRITY XA_RBBASE+3 /* A condition that violates the
- integrity of the resources was
- detected */
-#define XA_RBOTHER XA_RBBASE+4 /* The resource manager rolled back the
- transaction branch for a reason not
- on this list */
-#define XA_RBPROTO XA_RBBASE+5 /* A protocol error occurred in the
- resource manager */
-#define XA_RBTIMEOUT XA_RBBASE+6 /* A transaction branch took
- too long */
-#define XA_RBTRANSIENT XA_RBBASE+7 /* May retry the transaction branch */
-#define XA_RBEND XA_RBTRANSIENT /* The inclusive upper bound of the
- rollback codes */
-#define XA_NOMIGRATE 9 /* resumption must occur where
- suspension occurred */
-#define XA_HEURHAZ 8 /* the transaction branch may have
- been heuristically completed */
-#define XA_HEURCOM 7 /* the transaction branch has been
- heuristically committed */
-#define XA_HEURRB 6 /* the transaction branch has been
- heuristically rolled back */
-#define XA_HEURMIX 5 /* the transaction branch has been
- heuristically committed and rolled
- back */
-#define XA_RETRY 4 /* routine returned with no effect and
- may be re-issued */
-#define XA_RDONLY 3 /* the transaction branch was read-only
- and has been committed */
-#define XA_OK 0 /* normal execution */
-#define XAER_ASYNC -2 /* asynchronous operation already
- outstanding */
-#define XAER_RMERR -3 /* a resource manager error occurred in
- the transaction branch */
-#define XAER_NOTA -4 /* the XID is not valid */
-#define XAER_INVAL -5 /* invalid arguments were given */
-#define XAER_PROTO -6 /* routine invoked in an improper
- context */
-#define XAER_RMFAIL -7 /* resource manager unavailable */
-#define XAER_DUPID -8 /* the XID already exists */
-#define XAER_OUTSIDE -9 /* resource manager doing work outside
- transaction */
-#endif /* ifndef XA_H */
-/*
- * End of xa.h header
- */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
deleted file mode 100644
index bb44a91a343..00000000000
--- a/storage/innobase/include/univ.i
+++ /dev/null
@@ -1,376 +0,0 @@
-/***************************************************************************
-Version control for database, common definitions, and include files
-
-(c) 1994 - 2000 Innobase Oy
-
-Created 1/20/1994 Heikki Tuuri
-****************************************************************************/
-
-#ifndef univ_i
-#define univ_i
-
-#ifdef __SUNPRO_C
-# include <sun_prefetch.h>
-#endif
-
-#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
-# undef __WIN__
-# define __WIN__
-
-# include <windows.h>
-
-# if !defined(WIN64) && !defined(_WIN64)
-# define UNIV_CAN_USE_X86_ASSEMBLER
-# endif
-
-# ifdef _NT_
-# define __NT__
-# endif
-
-#else
-/* The defines used with MySQL */
-
-/* Include two header files from MySQL to make the Unix flavor used
-in compiling more Posix-compatible. These headers also define __WIN__
-if we are compiling on Windows. */
-
-# include <my_global.h>
-# include <my_pthread.h>
-
-/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
-# include <sys/stat.h>
-
-# undef PACKAGE
-# undef VERSION
-
-/* Include the header file generated by GNU autoconf */
-# ifndef __WIN__
-# include "config.h"
-# endif
-
-# ifdef HAVE_SCHED_H
-# include <sched.h>
-# endif
-
-/* When compiling for Itanium IA64, undefine the flag below to prevent use
-of the 32-bit x86 assembler in mutex operations. */
-
-# if defined(__WIN__) && !defined(WIN64) && !defined(_WIN64)
-# define UNIV_CAN_USE_X86_ASSEMBLER
-# endif
-
-/* We only try to do explicit inlining of functions with gcc and
- Sun Studio */
-
-# if !defined(__GNUC__) && !defined(__SUNPRO_C)
-# undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */
-# define UNIV_MUST_NOT_INLINE
-# endif
-
-# ifdef HAVE_PREAD
-# define HAVE_PWRITE
-# endif
-
-#endif /* #if (defined(WIN32) || ... */
-
-/* DEBUG VERSION CONTROL
- ===================== */
-
-/* The following flag will make InnoDB to initialize
-all memory it allocates to zero. It hides Purify
-warnings about reading unallocated memory unless
-memory is read outside the allocated blocks. */
-/*
-#define UNIV_INIT_MEM_TO_ZERO
-*/
-
-/* Make a non-inline debug version */
-
-#if 0
-#define UNIV_DEBUG_VALGRIND /* Enable extra
- Valgrind instrumentation */
-#define UNIV_DEBUG /* Enable ut_ad() assertions */
-#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */
-#define UNIV_MEM_DEBUG /* detect memory leaks etc */
-#define UNIV_IBUF_DEBUG /* debug the insert buffer;
-this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
-and the insert buffer must be empty when the database is started */
-#define UNIV_SYNC_DEBUG /* debug mutex and latch
-operations (very slow); also UNIV_DEBUG must be defined */
-#define UNIV_SEARCH_DEBUG /* debug B-tree comparisons */
-#define UNIV_SYNC_PERF_STAT /* operation counts for
- rw-locks and mutexes */
-#define UNIV_SEARCH_PERF_STAT /* statistics for the
- adaptive hash index */
-#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output
- in sync0sync.c */
-#define UNIV_BTR_PRINT /* enable functions for
- printing B-trees */
-#endif
-
-#define UNIV_BTR_DEBUG /* check B-tree links */
-#define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */
-
-#ifdef HAVE_purify
-/* The following sets all new allocated memory to zero before use:
-this can be used to eliminate unnecessary Purify warnings, but note that
-it also masks many bugs Purify could detect. For detailed Purify analysis it
-is best to remove the define below and look through the warnings one
-by one. */
-#define UNIV_SET_MEM_TO_ZERO
-#endif
-
-/* Use malloc instead of innodb additional memory pool (great with tcmalloc) */
-#define UNIV_DISABLE_MEM_POOL
-
-#if defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMIC)
-/*
- * We have a full set of atomic ops available - we will use them
- */
-#define UNIV_SYNC_ATOMIC
-#endif
-
-#if defined(WIN_ATOMICS32) || defined(WIN_ATOMICS64)
-/*
- * We have a full set of atomic ops available - we will use them
- * This is on Windows
- */
-#define UNIV_SYNC_ATOMIC
-#endif
-
-/*
-#define UNIV_SQL_DEBUG
-#define UNIV_LOG_DEBUG
-*/
- /* the above option prevents forcing of log to disk
- at a buffer page write: it should be tested with this
- option off; also some ibuf tests are suppressed */
-/*
-#define UNIV_BASIC_LOG_DEBUG
-*/
- /* the above option enables basic recovery debugging:
- new allocated file pages are reset */
-
-#if (!defined(UNIV_DEBUG) && !defined(INSIDE_HA_INNOBASE_CC) && !defined(UNIV_MUST_NOT_INLINE))
-/* Definition for inline version */
-
-#ifdef __WIN__
-#define UNIV_INLINE __inline
-#else
-#define UNIV_INLINE static __inline__
-#endif
-
-#else
-/* If we want to compile a noninlined version we use the following macro
-definitions: */
-
-#define UNIV_NONINL
-#define UNIV_INLINE
-
-#endif /* UNIV_DEBUG */
-
-#ifdef _WIN32
-#define UNIV_WORD_SIZE 4
-#elif defined(_WIN64)
-#define UNIV_WORD_SIZE 8
-#else
-/* MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */
-#define UNIV_WORD_SIZE SIZEOF_LONG
-#endif
-
-/* The following alignment is used in memory allocations in memory heap
-management to ensure correct alignment for doubles etc. */
-#define UNIV_MEM_ALIGNMENT 8
-
-/* The following alignment is used in aligning lints etc. */
-#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE
-
-/*
- DATABASE VERSION CONTROL
- ========================
-*/
-
-/* The universal page size of the database */
-#define UNIV_PAGE_SIZE (2 * 8192) /* NOTE! Currently, this has to be a
- power of 2 */
-/* The 2-logarithm of UNIV_PAGE_SIZE: */
-#define UNIV_PAGE_SIZE_SHIFT 14
-
-/* Maximum number of parallel threads in a parallelized operation */
-#define UNIV_MAX_PARALLELISM 32
-
-/*
- UNIVERSAL TYPE DEFINITIONS
- ==========================
-*/
-
-/* Note that inside MySQL 'byte' is defined as char on Linux! */
-#define byte unsigned char
-
-/* Define an unsigned integer type that is exactly 32 bits. */
-
-#if SIZEOF_INT == 4
-typedef unsigned int ib_uint32_t;
-#elif SIZEOF_LONG == 4
-typedef unsigned long ib_uint32_t;
-#else
-#error "Neither int or long is 4 bytes"
-#endif
-
-/* Another basic type we use is unsigned long integer which should be equal to
-the word size of the machine, that is on a 32-bit platform 32 bits, and on a
-64-bit platform 64 bits. We also give the printf format for the type as a
-macro ULINTPF. */
-
-#ifdef _WIN64
-typedef unsigned __int64 ulint;
-#define ULINTPF "%I64u"
-typedef __int64 lint;
-#else
-typedef unsigned long int ulint;
-#define ULINTPF "%lu"
-typedef long int lint;
-#endif
-
-#ifdef __WIN__
-typedef __int64 ib_longlong;
-typedef unsigned __int64 ib_ulonglong;
-#else
-/* Note: longlong and ulonglong come from MySQL headers. */
-typedef longlong ib_longlong;
-typedef ulonglong ib_ulonglong;
-#endif
-
-typedef unsigned long long int ullint;
-
-#ifndef __WIN__
-#if SIZEOF_LONG != SIZEOF_VOIDP
-#error "Error: InnoDB's ulint must be of the same size as void*"
-#endif
-#endif
-
-/* The 'undefined' value for a ulint */
-#define ULINT_UNDEFINED ((ulint)(-1))
-
-/* The undefined 32-bit unsigned integer */
-#define ULINT32_UNDEFINED 0xFFFFFFFF
-
-/* Maximum value for a ulint */
-#define ULINT_MAX ((ulint)(-2))
-
-/* This 'ibool' type is used within Innobase. Remember that different included
-headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
-#define ibool ulint
-
-#ifndef TRUE
-
-#define TRUE 1
-#define FALSE 0
-
-#endif
-
-/* The following number as the length of a logical field means that the field
-has the SQL NULL as its value. NOTE that because we assume that the length
-of a field is a 32-bit integer when we store it, for example, to an undo log
-on disk, we must have also this number fit in 32 bits, also in 64-bit
-computers! */
-
-#define UNIV_SQL_NULL ULINT32_UNDEFINED
-
-/* Lengths which are not UNIV_SQL_NULL, but bigger than the following
-number indicate that a field contains a reference to an externally
-stored part of the field in the tablespace. The length field then
-contains the sum of the following flag and the locally stored len. */
-
-#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE)
-
-/* Some macros to improve branch prediction and reduce cache misses */
-#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
-/* Tell the compiler that 'expr' probably evaluates to 'constant'. */
-# define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant)
-/* Tell the compiler that a pointer is likely to be NULL */
-# define UNIV_LIKELY_NULL(ptr) __builtin_expect((ulint) ptr, 0)
-/* Minimize cache-miss latency by moving data at addr into a cache before
-it is read. */
-# define UNIV_PREFETCH_R(addr) __builtin_prefetch(addr, 0, 3)
-/* Minimize cache-miss latency by moving data at addr into a cache before
-it is read or written. */
-# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
-#elif defined(__SUNPRO_C)
-# define UNIV_EXPECT(expr,value) (expr)
-# define UNIV_LIKELY_NULL(expr) (expr)
-# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr)
-# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
-#else
-/* Dummy versions of the macros */
-# define UNIV_EXPECT(expr,value) (expr)
-# define UNIV_LIKELY_NULL(expr) (expr)
-# define UNIV_PREFETCH_R(addr) ((void) 0)
-# define UNIV_PREFETCH_RW(addr) ((void) 0)
-#endif
-/* Tell the compiler that cond is likely to hold */
-#define UNIV_LIKELY(cond) UNIV_EXPECT(cond, TRUE)
-/* Tell the compiler that cond is unlikely to hold */
-#define UNIV_UNLIKELY(cond) UNIV_EXPECT(cond, FALSE)
-
-/* Compile-time constant of the given array's size. */
-#define UT_ARR_SIZE(a) (sizeof(a) / sizeof((a)[0]))
-
-/* The return type from a thread's start function differs between Unix and
-Windows, so define a typedef for it and a macro to use at the end of such
-functions. */
-
-#ifdef __WIN__
-typedef ulint os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(0)
-#else
-typedef void* os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(NULL)
-#endif
-
-#include <stdio.h>
-#include "ut0dbg.h"
-#include "ut0ut.h"
-#include "db0err.h"
-#ifdef UNIV_DEBUG_VALGRIND
-# include <valgrind/memcheck.h>
-# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size)
-# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
-# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size)
-# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
-# define UNIV_MEM_ASSERT_RW(addr, size) do { \
- const void* _p = (const void*) (ulint) \
- VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \
- if (UNIV_LIKELY_NULL(_p)) \
- fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n", \
- __FILE__, __LINE__, \
- (const void*) (addr), (unsigned) (size), (long) \
- (((const char*) _p) - ((const char*) (addr)))); \
- } while (0)
-# define UNIV_MEM_ASSERT_W(addr, size) do { \
- const void* _p = (const void*) (ulint) \
- VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size); \
- if (UNIV_LIKELY_NULL(_p)) \
- fprintf(stderr, "%s:%d: %p[%u] unwritable at %ld\n", \
- __FILE__, __LINE__, \
- (const void*) (addr), (unsigned) (size), (long) \
- (((const char*) _p) - ((const char*) (addr)))); \
- } while (0)
-#else
-# define UNIV_MEM_VALID(addr, size) do {} while(0)
-# define UNIV_MEM_INVALID(addr, size) do {} while(0)
-# define UNIV_MEM_FREE(addr, size) do {} while(0)
-# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
-# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
-# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
-#endif
-#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \
- UNIV_MEM_ASSERT_W(addr, size); \
- UNIV_MEM_FREE(addr, size); \
-} while (0)
-#define UNIV_MEM_ASSERT_AND_ALLOC(addr, size) do { \
- UNIV_MEM_ASSERT_W(addr, size); \
- UNIV_MEM_ALLOC(addr, size); \
-} while (0)
-
-#endif
diff --git a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h
deleted file mode 100644
index 3ed1ea21a4d..00000000000
--- a/storage/innobase/include/usr0sess.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/******************************************************
-Sessions
-
-(c) 1996 Innobase Oy
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0sess_h
-#define usr0sess_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "trx0types.h"
-#include "srv0srv.h"
-#include "trx0types.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "data0data.h"
-#include "rem0rec.h"
-
-/*************************************************************************
-Opens a session. */
-
-sess_t*
-sess_open(void);
-/*============*/
- /* out, own: session object */
-/*************************************************************************
-Closes a session, freeing the memory occupied by it, if it is in a state
-where it should be closed. */
-
-ibool
-sess_try_close(
-/*===========*/
- /* out: TRUE if closed */
- sess_t* sess); /* in, own: session object */
-
-/* The session handle. All fields are protected by the kernel mutex */
-struct sess_struct{
- ulint state; /* state of the session */
- trx_t* trx; /* transaction object permanently
- assigned for the session: the
- transaction instance designated by the
- trx id changes, but the memory
- structure is preserved */
- UT_LIST_BASE_NODE_T(que_t)
- graphs; /* query graphs belonging to this
- session */
-};
-
-/* Session states */
-#define SESS_ACTIVE 1
-#define SESS_ERROR 2 /* session contains an error message
- which has not yet been communicated
- to the client */
-#ifndef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/usr0sess.ic b/storage/innobase/include/usr0sess.ic
deleted file mode 100644
index c851d5745b9..00000000000
--- a/storage/innobase/include/usr0sess.ic
+++ /dev/null
@@ -1,7 +0,0 @@
-/******************************************************
-Sessions
-
-(c) 1996 Innobase Oy
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/innobase/include/usr0types.h b/storage/innobase/include/usr0types.h
deleted file mode 100644
index 311471c1a0e..00000000000
--- a/storage/innobase/include/usr0types.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/******************************************************
-Users and sessions global types
-
-(c) 1996 Innobase Oy
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0types_h
-#define usr0types_h
-
-typedef struct sess_struct sess_t;
-
-#endif
diff --git a/storage/innobase/include/ut0byte.h b/storage/innobase/include/ut0byte.h
deleted file mode 100644
index 6533f1166ca..00000000000
--- a/storage/innobase/include/ut0byte.h
+++ /dev/null
@@ -1,250 +0,0 @@
-/**********************************************************************
-Utilities for byte operations
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0byte_h
-#define ut0byte_h
-
-
-#include "univ.i"
-
-/* Type definition for a 64-bit unsigned integer, which works also
-in 32-bit machines. NOTE! Access the fields only with the accessor
-functions. This definition appears here only for the compiler to
-know the size of a dulint. */
-
-typedef struct dulint_struct dulint;
-struct dulint_struct{
- ulint high; /* most significant 32 bits */
- ulint low; /* least significant 32 bits */
-};
-
-/* Zero value for a dulint */
-extern dulint ut_dulint_zero;
-
-/* Maximum value for a dulint */
-extern dulint ut_dulint_max;
-
-/***********************************************************
-Creates a 64-bit dulint out of two ulints. */
-UNIV_INLINE
-dulint
-ut_dulint_create(
-/*=============*/
- /* out: created dulint */
- ulint high, /* in: high-order 32 bits */
- ulint low); /* in: low-order 32 bits */
-/***********************************************************
-Gets the high-order 32 bits of a dulint. */
-UNIV_INLINE
-ulint
-ut_dulint_get_high(
-/*===============*/
- /* out: 32 bits in ulint */
- dulint d); /* in: dulint */
-/***********************************************************
-Gets the low-order 32 bits of a dulint. */
-UNIV_INLINE
-ulint
-ut_dulint_get_low(
-/*==============*/
- /* out: 32 bits in ulint */
- dulint d); /* in: dulint */
-/***********************************************************
-Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit
-integer type. */
-UNIV_INLINE
-ib_longlong
-ut_conv_dulint_to_longlong(
-/*=======================*/
- /* out: value in ib_longlong type */
- dulint d); /* in: dulint */
-/***********************************************************
-Tests if a dulint is zero. */
-UNIV_INLINE
-ibool
-ut_dulint_is_zero(
-/*==============*/
- /* out: TRUE if zero */
- dulint a); /* in: dulint */
-/***********************************************************
-Compares two dulints. */
-UNIV_INLINE
-int
-ut_dulint_cmp(
-/*==========*/
- /* out: -1 if a < b, 0 if a == b,
- 1 if a > b */
- dulint a, /* in: dulint */
- dulint b); /* in: dulint */
-/***********************************************************
-Calculates the max of two dulints. */
-UNIV_INLINE
-dulint
-ut_dulint_get_max(
-/*==============*/
- /* out: max(a, b) */
- dulint a, /* in: dulint */
- dulint b); /* in: dulint */
-/***********************************************************
-Calculates the min of two dulints. */
-UNIV_INLINE
-dulint
-ut_dulint_get_min(
-/*==============*/
- /* out: min(a, b) */
- dulint a, /* in: dulint */
- dulint b); /* in: dulint */
-/***********************************************************
-Adds a ulint to a dulint. */
-UNIV_INLINE
-dulint
-ut_dulint_add(
-/*==========*/
- /* out: sum a + b */
- dulint a, /* in: dulint */
- ulint b); /* in: ulint */
-/***********************************************************
-Subtracts a ulint from a dulint. */
-UNIV_INLINE
-dulint
-ut_dulint_subtract(
-/*===============*/
- /* out: a - b */
- dulint a, /* in: dulint */
- ulint b); /* in: ulint, b <= a */
-/***********************************************************
-Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller that 4G. */
-UNIV_INLINE
-ulint
-ut_dulint_minus(
-/*============*/
- /* out: a - b */
- dulint a, /* in: dulint; NOTE a must be >= b and at most
- 2 to power 32 - 1 greater */
- dulint b); /* in: dulint */
-/************************************************************
-Rounds a dulint downward to a multiple of a power of 2. */
-UNIV_INLINE
-dulint
-ut_dulint_align_down(
-/*=================*/
- /* out: rounded value */
- dulint n, /* in: number to be rounded */
- ulint align_no); /* in: align by this number which must be a
- power of 2 */
-/************************************************************
-Rounds a dulint upward to a multiple of a power of 2. */
-UNIV_INLINE
-dulint
-ut_dulint_align_up(
-/*===============*/
- /* out: rounded value */
- dulint n, /* in: number to be rounded */
- ulint align_no); /* in: align by this number which must be a
- power of 2 */
-/***********************************************************
-Increments a dulint variable by 1. */
-#define UT_DULINT_INC(D)\
-{\
- if ((D).low == 0xFFFFFFFFUL) {\
- (D).high = (D).high + 1;\
- (D).low = 0;\
- } else {\
- (D).low = (D).low + 1;\
- }\
-}
-/***********************************************************
-Tests if two dulints are equal. */
-#define UT_DULINT_EQ(D1, D2) (((D1).low == (D2).low)\
- && ((D1).high == (D2).high))
-/****************************************************************
-Sort function for dulint arrays. */
-void
-ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high);
-/*===============================================================*/
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the least product of align_no which is >= n. align_no has to be a
-power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align(
-/*==========*/
- /* out: rounded value */
- ulint n, /* in: number to be rounded */
- ulint align_no); /* in: align by this number */
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the biggest product of align_no which is <= n. align_no has to be a
-power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align_down(
-/*===============*/
- /* out: rounded value */
- ulint n, /* in: number to be rounded */
- ulint align_no); /* in: align by this number */
-/*************************************************************
-The following function rounds up a pointer to the nearest aligned address. */
-UNIV_INLINE
-void*
-ut_align(
-/*=====*/
- /* out: aligned pointer */
- void* ptr, /* in: pointer */
- ulint align_no); /* in: align by this number */
-/*************************************************************
-The following function rounds down a pointer to the nearest
-aligned address. */
-UNIV_INLINE
-void*
-ut_align_down(
-/*==========*/
- /* out: aligned pointer */
- void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
- __attribute__((const));
-/*************************************************************
-The following function computes the offset of a pointer from the nearest
-aligned address. */
-UNIV_INLINE
-ulint
-ut_align_offset(
-/*============*/
- /* out: distance from aligned
- pointer */
- const void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
- __attribute__((const));
-/*********************************************************************
-Gets the nth bit of a ulint. */
-UNIV_INLINE
-ibool
-ut_bit_get_nth(
-/*===========*/
- /* out: TRUE if nth bit is 1; 0th bit is defined to
- be the least significant */
- ulint a, /* in: ulint */
- ulint n); /* in: nth bit requested */
-/*********************************************************************
-Sets the nth bit of a ulint. */
-UNIV_INLINE
-ulint
-ut_bit_set_nth(
-/*===========*/
- /* out: the ulint with the bit set as requested */
- ulint a, /* in: ulint */
- ulint n, /* in: nth bit requested */
- ibool val); /* in: value for the bit to set */
-
-#ifndef UNIV_NONINL
-#include "ut0byte.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic
deleted file mode 100644
index 01b6c29d08f..00000000000
--- a/storage/innobase/include/ut0byte.ic
+++ /dev/null
@@ -1,397 +0,0 @@
-/******************************************************************
-Utilities for byte operations
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-/***********************************************************
-Creates a 64-bit dulint out of two ulints. */
-UNIV_INLINE
-dulint
-ut_dulint_create(
-/*=============*/
- /* out: created dulint */
- ulint high, /* in: high-order 32 bits */
- ulint low) /* in: low-order 32 bits */
-{
- dulint res;
-
- ut_ad(high <= 0xFFFFFFFF);
- ut_ad(low <= 0xFFFFFFFF);
-
- res.high = high;
- res.low = low;
-
- return(res);
-}
-
-/***********************************************************
-Gets the high-order 32 bits of a dulint. */
-UNIV_INLINE
-ulint
-ut_dulint_get_high(
-/*===============*/
- /* out: 32 bits in ulint */
- dulint d) /* in: dulint */
-{
- return(d.high);
-}
-
-/***********************************************************
-Gets the low-order 32 bits of a dulint. */
-UNIV_INLINE
-ulint
-ut_dulint_get_low(
-/*==============*/
- /* out: 32 bits in ulint */
- dulint d) /* in: dulint */
-{
- return(d.low);
-}
-
-/***********************************************************
-Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit
-integer type. */
-UNIV_INLINE
-ib_longlong
-ut_conv_dulint_to_longlong(
-/*=======================*/
- /* out: value in ib_longlong type */
- dulint d) /* in: dulint */
-{
- return((ib_longlong)d.low
- + (((ib_longlong)d.high) << 32));
-}
-
-/***********************************************************
-Tests if a dulint is zero. */
-UNIV_INLINE
-ibool
-ut_dulint_is_zero(
-/*==============*/
- /* out: TRUE if zero */
- dulint a) /* in: dulint */
-{
- if ((a.low == 0) && (a.high == 0)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***********************************************************
-Compares two dulints. */
-UNIV_INLINE
-int
-ut_dulint_cmp(
-/*==========*/
- /* out: -1 if a < b, 0 if a == b,
- 1 if a > b */
- dulint a, /* in: dulint */
- dulint b) /* in: dulint */
-{
- if (a.high > b.high) {
- return(1);
- } else if (a.high < b.high) {
- return(-1);
- } else if (a.low > b.low) {
- return(1);
- } else if (a.low < b.low) {
- return(-1);
- } else {
- return(0);
- }
-}
-
-/***********************************************************
-Calculates the max of two dulints. */
-UNIV_INLINE
-dulint
-ut_dulint_get_max(
-/*==============*/
- /* out: max(a, b) */
- dulint a, /* in: dulint */
- dulint b) /* in: dulint */
-{
- if (ut_dulint_cmp(a, b) > 0) {
-
- return(a);
- }
-
- return(b);
-}
-
-/***********************************************************
-Calculates the min of two dulints. */
-UNIV_INLINE
-dulint
-ut_dulint_get_min(
-/*==============*/
- /* out: min(a, b) */
- dulint a, /* in: dulint */
- dulint b) /* in: dulint */
-{
- if (ut_dulint_cmp(a, b) > 0) {
-
- return(b);
- }
-
- return(a);
-}
-
-/***********************************************************
-Adds a ulint to a dulint. */
-UNIV_INLINE
-dulint
-ut_dulint_add(
-/*==========*/
- /* out: sum a + b */
- dulint a, /* in: dulint */
- ulint b) /* in: ulint */
-{
- if (0xFFFFFFFFUL - b >= a.low) {
- a.low += b;
-
- return(a);
- }
-
- a.low = a.low - (0xFFFFFFFFUL - b) - 1;
-
- a.high++;
-
- return(a);
-}
-
-/***********************************************************
-Subtracts a ulint from a dulint. */
-UNIV_INLINE
-dulint
-ut_dulint_subtract(
-/*===============*/
- /* out: a - b */
- dulint a, /* in: dulint */
- ulint b) /* in: ulint, b <= a */
-{
- if (a.low >= b) {
- a.low -= b;
-
- return(a);
- }
-
- b -= a.low + 1;
-
- a.low = 0xFFFFFFFFUL - b;
-
- ut_ad(a.high > 0);
-
- a.high--;
-
- return(a);
-}
-
-/***********************************************************
-Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller than 4G. */
-UNIV_INLINE
-ulint
-ut_dulint_minus(
-/*============*/
- /* out: a - b */
- dulint a, /* in: dulint; NOTE a must be >= b and at most
- 2 to power 32 - 1 greater */
- dulint b) /* in: dulint */
-{
- ulint diff;
-
- if (a.high == b.high) {
- ut_ad(a.low >= b.low);
-
- return(a.low - b.low);
- }
-
- ut_ad(a.high == b.high + 1);
-
- diff = (ulint)(0xFFFFFFFFUL - b.low);
- diff += 1 + a.low;
-
- ut_ad(diff > a.low);
-
- return(diff);
-}
-
-/************************************************************
-Rounds a dulint downward to a multiple of a power of 2. */
-UNIV_INLINE
-dulint
-ut_dulint_align_down(
-/*=================*/
- /* out: rounded value */
- dulint n, /* in: number to be rounded */
- ulint align_no) /* in: align by this number which must be a
- power of 2 */
-{
- ulint low, high;
-
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
-
- low = ut_dulint_get_low(n);
- high = ut_dulint_get_high(n);
-
- low = low & ~(align_no - 1);
-
- return(ut_dulint_create(high, low));
-}
-
-/************************************************************
-Rounds a dulint upward to a multiple of a power of 2. */
-UNIV_INLINE
-dulint
-ut_dulint_align_up(
-/*===============*/
- /* out: rounded value */
- dulint n, /* in: number to be rounded */
- ulint align_no) /* in: align by this number which must be a
- power of 2 */
-{
- return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no));
-}
-
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the least multiple of align_no which is >= n. align_no
-has to be a power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align(
-/*==========*/
- /* out: rounded value */
- ulint n, /* in: number to be rounded */
- ulint align_no) /* in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
-
- return((n + align_no - 1) & ~(align_no - 1));
-}
-
-/*************************************************************
-The following function rounds up a pointer to the nearest aligned address. */
-UNIV_INLINE
-void*
-ut_align(
-/*=====*/
- /* out: aligned pointer */
- void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
- ut_ad(ptr);
-
- ut_ad(sizeof(void*) == sizeof(ulint));
-
- return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1)));
-}
-
-/************************************************************
-The following function calculates the value of an integer n rounded
-to the biggest multiple of align_no which is <= n. align_no has to be a
-power of 2. */
-UNIV_INLINE
-ulint
-ut_calc_align_down(
-/*===============*/
- /* out: rounded value */
- ulint n, /* in: number to be rounded */
- ulint align_no) /* in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
-
- return(n & ~(align_no - 1));
-}
-
-/*************************************************************
-The following function rounds down a pointer to the nearest
-aligned address. */
-UNIV_INLINE
-void*
-ut_align_down(
-/*==========*/
- /* out: aligned pointer */
- void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
- ut_ad(ptr);
-
- ut_ad(sizeof(void*) == sizeof(ulint));
-
- return((void*)((((ulint)ptr)) & ~(align_no - 1)));
-}
-
-/*************************************************************
-The following function computes the offset of a pointer from the nearest
-aligned address. */
-UNIV_INLINE
-ulint
-ut_align_offset(
-/*============*/
- /* out: distance from
- aligned pointer */
- const void* ptr, /* in: pointer */
- ulint align_no) /* in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
- ut_ad(ptr);
-
- ut_ad(sizeof(void*) == sizeof(ulint));
-
- return(((ulint)ptr) & (align_no - 1));
-}
-
-/*********************************************************************
-Gets the nth bit of a ulint. */
-UNIV_INLINE
-ibool
-ut_bit_get_nth(
-/*===========*/
- /* out: TRUE if nth bit is 1; 0th bit is defined to
- be the least significant */
- ulint a, /* in: ulint */
- ulint n) /* in: nth bit requested */
-{
- ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- return(1 & (a >> n));
-}
-
-/*********************************************************************
-Sets the nth bit of a ulint. */
-UNIV_INLINE
-ulint
-ut_bit_set_nth(
-/*===========*/
- /* out: the ulint with the bit set as requested */
- ulint a, /* in: ulint */
- ulint n, /* in: nth bit requested */
- ibool val) /* in: value for the bit to set */
-{
- ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- if (val) {
- return(((ulint) 1 << n) | a);
- } else {
- return(~((ulint) 1 << n) & a);
- }
-}
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
deleted file mode 100644
index a317f35f4be..00000000000
--- a/storage/innobase/include/ut0dbg.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*********************************************************************
-Debug utilities for Innobase
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/30/1994 Heikki Tuuri
-**********************************************************************/
-
-#ifndef ut0dbg_h
-#define ut0dbg_h
-
-#include "univ.i"
-#include <stdlib.h>
-#include "os0thread.h"
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR)))
-#else
-extern ulint ut_dbg_zero; /* This is used to eliminate
- compiler warnings */
-# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero)
-#endif
-
-/*****************************************************************
-Report a failed assertion. */
-
-void
-ut_dbg_assertion_failed(
-/*====================*/
- const char* expr, /* in: the failed assertion */
- const char* file, /* in: source file containing the assertion */
- ulint line); /* in: line number of the assertion */
-
-#ifdef __NETWARE__
-/* Flag for ignoring further assertion failures.
-On NetWare, have a graceful exit rather than a segfault to avoid abends. */
-extern ibool panic_shutdown;
-/* Abort the execution. */
-void ut_dbg_panic(void);
-# define UT_DBG_PANIC ut_dbg_panic()
-/* Stop threads in ut_a(). */
-# define UT_DBG_STOP while (0) /* We do not do this on NetWare */
-#else /* __NETWARE__ */
-# if defined(__WIN__) || defined(__INTEL_COMPILER)
-# undef UT_DBG_USE_ABORT
-# elif defined(__GNUC__) && (__GNUC__ > 2)
-# define UT_DBG_USE_ABORT
-# endif
-
-# ifndef UT_DBG_USE_ABORT
-/* A null pointer that will be dereferenced to trigger a memory trap */
-extern ulint* ut_dbg_null_ptr;
-# endif
-
-# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/* Flag for indicating that all threads should stop. This will be set
-by ut_dbg_assertion_failed(). */
-extern ibool ut_dbg_stop_threads;
-
-/*****************************************************************
-Stop a thread after assertion failure. */
-
-void
-ut_dbg_stop_thread(
-/*===============*/
- const char* file,
- ulint line);
-# endif
-
-# ifdef UT_DBG_USE_ABORT
-/* Abort the execution. */
-# define UT_DBG_PANIC abort()
-/* Stop threads (null operation) */
-# define UT_DBG_STOP while (0)
-# else /* UT_DBG_USE_ABORT */
-/* Abort the execution. */
-# define UT_DBG_PANIC \
- if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL
-/* Stop threads in ut_a(). */
-# define UT_DBG_STOP do \
- if (UNIV_UNLIKELY(ut_dbg_stop_threads)) { \
- ut_dbg_stop_thread(__FILE__, (ulint) __LINE__); \
- } while (0)
-# endif /* UT_DBG_USE_ABORT */
-#endif /* __NETWARE__ */
-
-/* Abort execution if EXPR does not evaluate to nonzero. */
-#define ut_a(EXPR) do { \
- if (UT_DBG_FAIL(EXPR)) { \
- ut_dbg_assertion_failed(#EXPR, \
- __FILE__, (ulint) __LINE__); \
- UT_DBG_PANIC; \
- } \
- UT_DBG_STOP; \
-} while (0)
-
-/* Abort execution. */
-#define ut_error do { \
- ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__); \
- UT_DBG_PANIC; \
-} while (0)
-
-#ifdef UNIV_DEBUG
-#define ut_ad(EXPR) ut_a(EXPR)
-#define ut_d(EXPR) do {EXPR;} while (0)
-#else
-#define ut_ad(EXPR)
-#define ut_d(EXPR)
-#endif
-
-#define UT_NOT_USED(A) A = A
-
-#endif
diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h
deleted file mode 100644
index c35cf202600..00000000000
--- a/storage/innobase/include/ut0list.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/***********************************************************************
-A double-linked list. This differs from the one in ut0lst.h in that in this
-one, each list node contains a pointer to the data, whereas the one in
-ut0lst.h uses a strategy where the list pointers are embedded in the data
-items themselves.
-
-Use this one when you need to store arbitrary data in the list where you
-can't embed the list pointers in the data, if a data item needs to be
-stored in multiple lists, etc.
-
-Note about the memory management: ib_list_t is a fixed-size struct whose
-allocation/deallocation is done through ib_list_create/ib_list_free, but the
-memory for the list nodes is allocated through a user-given memory heap,
-which can either be the same for all nodes or vary per node. Most users will
-probably want to create a memory heap to store the item-specific data, and
-pass in this same heap to the list node creation functions, thus
-automatically freeing the list node when the item's heap is freed.
-
-************************************************************************/
-
-
-#ifndef IB_LIST_H
-#define IB_LIST_H
-
-#include "mem0mem.h"
-
-typedef struct ib_list_struct ib_list_t;
-typedef struct ib_list_node_struct ib_list_node_t;
-typedef struct ib_list_helper_struct ib_list_helper_t;
-
-/********************************************************************
-Create a new list using mem_alloc. Lists created with this function must be
-freed with ib_list_free. */
-
-ib_list_t*
-ib_list_create(void);
-/*=================*/
- /* out: list */
-
-
-/********************************************************************
-Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function. */
-
-ib_list_t*
-ib_list_create_heap(
-/*================*/
- /* out: list */
- mem_heap_t* heap); /* in: memory heap to use */
-
-/********************************************************************
-Free a list. */
-
-void
-ib_list_free(
-/*=========*/
- ib_list_t* list); /* in: list */
-
-/********************************************************************
-Add the data to the start of the list. */
-
-ib_list_node_t*
-ib_list_add_first(
-/*==============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- void* data, /* in: data */
- mem_heap_t* heap); /* in: memory heap to use */
-
-/********************************************************************
-Add the data to the end of the list. */
-
-ib_list_node_t*
-ib_list_add_last(
-/*=============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- void* data, /* in: data */
- mem_heap_t* heap); /* in: memory heap to use */
-
-/********************************************************************
-Add the data after the indicated node. */
-
-ib_list_node_t*
-ib_list_add_after(
-/*==============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- ib_list_node_t* prev_node, /* in: node preceding new node (can
- be NULL) */
- void* data, /* in: data */
- mem_heap_t* heap); /* in: memory heap to use */
-
-/********************************************************************
-Remove the node from the list. */
-
-void
-ib_list_remove(
-/*===========*/
- ib_list_t* list, /* in: list */
- ib_list_node_t* node); /* in: node to remove */
-
-/********************************************************************
-Get the first node in the list. */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_first(
-/*==============*/
- /* out: first node, or NULL */
- ib_list_t* list); /* in: list */
-
-/********************************************************************
-Get the last node in the list. */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_last(
-/*=============*/
- /* out: last node, or NULL */
- ib_list_t* list); /* in: list */
-
-/* List. */
-struct ib_list_struct {
- ib_list_node_t* first; /* first node */
- ib_list_node_t* last; /* last node */
- ibool is_heap_list; /* TRUE if this list was
- allocated through a heap */
-};
-
-/* A list node. */
-struct ib_list_node_struct {
- ib_list_node_t* prev; /* previous node */
- ib_list_node_t* next; /* next node */
- void* data; /* user data */
-};
-
-/* Quite often, the only additional piece of data you need is the per-item
-memory heap, so we have this generic struct available to use in those
-cases. */
-struct ib_list_helper_struct {
- mem_heap_t* heap; /* memory heap */
- void* data; /* user data */
-};
-
-#ifndef UNIV_NONINL
-#include "ut0list.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic
deleted file mode 100644
index c2d3e4557f0..00000000000
--- a/storage/innobase/include/ut0list.ic
+++ /dev/null
@@ -1,23 +0,0 @@
-/********************************************************************
-Get the first node in the list. */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_first(
-/*==============*/
- /* out: first node, or NULL */
- ib_list_t* list) /* in: list */
-{
- return(list->first);
-}
-
-/********************************************************************
-Get the last node in the list. */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_last(
-/*=============*/
- /* out: last node, or NULL */
- ib_list_t* list) /* in: list */
-{
- return(list->last);
-}
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
deleted file mode 100644
index ebe2803fe23..00000000000
--- a/storage/innobase/include/ut0lst.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/**********************************************************************
-List utilities
-
-(c) 1995 Innobase Oy
-
-Created 9/10/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0lst_h
-#define ut0lst_h
-
-#include "univ.i"
-
-/* This module implements the two-way linear list which should be used
-if a list is used in the database. Note that a single struct may belong
-to two or more lists, provided that the lists are given different names.
-An example of the usage of the lists can be found in fil0fil.c. */
-
-/***********************************************************************
-This macro expands to the unnamed type definition of a struct which acts
-as the two-way list base node. The base node contains pointers
-to both ends of the list and a count of nodes in the list (excluding
-the base node from the count). TYPE should be the list node type name. */
-
-#define UT_LIST_BASE_NODE_T(TYPE)\
-struct {\
- ulint count; /* count of nodes in list */\
- TYPE * start; /* pointer to list start, NULL if empty */\
- TYPE * end; /* pointer to list end, NULL if empty */\
-}\
-
-/***********************************************************************
-This macro expands to the unnamed type definition of a struct which
-should be embedded in the nodes of the list, the node type must be a struct.
-This struct contains the pointers to next and previous nodes in the list.
-The name of the field in the node struct should be the name given
-to the list. TYPE should be the list node type name. Example of usage:
-
-typedef struct LRU_node_struct LRU_node_t;
-struct LRU_node_struct {
- UT_LIST_NODE_T(LRU_node_t) LRU_list;
- ...
-}
-The example implements an LRU list of name LRU_list. Its nodes are of type
-LRU_node_t.
-*/
-
-#define UT_LIST_NODE_T(TYPE)\
-struct {\
- TYPE * prev; /* pointer to the previous node,\
- NULL if start of list */\
- TYPE * next; /* pointer to next node, NULL if end of list */\
-}\
-
-/***********************************************************************
-Initializes the base node of a two-way list. */
-
-#define UT_LIST_INIT(BASE)\
-{\
- (BASE).count = 0;\
- (BASE).start = NULL;\
- (BASE).end = NULL;\
-}\
-
-/***********************************************************************
-Adds the node as the first element in a two-way linked list.
-BASE has to be the base node (not a pointer to it). N has to be
-the pointer to the node to be added to the list. NAME is the list name. */
-
-#define UT_LIST_ADD_FIRST(NAME, BASE, N)\
-{\
- ut_ad(N);\
- ((BASE).count)++;\
- ((N)->NAME).next = (BASE).start;\
- ((N)->NAME).prev = NULL;\
- if ((BASE).start != NULL) {\
- ut_ad((BASE).start != (N));\
- (((BASE).start)->NAME).prev = (N);\
- }\
- (BASE).start = (N);\
- if ((BASE).end == NULL) {\
- (BASE).end = (N);\
- }\
-}\
-
-/***********************************************************************
-Adds the node as the last element in a two-way linked list.
-BASE has to be the base node (not a pointer to it). N has to be
-the pointer to the node to be added to the list. NAME is the list name. */
-
-#define UT_LIST_ADD_LAST(NAME, BASE, N)\
-{\
- ut_ad(N);\
- ((BASE).count)++;\
- ((N)->NAME).prev = (BASE).end;\
- ((N)->NAME).next = NULL;\
- if ((BASE).end != NULL) {\
- ut_ad((BASE).end != (N));\
- (((BASE).end)->NAME).next = (N);\
- }\
- (BASE).end = (N);\
- if ((BASE).start == NULL) {\
- (BASE).start = (N);\
- }\
-}\
-
-/***********************************************************************
-Inserts a NODE2 after NODE1 in a list.
-BASE has to be the base node (not a pointer to it). NAME is the list
-name, NODE1 and NODE2 are pointers to nodes. */
-
-#define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\
-{\
- ut_ad(NODE1);\
- ut_ad(NODE2);\
- ut_ad((NODE1) != (NODE2));\
- ((BASE).count)++;\
- ((NODE2)->NAME).prev = (NODE1);\
- ((NODE2)->NAME).next = ((NODE1)->NAME).next;\
- if (((NODE1)->NAME).next != NULL) {\
- ((((NODE1)->NAME).next)->NAME).prev = (NODE2);\
- }\
- ((NODE1)->NAME).next = (NODE2);\
- if ((BASE).end == (NODE1)) {\
- (BASE).end = (NODE2);\
- }\
-}\
-
-/* Invalidate the pointers in a list node. */
-#ifdef UNIV_LIST_DEBUG
-# define UT_LIST_REMOVE_CLEAR(NAME, N) \
-((N)->NAME.prev = (N)->NAME.next = (void*) -1)
-#else
-# define UT_LIST_REMOVE_CLEAR(NAME, N) while (0)
-#endif
-
-/***********************************************************************
-Removes a node from a two-way linked list. BASE has to be the base node
-(not a pointer to it). N has to be the pointer to the node to be removed
-from the list. NAME is the list name. */
-
-#define UT_LIST_REMOVE(NAME, BASE, N) \
-do { \
- ut_ad(N); \
- ut_a((BASE).count > 0); \
- ((BASE).count)--; \
- if (((N)->NAME).next != NULL) { \
- ((((N)->NAME).next)->NAME).prev = ((N)->NAME).prev; \
- } else { \
- (BASE).end = ((N)->NAME).prev; \
- } \
- if (((N)->NAME).prev != NULL) { \
- ((((N)->NAME).prev)->NAME).next = ((N)->NAME).next; \
- } else { \
- (BASE).start = ((N)->NAME).next; \
- } \
- UT_LIST_REMOVE_CLEAR(NAME, N); \
-} while (0)
-
-/************************************************************************
-Gets the next node in a two-way list. NAME is the name of the list
-and N is pointer to a node. */
-
-#define UT_LIST_GET_NEXT(NAME, N)\
- (((N)->NAME).next)
-
-/************************************************************************
-Gets the previous node in a two-way list. NAME is the name of the list
-and N is pointer to a node. */
-
-#define UT_LIST_GET_PREV(NAME, N)\
- (((N)->NAME).prev)
-
-/************************************************************************
-Alternative macro to get the number of nodes in a two-way list, i.e.,
-its length. BASE is the base node (not a pointer to it). */
-
-#define UT_LIST_GET_LEN(BASE)\
- (BASE).count
-
-/************************************************************************
-Gets the first node in a two-way list, or returns NULL,
-if the list is empty. BASE is the base node (not a pointer to it). */
-
-#define UT_LIST_GET_FIRST(BASE)\
- (BASE).start
-
-/************************************************************************
-Gets the last node in a two-way list, or returns NULL,
-if the list is empty. BASE is the base node (not a pointer to it). */
-
-#define UT_LIST_GET_LAST(BASE)\
- (BASE).end
-
-/************************************************************************
-Checks the consistency of a two-way list. NAME is the name of the list,
-TYPE is the node type, and BASE is the base node (not a pointer to it). */
-
-#define UT_LIST_VALIDATE(NAME, TYPE, BASE)\
-{\
- ulint ut_list_i_313;\
- TYPE * ut_list_node_313;\
-\
- ut_list_node_313 = (BASE).start;\
-\
- for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
- ut_list_i_313++) {\
- ut_a(ut_list_node_313);\
- ut_list_node_313 = (ut_list_node_313->NAME).next;\
- }\
-\
- ut_a(ut_list_node_313 == NULL);\
-\
- ut_list_node_313 = (BASE).end;\
-\
- for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
- ut_list_i_313++) {\
- ut_a(ut_list_node_313);\
- ut_list_node_313 = (ut_list_node_313->NAME).prev;\
- }\
-\
- ut_a(ut_list_node_313 == NULL);\
-}\
-
-
-#endif
-
diff --git a/storage/innobase/include/ut0mem.h b/storage/innobase/include/ut0mem.h
deleted file mode 100644
index e56895bc142..00000000000
--- a/storage/innobase/include/ut0mem.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/***********************************************************************
-Memory primitives
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-************************************************************************/
-
-#ifndef ut0mem_h
-#define ut0mem_h
-
-#include "univ.i"
-#include <string.h>
-#include <stdlib.h>
-
-/* The total amount of memory currently allocated from the OS with malloc */
-extern ulint ut_total_allocated_memory;
-
-UNIV_INLINE
-void*
-ut_memcpy(void* dest, const void* sour, ulint n);
-
-UNIV_INLINE
-void*
-ut_memmove(void* dest, const void* sour, ulint n);
-
-UNIV_INLINE
-int
-ut_memcmp(const void* str1, const void* str2, ulint n);
-
-
-/**************************************************************************
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE. */
-
-void*
-ut_malloc_low(
-/*==========*/
- /* out, own: allocated memory */
- ulint n, /* in: number of bytes to allocate */
- ibool set_to_zero, /* in: TRUE if allocated memory
- should be set to zero if
- UNIV_SET_MEM_TO_ZERO is defined */
- ibool assert_on_error); /* in: if TRUE, we crash mysqld if
- the memory cannot be allocated */
-/**************************************************************************
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined. */
-
-void*
-ut_malloc(
-/*======*/
- /* out, own: allocated memory */
- ulint n); /* in: number of bytes to allocate */
-/**************************************************************************
-Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
-out. It cannot be used if we want to return an error message. Prints a
-message to stderr if it fails. */
-
-ibool
-ut_test_malloc(
-/*===========*/
- /* out: TRUE if succeeded */
- ulint n); /* in: try to allocate this many bytes */
-/**************************************************************************
-Frees a memory block allocated with ut_malloc. */
-
-void
-ut_free(
-/*====*/
- void* ptr); /* in, own: memory block */
-/**************************************************************************
-Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
- realloc() changes the size of the memory block pointed to
- by ptr to size bytes. The contents will be unchanged to
- the minimum of the old and new sizes; newly allocated mem-
- ory will be uninitialized. If ptr is NULL, the call is
- equivalent to malloc(size); if size is equal to zero, the
- call is equivalent to free(ptr). Unless ptr is NULL, it
- must have been returned by an earlier call to malloc(),
- calloc() or realloc().
-
-RETURN VALUE
- realloc() returns a pointer to the newly allocated memory,
- which is suitably aligned for any kind of variable and may
- be different from ptr, or NULL if the request fails. If
- size was equal to 0, either NULL or a pointer suitable to
- be passed to free() is returned. If realloc() fails the
- original block is left untouched - it is not freed or
- moved. */
-
-void*
-ut_realloc(
-/*=======*/
- /* out, own: pointer to new mem block or NULL */
- void* ptr, /* in: pointer to old block or NULL */
- ulint size); /* in: desired size */
-/**************************************************************************
-Frees in shutdown all allocated memory not freed yet. */
-
-void
-ut_free_all_mem(void);
-/*=================*/
-
-UNIV_INLINE
-char*
-ut_strcpy(char* dest, const char* sour);
-
-UNIV_INLINE
-ulint
-ut_strlen(const char* str);
-
-UNIV_INLINE
-int
-ut_strcmp(const void* str1, const void* str2);
-
-/**************************************************************************
-Copies up to size - 1 characters from the NUL-terminated string src to
-dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size. */
-
-ulint
-ut_strlcpy(
-/*=======*/
- /* out: strlen(src) */
- char* dst, /* in: destination buffer */
- const char* src, /* in: source buffer */
- ulint size); /* in: size of destination buffer */
-
-/**************************************************************************
-Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first. */
-
-ulint
-ut_strlcpy_rev(
-/*===========*/
- /* out: strlen(src) */
- char* dst, /* in: destination buffer */
- const char* src, /* in: source buffer */
- ulint size); /* in: size of destination buffer */
-
-/**************************************************************************
-Compute strlen(ut_strcpyq(str, q)). */
-UNIV_INLINE
-ulint
-ut_strlenq(
-/*=======*/
- /* out: length of the string when quoted */
- const char* str, /* in: null-terminated string */
- char q); /* in: the quote character */
-
-/**************************************************************************
-Make a quoted copy of a NUL-terminated string. Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq(). */
-
-char*
-ut_strcpyq(
-/*=======*/
- /* out: pointer to end of dest */
- char* dest, /* in: output buffer */
- char q, /* in: the quote character */
- const char* src); /* in: null-terminated string */
-
-/**************************************************************************
-Make a quoted copy of a fixed-length string. Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq(). */
-
-char*
-ut_memcpyq(
-/*=======*/
- /* out: pointer to end of dest */
- char* dest, /* in: output buffer */
- char q, /* in: the quote character */
- const char* src, /* in: string to be quoted */
- ulint len); /* in: length of src */
-
-/**************************************************************************
-Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once. */
-
-ulint
-ut_strcount(
-/*========*/
- /* out: the number of times s2 occurs in s1 */
- const char* s1, /* in: string to search in */
- const char* s2); /* in: string to search for */
-
-/**************************************************************************
-Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once. */
-
-char *
-ut_strreplace(
-/*==========*/
- /* out, own: modified string, must be
- freed with mem_free() */
- const char* str, /* in: string to operate on */
- const char* s1, /* in: string to replace */
- const char* s2); /* in: string to replace s1 with */
-
-#ifndef UNIV_NONINL
-#include "ut0mem.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ut0mem.ic b/storage/innobase/include/ut0mem.ic
deleted file mode 100644
index e0253ebf618..00000000000
--- a/storage/innobase/include/ut0mem.ic
+++ /dev/null
@@ -1,70 +0,0 @@
-/***********************************************************************
-Memory primitives
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-************************************************************************/
-
-UNIV_INLINE /* Thin wrapper: forwards dest, sour and n to memcpy() and returns its result. */
-void*
-ut_memcpy(void* dest, const void* sour, ulint n)
-{
- return(memcpy(dest, sour, n));
-}
-
-UNIV_INLINE /* Thin wrapper: forwards dest, sour and n to memmove() and returns its result. */
-void*
-ut_memmove(void* dest, const void* sour, ulint n)
-{
- return(memmove(dest, sour, n));
-}
-
-UNIV_INLINE /* Thin wrapper: compares the first n bytes of str1 and str2 with memcmp() and returns its result. */
-int
-ut_memcmp(const void* str1, const void* str2, ulint n)
-{
- return(memcmp(str1, str2, n));
-}
-
-UNIV_INLINE /* Thin wrapper: forwards to strcpy(); no length check is made on dest here. */
-char*
-ut_strcpy(char* dest, const char* sour)
-{
- return(strcpy(dest, sour));
-}
-
-UNIV_INLINE /* Thin wrapper: returns strlen(str), converted to ulint. */
-ulint
-ut_strlen(const char* str)
-{
- return(strlen(str));
-}
-
-UNIV_INLINE /* Thin wrapper: casts both void pointers to const char* and returns strcmp() of them. */
-int
-ut_strcmp(const void* str1, const void* str2)
-{
- return(strcmp((const char*)str1, (const char*)str2));
-}
-
-/**************************************************************************
-Compute strlen(ut_strcpyq(str, q)), that is, the length str would have
-after quoting. Every character of str is counted once and each occurrence
-of the quote character q is counted one extra time. The terminating NUL
-is not counted, and nothing is added for leading or trailing quotes. */
-UNIV_INLINE
-ulint
-ut_strlenq(
-/*=======*/
- /* out: length of the string when quoted */
- const char* str, /* in: null-terminated string */
- char q) /* in: the quote character */
-{
- ulint len;
-
- for (len = 0; *str; len++, str++) {
- if (*str == q) {
- len++;
- }
- }
-
- return(len);
-}
diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
deleted file mode 100644
index 3f3fce1075c..00000000000
--- a/storage/innobase/include/ut0rnd.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/**********************************************************************
-Random numbers and hashing
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0rnd_h
-#define ut0rnd_h
-
-#include "univ.i"
-
-#include "ut0byte.h"
-
-/* The 'character code' for end of field or string (used
-in folding records) */
-#define UT_END_OF_FIELD 257
-
-/************************************************************
-This is used to set the random number seed. */
-UNIV_INLINE
-void
-ut_rnd_set_seed(
-/*============*/
- ulint seed); /* in: seed */
-/************************************************************
-The following function generates a series of 'random' ulint integers. */
-UNIV_INLINE
-ulint
-ut_rnd_gen_next_ulint(
-/*==================*/
- /* out: the next 'random' number */
- ulint rnd); /* in: the previous random number value */
-/*************************************************************
-The following function generates 'random' ulint integers which
-enumerate the value space (let there be N of them) of ulint integers
-in a pseudo-random fashion. Note that the same integer is repeated
-always after N calls to the generator. */
-UNIV_INLINE
-ulint
-ut_rnd_gen_ulint(void);
-/*==================*/
- /* out: the 'random' number */
-/************************************************************
-Generates a random integer from a given interval. */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- /* out: the 'random' number */
- ulint low, /* in: low limit; can generate also this value */
- ulint high); /* in: high limit; can generate also this value */
-/*************************************************************
-Generates a random iboolean value. */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void);
-/*=================*/
- /* out: the random value */
-/***********************************************************
-The following function generates a hash value for a ulint integer
-to a hash table of size table_size, which should be a prime or some
-random number to work reliably. */
-UNIV_INLINE
-ulint
-ut_hash_ulint(
-/*==========*/
- /* out: hash value */
- ulint key, /* in: value to be hashed */
- ulint table_size); /* in: hash table size */
-/*****************************************************************
-Folds a pair of ulints. */
-UNIV_INLINE
-ulint
-ut_fold_ulint_pair(
-/*===============*/
- /* out: folded value */
- ulint n1, /* in: ulint */
- ulint n2); /* in: ulint */
-/*****************************************************************
-Folds a dulint. */
-UNIV_INLINE
-ulint
-ut_fold_dulint(
-/*===========*/
- /* out: folded value */
- dulint d); /* in: dulint */
-/*****************************************************************
-Folds a character string ending in the null character. */
-UNIV_INLINE
-ulint
-ut_fold_string(
-/*===========*/
- /* out: folded value */
- const char* str); /* in: null-terminated string */
-/*****************************************************************
-Folds a binary string. */
-UNIV_INLINE
-ulint
-ut_fold_binary(
-/*===========*/
- /* out: folded value */
- const byte* str, /* in: string of bytes */
- ulint len); /* in: length */
-/***************************************************************
-Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2. */
-
-ulint
-ut_find_prime(
-/*==========*/
- /* out: prime */
- ulint n); /* in: positive number > 100 */
-
-
-#ifndef UNIV_NONINL
-#include "ut0rnd.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
deleted file mode 100644
index 625c378489a..00000000000
--- a/storage/innobase/include/ut0rnd.ic
+++ /dev/null
@@ -1,221 +0,0 @@
-/******************************************************************
-Random numbers and hashing
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-#define UT_HASH_RANDOM_MASK 1463735687
-#define UT_HASH_RANDOM_MASK2 1653893711
-#define UT_RND1 151117737
-#define UT_RND2 119785373
-#define UT_RND3 85689495
-#define UT_RND4 76595339
-#define UT_SUM_RND2 98781234
-#define UT_SUM_RND3 126792457
-#define UT_SUM_RND4 63498502
-#define UT_XOR_RND1 187678878
-#define UT_XOR_RND2 143537923
-
-extern ulint ut_rnd_ulint_counter;
-
-/************************************************************
-This is used to set the random number seed. The seed is stored in the
-global ut_rnd_ulint_counter, the same variable that ut_rnd_gen_ulint()
-advances on each call. */
-UNIV_INLINE
-void
-ut_rnd_set_seed(
-/*============*/
- ulint seed) /* in: seed */
-{
- ut_rnd_ulint_counter = seed;
-}
-
-/************************************************************
-The following function generates a series of 'random' ulint integers.
-The result depends only on the argument: no global state is read or
-written. The value goes through three multiply-and-add steps; between
-them it is XORed with a constant and then combined as
-(rnd << 20) + (rnd >> (n_bits - 20)), where n_bits is 8 * sizeof(ulint).
-NOTE(review): that expression is a 20-bit left rotation provided ulint is
-an unsigned type; confirm against its definition. */
-UNIV_INLINE
-ulint
-ut_rnd_gen_next_ulint(
-/*==================*/
- /* out: the next 'random' number */
- ulint rnd) /* in: the previous random number value */
-{
- ulint n_bits;
-
- n_bits = 8 * sizeof(ulint);
-
- rnd = UT_RND2 * rnd + UT_SUM_RND3;
- rnd = UT_XOR_RND1 ^ rnd;
- rnd = (rnd << 20) + (rnd >> (n_bits - 20));
- rnd = UT_RND3 * rnd + UT_SUM_RND4;
- rnd = UT_XOR_RND2 ^ rnd;
- rnd = (rnd << 20) + (rnd >> (n_bits - 20));
- rnd = UT_RND1 * rnd + UT_SUM_RND2;
-
- return(rnd);
-}
-
-/************************************************************
-The following function generates 'random' ulint integers which
-enumerate the value space of ulint integers in a pseudo random
-fashion. Note that the same integer is repeated always after
-2 to power 32 calls to the generator (if ulint is 32-bit).
-Each call first advances the global ut_rnd_ulint_counter with
-counter = UT_RND1 * counter + UT_RND2 and then returns
-ut_rnd_gen_next_ulint() applied to the new counter value.
-NOTE(review): the local n_bits is assigned but never read.
-NOTE(review): the counter is a plain global updated with no locking
-visible in this function; confirm that callers tolerate races. */
-UNIV_INLINE
-ulint
-ut_rnd_gen_ulint(void)
-/*==================*/
- /* out: the 'random' number */
-{
- ulint rnd;
- ulint n_bits;
-
- n_bits = 8 * sizeof(ulint);
-
- ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2;
-
- rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter);
-
- return(rnd);
-}
-
-/************************************************************
-Generates a random integer from a given interval. When low == high the
-function returns low without drawing a random number. Otherwise one
-number is drawn with ut_rnd_gen_ulint() and mapped into [low, high] by
-taking it modulo (high - low + 1).
-NOTE(review): assuming ulint is unsigned, a call with low == 0 and high
-equal to the largest ulint makes high - low + 1 wrap to 0, so the modulo
-would divide by zero; presumably no caller asks for the full range. */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- /* out: the 'random' number */
- ulint low, /* in: low limit; can generate also this value */
- ulint high) /* in: high limit; can generate also this value */
-{
- ulint rnd;
-
- ut_ad(high >= low);
-
- if (low == high) {
-
- return(low);
- }
-
- rnd = ut_rnd_gen_ulint();
-
- return(low + (rnd % (high - low + 1)));
-}
-
-/*************************************************************
-Generates a random iboolean value. One number x is drawn with
-ut_rnd_gen_ulint(); the result is TRUE when the lowest bit of
-(x >> 20) + (x >> 15) is set, i.e. when bits 20 and 15 of x differ,
-and FALSE otherwise. */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void)
-/*=================*/
- /* out: the random value */
-{
- ulint x;
-
- x = ut_rnd_gen_ulint();
-
- if (((x >> 20) + (x >> 15)) & 1) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***********************************************************
-The following function generates a hash value for a ulint integer
-to a hash table of size table_size, which should be a prime
-or some random number for the hash table to work reliably.
-The key is XORed with UT_HASH_RANDOM_MASK2 and then reduced modulo
-table_size. table_size is used as a divisor, so it must not be zero. */
-UNIV_INLINE
-ulint
-ut_hash_ulint(
-/*==========*/
- /* out: hash value */
- ulint key, /* in: value to be hashed */
- ulint table_size) /* in: hash table size */
-{
- key = key ^ UT_HASH_RANDOM_MASK2;
-
- return(key % table_size);
-}
-
-/*****************************************************************
-Folds a pair of ulints. The two arguments enter the formula in different
-places (n1 is added before the XOR with UT_HASH_RANDOM_MASK, n2 after
-it), so callers folding a sequence should pass values in a consistent
-order. The other folding functions in this file are built on this one. */
-UNIV_INLINE
-ulint
-ut_fold_ulint_pair(
-/*===============*/
- /* out: folded value */
- ulint n1, /* in: ulint */
- ulint n2) /* in: ulint */
-{
- return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1)
- ^ UT_HASH_RANDOM_MASK) + n2);
-}
-
-/*****************************************************************
-Folds a dulint. The low part of d is passed to ut_fold_ulint_pair() as
-the first argument and the high part as the second. */
-UNIV_INLINE
-ulint
-ut_fold_dulint(
-/*===========*/
- /* out: folded value */
- dulint d) /* in: dulint */
-{
- return(ut_fold_ulint_pair(ut_dulint_get_low(d),
- ut_dulint_get_high(d)));
-}
-
-/*****************************************************************
-Folds a character string ending in the null character. Starting from 0,
-each character is folded into the running value with
-ut_fold_ulint_pair(), in string order; the empty string folds to 0.
-When UNIV_DEBUG is defined, ut_a() fails on the 100th character, so
-such builds accept at most 99 characters.
-NOTE(review): the cast (ulint)(*str) converts a plain char; where char
-is signed, characters above 127 would presumably be sign-extended
-before folding. Confirm whether that is intended. */
-UNIV_INLINE
-ulint
-ut_fold_string(
-/*===========*/
- /* out: folded value */
- const char* str) /* in: null-terminated string */
-{
-#ifdef UNIV_DEBUG
- ulint i = 0;
-#endif
- ulint fold = 0;
-
- ut_ad(str);
-
- while (*str != '\0') {
-
-#ifdef UNIV_DEBUG
- i++;
- ut_a(i < 100);
-#endif
-
- fold = ut_fold_ulint_pair(fold, (ulint)(*str));
- str++;
- }
-
- return(fold);
-}
-
-/*****************************************************************
-Folds a binary string. Starting from 0, each of the len bytes is folded
-into the running value with ut_fold_ulint_pair(), in order. A length of
-0 yields 0. Embedded zero bytes are folded like any other byte. */
-UNIV_INLINE
-ulint
-ut_fold_binary(
-/*===========*/
- /* out: folded value */
- const byte* str, /* in: string of bytes */
- ulint len) /* in: length */
-{
- const byte* str_end = str + len;
- ulint fold = 0;
-
- ut_ad(str);
-
- while (str < str_end) {
- fold = ut_fold_ulint_pair(fold, (ulint)(*str));
-
- str++;
- }
-
- return(fold);
-}
diff --git a/storage/innobase/include/ut0sort.h b/storage/innobase/include/ut0sort.h
deleted file mode 100644
index 87d30dee6f2..00000000000
--- a/storage/innobase/include/ut0sort.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/**********************************************************************
-Sort utility
-
-(c) 1995 Innobase Oy
-
-Created 11/9/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0sort_h
-#define ut0sort_h
-
-#include "univ.i"
-
-/* This module gives a macro definition of the body of
-a standard sort function for an array of elements of any
-type. The comparison function is given as a parameter to
-the macro. The sort algorithm is mergesort, whose worst-case
-running time is O(n log n).
-*/
-
-/***********************************************************************
-This macro expands to the body of a standard sort function.
-The sort function uses mergesort and must be defined separately
-for each type of array.
-Also the comparison function has to be defined individually
-for each array cell type. SORT_FUN is the sort function name.
-The function takes the array to be sorted (ARR),
-the array of auxiliary space (AUX_ARR) of same size,
-and the low (LOW), inclusive, and high (HIGH), noninclusive,
-limits for the sort interval as arguments.
-CMP_FUN is the comparison function name. It takes as arguments
-two elements from the array and returns 1, if the first is bigger,
-0 if equal, and -1 if the second bigger. For an example of use
-see test program in tsut.c.
-
-Notes on the expansion. It contains plain return statements and calls
-SORT_FUN recursively on the two halves of the interval, so SORT_FUN must
-be a function returning void that takes (ARR, AUX_ARR, LOW, HIGH).
-The interval must hold at least one element: LOW < HIGH is asserted
-with ut_ad(). The halves are merged into AUX_ARR and copied back to ARR.
-Only a CMP_FUN result greater than 0 is tested; on a tie the element
-from the lower half is taken first, so equal elements keep their
-relative order. The macro arguments are expanded several times and
-should therefore be free of side effects. The last line of the macro
-ends in a line continuation, so the line after it must stay blank. */
-
-#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
-{\
- ulint ut_sort_mid77;\
- ulint ut_sort_i77;\
- ulint ut_sort_low77;\
- ulint ut_sort_high77;\
-\
- ut_ad((LOW) < (HIGH));\
- ut_ad(ARR);\
- ut_ad(AUX_ARR);\
-\
- if ((LOW) == (HIGH) - 1) {\
- return;\
- } else if ((LOW) == (HIGH) - 2) {\
- if (CMP_FUN((ARR)[LOW], (ARR)[(HIGH) - 1]) > 0) {\
- (AUX_ARR)[LOW] = (ARR)[LOW];\
- (ARR)[LOW] = (ARR)[(HIGH) - 1];\
- (ARR)[(HIGH) - 1] = (AUX_ARR)[LOW];\
- }\
- return;\
- }\
-\
- ut_sort_mid77 = ((LOW) + (HIGH)) / 2;\
-\
- SORT_FUN((ARR), (AUX_ARR), (LOW), ut_sort_mid77);\
- SORT_FUN((ARR), (AUX_ARR), ut_sort_mid77, (HIGH));\
-\
- ut_sort_low77 = (LOW);\
- ut_sort_high77 = ut_sort_mid77;\
-\
- for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
-\
- if (ut_sort_low77 >= ut_sort_mid77) {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
- ut_sort_high77++;\
- } else if (ut_sort_high77 >= (HIGH)) {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
- ut_sort_low77++;\
- } else if (CMP_FUN((ARR)[ut_sort_low77],\
- (ARR)[ut_sort_high77]) > 0) {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
- ut_sort_high77++;\
- } else {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
- ut_sort_low77++;\
- }\
- }\
-\
- for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
- (ARR)[ut_sort_i77] = (AUX_ARR)[ut_sort_i77];\
- }\
-}\
-
-
-#endif
-
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
deleted file mode 100644
index 95d7ba017f1..00000000000
--- a/storage/innobase/include/ut0ut.h
+++ /dev/null
@@ -1,323 +0,0 @@
-/**********************************************************************
-Various utilities
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0ut_h
-#define ut0ut_h
-
-#include "univ.i"
-#include <time.h>
-#ifndef MYSQL_SERVER
-#include <ctype.h>
-#endif
-
-typedef time_t ib_time_t;
-
-#ifdef HAVE_PAUSE_INSTRUCTION /* PAUSE_INSTRUCTION(): the first matching branch of this chain defines it */
-#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("pause");}
-#else
-#ifdef HAVE_FAKE_PAUSE_INSTRUCTION
-#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("rep; nop");}
-#else
-#ifdef UNIV_SYNC_ATOMIC /* no pause variant: issue a compare-and-swap on a local instead */
-#define PAUSE_INSTRUCTION() \
- { \
- volatile lint volatile_var; /* NOTE(review): not initialized before the call below */ \
- os_compare_and_swap(&volatile_var, 0, 1); \
- }
-#else /* none of the above is defined: the macro expands to nothing */
-#define PAUSE_INSTRUCTION()
-#endif
-#endif
-#endif
-
-/************************************************************
-Gets the high 32 bits in a ulint. That is makes a shift >> 32,
-but since there seem to be compiler bugs in both gcc and Visual C++,
-we do this by a special conversion. */
-
-ulint
-ut_get_high32(
-/*==========*/
- /* out: a >> 32 */
- ulint a); /* in: ulint */
-/**********************************************************
-Calculates the minimum of two ulints. */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
- /* out: minimum */
- ulint n1, /* in: first number */
- ulint n2); /* in: second number */
-/**********************************************************
-Calculates the maximum of two ulints. */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
- /* out: maximum */
- ulint n1, /* in: first number */
- ulint n2); /* in: second number */
-/********************************************************************
-Calculates minimum of two ulint-pairs. */
-UNIV_INLINE
-void
-ut_pair_min(
-/*========*/
- ulint* a, /* out: more significant part of minimum */
- ulint* b, /* out: less significant part of minimum */
- ulint a1, /* in: more significant part of first pair */
- ulint b1, /* in: less significant part of first pair */
- ulint a2, /* in: more significant part of second pair */
- ulint b2); /* in: less significant part of second pair */
-/**********************************************************
-Compares two ulints. */
-UNIV_INLINE
-int
-ut_ulint_cmp(
-/*=========*/
- /* out: 1 if a > b, 0 if a == b, -1 if a < b */
- ulint a, /* in: ulint */
- ulint b); /* in: ulint */
-/***********************************************************
-Compares two pairs of ulints. */
-UNIV_INLINE
-int
-ut_pair_cmp(
-/*========*/
- /* out: -1 if a < b, 0 if a == b,
- 1 if a > b */
- ulint a1, /* in: more significant part of first pair */
- ulint a2, /* in: less significant part of first pair */
- ulint b1, /* in: more significant part of second pair */
- ulint b2); /* in: less significant part of second pair */
-/*****************************************************************
-Calculates fast the remainder when divided by a power of two. */
-UNIV_INLINE
-ulint
-ut_2pow_remainder(
-/*==============*/ /* out: remainder */
- ulint n, /* in: number to be divided */
- ulint m); /* in: divisor; power of 2 */
-/*****************************************************************
-Calculates fast value rounded to a multiple of a power of 2. */
-UNIV_INLINE
-ulint
-ut_2pow_round(
-/*==========*/ /* out: value of n rounded down to nearest
- multiple of m */
- ulint n, /* in: number to be rounded */
- ulint m); /* in: divisor; power of 2 */
-/*****************************************************************
-Calculates fast the 2-logarithm of a number, rounded upward to an
-integer. */
-UNIV_INLINE
-ulint
-ut_2_log(
-/*=====*/
- /* out: logarithm in the base 2, rounded upward */
- ulint n); /* in: number */
-/*****************************************************************
-Calculates 2 to power n. */
-UNIV_INLINE
-ulint
-ut_2_exp(
-/*=====*/
- /* out: 2 to power n */
- ulint n); /* in: number */
-/*****************************************************************
-Calculates fast the number rounded up to the nearest power of 2. */
-
-ulint
-ut_2_power_up(
-/*==========*/
- /* out: first power of 2 which is >= n */
- ulint n) /* in: number != 0 */
- __attribute__((const));
-
-/* Determine how many bytes (groups of 8 bits) are needed to
-store the given number of bits. */
-#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
-
-/****************************************************************
-Sort function for ulint arrays. */
-
-void
-ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high);
-/*============================================================*/
-/************************************************************
-The following function returns elapsed CPU time in milliseconds. */
-
-ulint
-ut_clock(void);
-/**************************************************************
-Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime. */
-
-ib_time_t
-ut_time(void);
-/*=========*/
-/**************************************************************
-Returns system time.
-Upon successful completion, the value 0 is returned; otherwise the
-value -1 is returned and the global variable errno is set to indicate the
-error. */
-
-int
-ut_usectime(
-/*========*/
- /* out: 0 on success, -1 otherwise */
- ulint* sec, /* out: seconds since the Epoch */
- ulint* ms); /* out: microseconds since the Epoch+*sec */
-
-/**************************************************************
-Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms). */
-
-ib_longlong
-ut_usecdiff(
-/*========*/
- ulint end_sec, /* in: seconds since the Epoch */
- ulint end_ms, /* in: microseconds since the Epoch+*sec1 */
- ulint start_sec, /* in: seconds since the Epoch */
- ulint start_ms); /* in: microseconds since the Epoch+*sec2 */
-
-/**************************************************************
-Returns the difference of two times in seconds. */
-
-double
-ut_difftime(
-/*========*/
- /* out: time2 - time1 expressed in seconds */
- ib_time_t time2, /* in: time */
- ib_time_t time1); /* in: time */
-/**************************************************************
-Prints a timestamp to a file. */
-
-void
-ut_print_timestamp(
-/*===============*/
- FILE* file); /* in: file where to print */
-/**************************************************************
-Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-
-void
-ut_sprintf_timestamp(
-/*=================*/
- char* buf); /* in: buffer where to sprintf */
-/**************************************************************
-Sprintfs a timestamp to a buffer with no spaces and with ':' characters
-replaced by '_'. */
-
-void
-ut_sprintf_timestamp_without_extra_chars(
-/*=====================================*/
- char* buf); /* in: buffer where to sprintf */
-/**************************************************************
-Returns current year, month, day. */
-
-void
-ut_get_year_month_day(
-/*==================*/
- ulint* year, /* out: current year */
- ulint* month, /* out: month */
- ulint* day); /* out: day */
-/*****************************************************************
-Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++. */
-
-ulint
-ut_delay(
-/*=====*/
- /* out: dummy value */
- ulint delay); /* in: delay in microseconds on 100 MHz Pentium */
-/*****************************************************************
-Prints the contents of a memory buffer in hex and ascii. */
-
-void
-ut_print_buf(
-/*=========*/
- FILE* file, /* in: file where to print */
- const void* buf, /* in: memory buffer */
- ulint len); /* in: length of the buffer */
-
-/**************************************************************************
-Outputs a NUL-terminated file name, quoted with apostrophes. */
-
-void
-ut_print_filename(
-/*==============*/
- FILE* f, /* in: output stream */
- const char* name); /* in: name to print */
-
-/* Forward declaration of transaction handle */
-struct trx_struct;
-
-/**************************************************************************
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-
-void
-ut_print_name(
-/*==========*/
- FILE* f, /* in: output stream */
- struct trx_struct*trx, /* in: transaction */
- ibool table_id,/* in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name); /* in: name to print */
-
-/**************************************************************************
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-
-void
-ut_print_namel(
-/*===========*/
- FILE* f, /* in: output stream */
- struct trx_struct*trx, /* in: transaction (NULL=no quotes) */
- ibool table_id,/* in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name, /* in: name to print */
- ulint namelen);/* in: length of name */
-
-/**************************************************************************
-Catenate files. */
-
-void
-ut_copy_file(
-/*=========*/
- FILE* dest, /* in: output file */
- FILE* src); /* in: input file to be appended to output */
-
-/**************************************************************************
-snprintf(). */
-
-#ifdef __WIN__
-int
-ut_snprintf(
- /* out: number of characters that would
- have been printed if the size were
- unlimited, not including the terminating
- '\0'. */
- char* str, /* out: string */
- size_t size, /* in: str size */
- const char* fmt, /* in: format */
- ...); /* in: format values */
-#else
-#define ut_snprintf snprintf
-#endif /* __WIN__ */
-
-#ifndef UNIV_NONINL
-#include "ut0ut.ic"
-#endif
-
-#endif
-
diff --git a/storage/innobase/include/ut0ut.ic b/storage/innobase/include/ut0ut.ic
deleted file mode 100644
index 412717a094e..00000000000
--- a/storage/innobase/include/ut0ut.ic
+++ /dev/null
@@ -1,174 +0,0 @@
-/******************************************************************
-Various utilities
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-/**********************************************************
-Calculates the minimum of two ulints: returns n1 when n1 <= n2 and
-n2 otherwise. */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
- /* out: minimum */
- ulint n1, /* in: first number */
- ulint n2) /* in: second number */
-{
- return((n1 <= n2) ? n1 : n2);
-}
-
-/**********************************************************
-Calculates the maximum of two ulints: returns n2 when n1 <= n2 and
-n1 otherwise. */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
- /* out: maximum */
- ulint n1, /* in: first number */
- ulint n2) /* in: second number */
-{
- return((n1 <= n2) ? n2 : n1);
-}
-
-/********************************************************************
-Calculates minimum of two ulint-pairs. The pairs (a1, b1) and (a2, b2)
-are ordered lexicographically: the more significant parts a1 and a2 are
-compared first and the less significant parts only decide a tie. The
-smaller pair is written to *a and *b; both pointers are always written. */
-UNIV_INLINE
-void
-ut_pair_min(
-/*========*/
- ulint* a, /* out: more significant part of minimum */
- ulint* b, /* out: less significant part of minimum */
- ulint a1, /* in: more significant part of first pair */
- ulint b1, /* in: less significant part of first pair */
- ulint a2, /* in: more significant part of second pair */
- ulint b2) /* in: less significant part of second pair */
-{
- if (a1 == a2) {
- *a = a1;
- *b = ut_min(b1, b2);
- } else if (a1 < a2) {
- *a = a1;
- *b = b1;
- } else {
- *a = a2;
- *b = b2;
- }
-}
-
-/**********************************************************
-Compares two ulints and reports the result as a three-way value, the
-same convention CMP_FUN is documented to follow for
-UT_SORT_FUNCTION_BODY in ut0sort.h. */
-UNIV_INLINE
-int
-ut_ulint_cmp(
-/*=========*/
- /* out: 1 if a > b, 0 if a == b, -1 if a < b */
- ulint a, /* in: ulint */
- ulint b) /* in: ulint */
-{
- if (a < b) {
- return(-1);
- } else if (a == b) {
- return(0);
- } else {
- return(1);
- }
-}
-
-/***********************************************************
-Compares two pairs of ulints lexicographically: the more significant
-parts a1 and b1 are compared first, and a2 and b2 only decide a tie.
-Note the parameter naming: here the first pair is (a1, a2) and the
-second is (b1, b2), whereas ut_pair_min() names its pairs (a1, b1)
-and (a2, b2). */
-UNIV_INLINE
-int
-ut_pair_cmp(
-/*========*/
- /* out: -1 if a < b, 0 if a == b, 1 if a > b */
- ulint a1, /* in: more significant part of first pair */
- ulint a2, /* in: less significant part of first pair */
- ulint b1, /* in: more significant part of second pair */
- ulint b2) /* in: less significant part of second pair */
-{
- if (a1 > b1) {
- return(1);
- } else if (a1 < b1) {
- return(-1);
- } else if (a2 > b2) {
- return(1);
- } else if (a2 < b2) {
- return(-1);
- } else {
- return(0);
- }
-}
-
-/*****************************************************************
-Calculates fast the remainder when divided by a power of two, by masking
-n with m - 1. The ut_ad() check requires m to divide 0x80000000, i.e. to
-be a power of two no larger than 2 to power 31.
-NOTE(review): m == 0 divides by zero inside the check itself, and a power
-of two above 2 to power 31 fails the check even where ulint is wider than
-32 bits; confirm whether that limit is intended. */
-UNIV_INLINE
-ulint
-ut_2pow_remainder(
-/*==============*/ /* out: remainder */
- ulint n, /* in: number to be divided */
- ulint m) /* in: divisor; power of 2 */
-{
- ut_ad(0x80000000UL % m == 0);
-
- return(n & (m - 1));
-}
-
-/*****************************************************************
-Calculates fast a value rounded to a multiple of a power of 2, by
-clearing the bits of n below m. The ut_ad() check requires m to divide
-0x80000000, i.e. to be a power of two no larger than 2 to power 31.
-NOTE(review): m == 0 divides by zero inside the check itself. */
-UNIV_INLINE
-ulint
-ut_2pow_round(
-/*==========*/ /* out: value of n rounded down to nearest
- multiple of m */
- ulint n, /* in: number to be rounded */
- ulint m) /* in: divisor; power of 2 */
-{
- ut_ad(0x80000000UL % m == 0);
-
- return(n & ~(m - 1));
-}
-
-/*****************************************************************
-Calculates fast the 2-logarithm of a number, rounded upward to an
-integer. The loop halves n - 1 until it reaches zero and counts the
-halvings, so for n >= 2 the result is the smallest k with
-2 to power k >= n (2 gives 1, 3 and 4 give 2, 5 gives 3).
-NOTE(review): for n == 1 the function returns 1, not 0. */
-UNIV_INLINE
-ulint
-ut_2_log(
-/*=====*/
- /* out: logarithm in the base 2, rounded upward */
- ulint n) /* in: number != 0 */
-{
- ulint res;
-
- res = 0;
-
- ut_ad(n > 0);
-
- n = n - 1;
-
- for (;;) {
- n = n / 2;
-
- if (n == 0) {
- break;
- }
-
- res++;
- }
-
- return(res + 1);
-}
-
-/*****************************************************************
-Calculates 2 to power n by shifting the ulint value 1 left by n bits.
-n must be smaller than the number of bits in ulint; the function does
-not check this. */
-UNIV_INLINE
-ulint
-ut_2_exp(
-/*=====*/
- /* out: 2 to power n */
- ulint n) /* in: number */
-{
- return((ulint) 1 << n);
-}
diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h
deleted file mode 100644
index e0cc4dfb009..00000000000
--- a/storage/innobase/include/ut0vec.h
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef IB_VECTOR_H
-#define IB_VECTOR_H
-
-#include "univ.i"
-#include "mem0mem.h"
-
-typedef struct ib_vector_struct ib_vector_t;
-
-/* An automatically resizing vector datatype with the following properties:
-
- -Contains void* items.
-
- -The items are owned by the caller.
-
- -All memory allocation is done through a heap owned by the caller, who is
- responsible for freeing it when done with the vector.
-
- -When the vector is resized, the old memory area is left allocated since it
- uses the same heap as the new memory area, so this is best used for
- relatively small or short-lived uses.
-*/
-
-/********************************************************************
-Create a new vector with the given initial size. */
-
-ib_vector_t*
-ib_vector_create(
-/*=============*/
- /* out: vector */
- mem_heap_t* heap, /* in: heap */
- ulint size); /* in: initial size */
-
-/********************************************************************
-Push a new element to the vector, increasing its size if necessary. */
-
-void
-ib_vector_push(
-/*===========*/
- ib_vector_t* vec, /* in: vector */
- void* elem); /* in: data element */
-
-/********************************************************************
-Get the number of elements in the vector. */
-UNIV_INLINE
-ulint
-ib_vector_size(
-/*===========*/
- /* out: number of elements in vector */
- ib_vector_t* vec); /* in: vector */
-
-/********************************************************************
-Get the n'th element. */
-UNIV_INLINE
-void*
-ib_vector_get(
-/*==========*/
- /* out: n'th element */
- ib_vector_t* vec, /* in: vector */
- ulint n); /* in: element index to get */
-
-/* The vector itself; see the comment at the beginning of the file for
-the ownership rules. ib_vector_size() reports the used field, and
-ib_vector_get() checks its index against it. */
-struct ib_vector_struct {
- mem_heap_t* heap; /* heap */
- void** data; /* data elements */
- ulint used; /* number of elements currently used */
- ulint total; /* number of elements allocated */
-};
-
-#ifndef UNIV_NONINL
-#include "ut0vec.ic"
-#endif
-
-#endif
diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic
deleted file mode 100644
index 417a17d951f..00000000000
--- a/storage/innobase/include/ut0vec.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/********************************************************************
-Get number of elements in vector. This is the used field of the vector,
-not the number of elements allocated. */
-UNIV_INLINE
-ulint
-ib_vector_size(
-/*===========*/
- /* out: number of elements in vector */
- ib_vector_t* vec) /* in: vector */
-{
- return(vec->used);
-}
-
-/********************************************************************
-Get n'th element. The index is checked with ut_a() against the number
-of elements in use, so n must be smaller than ib_vector_size(vec). */
-UNIV_INLINE
-void*
-ib_vector_get(
-/*==========*/
- /* out: n'th element */
- ib_vector_t* vec, /* in: vector */
- ulint n) /* in: element index to get */
-{
- ut_a(n < vec->used);
-
- return(vec->data[n]);
-}
diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h
deleted file mode 100644
index 57f2297beee..00000000000
--- a/storage/innobase/include/ut0wqueue.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/***********************************************************************
-A Work queue. Threads can add work items to the queue and other threads can
-wait for work items to be available and take them off the queue for
-processing.
-
-************************************************************************/
-
-#ifndef IB_WORK_QUEUE_H
-#define IB_WORK_QUEUE_H
-
-#include "ut0list.h"
-#include "mem0mem.h"
-#include "os0sync.h"
-#include "sync0types.h"
-
-typedef struct ib_wqueue_struct ib_wqueue_t;
-
-/********************************************************************
-Create a new work queue. */
-
-ib_wqueue_t*
-ib_wqueue_create(void);
-/*===================*/
- /* out: work queue */
-
-/********************************************************************
-Free a work queue. */
-
-void
-ib_wqueue_free(
-/*===========*/
- ib_wqueue_t* wq); /* in: work queue */
-
-/********************************************************************
-Add a work item to the queue. */
-
-void
-ib_wqueue_add(
-/*==========*/
- ib_wqueue_t* wq, /* in: work queue */
- void* item, /* in: work item */
- mem_heap_t* heap); /* in: memory heap to use for allocating the
- list node */
-
-/********************************************************************
-Wait for a work item to appear in the queue. */
-
-void*
-ib_wqueue_wait(
- /* out: work item */
- ib_wqueue_t* wq); /* in: work queue */
-
-/* Work queue: the state shared between the threads that add work items
-and the threads that wait for them. */
-struct ib_wqueue_struct {
- mutex_t mutex; /* mutex protecting everything */
- ib_list_t* items; /* work item list */
- os_event_t event; /* event we use to signal additions to list */
-};
-
-#endif
diff --git a/storage/innobase/lock/lock0iter.c b/storage/innobase/lock/lock0iter.c
deleted file mode 100644
index 0afa7019c86..00000000000
--- a/storage/innobase/lock/lock0iter.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/******************************************************
-Lock queue iterator. Can iterate over table and record
-lock queues.
-
-(c) 2007 Innobase Oy
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-#define LOCK_MODULE_IMPLEMENTATION
-
-#include "univ.i"
-#include "lock0iter.h"
-#include "lock0lock.h"
-#include "lock0priv.h"
-#include "ut0dbg.h"
-#include "ut0lst.h"
-
-/***********************************************************************
-Initialize lock queue iterator so that it starts to iterate from
-"lock". bit_no specifies the record number within the heap where the
-record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
-1. If the lock is a table lock, thus we have a table lock queue; then
- iter->bit_no is also left as ULINT_UNDEFINED;
-2. If the lock is a record lock and it is a wait lock. In this case
- bit_no is calculated here with lock_rec_find_set_bit() (a wait lock
- has exactly one bit set in its bitmap); ut_a() fails if none is set. */
-
-void
-lock_queue_iterator_reset(
-/*======================*/
- lock_queue_iterator_t* iter, /* out: iterator */
- lock_t* lock, /* in: lock to start from */
- ulint bit_no) /* in: record number in the
- heap, or ULINT_UNDEFINED */
-{
- iter->current_lock = lock;
-
- if (bit_no != ULINT_UNDEFINED) {
-
- iter->bit_no = bit_no;
- } else {
-
- switch (lock_get_type(lock)) {
- case LOCK_TABLE:
- iter->bit_no = ULINT_UNDEFINED;
- break;
- case LOCK_REC:
- iter->bit_no = lock_rec_find_set_bit(lock);
- ut_a(iter->bit_no != ULINT_UNDEFINED);
- break;
- default:
- ut_error;
- }
- }
-}
-
-/***********************************************************************
-Gets the previous lock in the lock queue, returns NULL if there are no
-more locks (i.e. the current lock is the first one). The iterator is
-moved back to the returned lock only when a non-NULL lock is returned. */
-
-lock_t*
-lock_queue_iterator_get_prev(
-/*=========================*/
- /* out: previous lock or NULL */
- lock_queue_iterator_t* iter) /* in/out: iterator */
-{
- lock_t* prev_lock;
-
- switch (lock_get_type(iter->current_lock)) {
- case LOCK_REC:
- prev_lock = lock_rec_get_prev(
- iter->current_lock, iter->bit_no);
- break;
- case LOCK_TABLE:
- prev_lock = UT_LIST_GET_PREV(
- un_member.tab_lock.locks, iter->current_lock);
- break;
- default:
- ut_error;
- }
-
- if (prev_lock != NULL) {
-
- iter->current_lock = prev_lock;
- }
-
- return(prev_lock);
-}
diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
deleted file mode 100644
index 5afd19aa7e7..00000000000
--- a/storage/innobase/lock/lock0lock.c
+++ /dev/null
@@ -1,5189 +0,0 @@
-/******************************************************
-The transaction lock system
-
-(c) 1996 Innobase Oy
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#define LOCK_MODULE_IMPLEMENTATION
-
-#include "lock0lock.h"
-#include "lock0priv.h"
-
-#ifdef UNIV_NONINL
-#include "lock0lock.ic"
-#include "lock0priv.ic"
-#endif
-
-#include "usr0sess.h"
-#include "trx0purge.h"
-#include "dict0mem.h"
-#include "trx0sys.h"
-
-
-/* 2 function prototypes copied from ha_innodb.cc: */
-
-/*****************************************************************
-If you want to print a thd that is not associated with the current thread,
-you must call this function before reserving the InnoDB kernel_mutex, to
-protect MySQL from setting thd->query NULL. If you print a thd of the current
-thread, we know that MySQL cannot modify thd->query, and it is not necessary
-to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
-the kernel_mutex.
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-
-void
-innobase_mysql_prepare_print_arbitrary_thd(void);
-/*============================================*/
-
-/*****************************************************************
-Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
-NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
-function! */
-
-void
-innobase_mysql_end_print_arbitrary_thd(void);
-/*========================================*/
-
-/* Restricts the length of search we will do in the waits-for
-graph of transactions */
-#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
-
-/* Restricts the recursion depth of the search we will do in the waits-for
-graph of transactions */
-#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200
-
-/* When releasing transaction locks, this specifies how often we release
-the kernel mutex for a moment to give also others access to it */
-
-#define LOCK_RELEASE_KERNEL_INTERVAL 1000
-
-/* Safety margin when creating a new record lock: this many extra records
-can be inserted to the page without need to create a lock with a bigger
-bitmap */
-
-#define LOCK_PAGE_BITMAP_MARGIN 64
-
-/* An explicit record lock affects both the record and the gap before it.
-An implicit x-lock does not affect the gap, it only locks the index
-record from read or update.
-
-If a transaction has modified or inserted an index record, then
-it owns an implicit x-lock on the record. On a secondary index record,
-a transaction has an implicit x-lock also if it has modified the
-clustered index record, the max trx id of the page where the secondary
-index record resides is >= trx id of the transaction (or database recovery
-is running), and there are no explicit non-gap lock requests on the
-secondary index record.
-
-This complicated definition for a secondary index comes from the
-implementation: we want to be able to determine if a secondary index
-record has an implicit x-lock, just by looking at the present clustered
-index record, not at the historical versions of the record. The
-complicated definition can be explained to the user so that there is
-nondeterminism in the access path when a query is answered: we may,
-or may not, access the clustered index record and thus may, or may not,
-bump into an x-lock set there.
-
-Different transactions can have conflicting locks set on the gap at the
-same time. The locks on the gap are purely inhibitive: an insert cannot
-be made, or a select cursor may have to wait if a different transaction
-has a conflicting lock on the gap. An x-lock on the gap does not give
-the right to insert into the gap.
-
-An explicit lock can be placed on a user record or the supremum record of
-a page. The locks on the supremum record are always thought to be of the gap
-type, though the gap bit is not set. When we perform an update of a record
-where the size of the record changes, we may temporarily store its explicit
-locks on the infimum record of the page, though the infimum otherwise never
-carries locks.
-
-A waiting record lock can also be of the gap type. A waiting lock request
-can be granted when there is no conflicting mode lock request by another
-transaction ahead of it in the explicit lock queue.
-
-In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
-It only locks the record it is placed on, not the gap before the record.
-This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
-level.
-
--------------------------------------------------------------------------
-RULE 1: If there is an implicit x-lock on a record, and there are non-gap
--------
-lock requests waiting in the queue, then the transaction holding the implicit
-x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
-released, we can grant locks to waiting lock requests purely by looking at
-the explicit lock requests in the queue.
-
-RULE 3: Different transactions cannot have conflicting granted non-gap locks
--------
-on a record at the same time. However, they can have conflicting granted gap
-locks.
-RULE 4: If there is a waiting lock request in a queue, no lock request,
--------
-gap or not, can be inserted ahead of it in the queue. In record deletes
-and page splits new gap type locks can be created by the database manager
-for a transaction, and without rule 4, the waits-for graph of transactions
-might become cyclic without the database noticing it, as the deadlock check
-is only performed when a transaction itself requests a lock!
--------------------------------------------------------------------------
-
-An insert is allowed to a gap if there are no explicit lock requests by
-other transactions on the next record. It does not matter if these lock
-requests are granted or waiting, gap bit set or not, with the exception
-that a gap type request set by another transaction to wait for
-its turn to do an insert is ignored. On the other hand, an
-implicit x-lock by another transaction does not prevent an insert, which
-allows for more concurrency when using an Oracle-style sequence number
-generator for the primary key with many transactions doing inserts
-concurrently.
-
-A modify of a record is allowed if the transaction has an x-lock on the
-record, or if other transactions do not have any non-gap lock requests on the
-record.
-
-A read of a single user record with a cursor is allowed if the transaction
-has a non-gap explicit, or an implicit lock on the record, or if the other
-transactions have no x-lock requests on the record. At a page supremum a
-read is always allowed.
-
-In summary, an implicit lock is seen as a granted x-lock only on the
-record, not on the gap. An explicit lock with no gap bit set is a lock
-both on the record and the gap. If the gap bit is set, the lock is only
-on the gap. Different transactions cannot own conflicting locks on the
-record at the same time, but they may own conflicting locks on the gap.
-Granted locks on a record give an access right to the record, but gap type
-locks just inhibit operations.
-
-NOTE: Finding out if some transaction has an implicit x-lock on a secondary
-index record can be cumbersome. We may have to look at previous versions of
-the corresponding clustered index record to find out if a delete marked
-secondary index record was delete marked by an active transaction, not by
-a committed one.
-
-FACT A: If a transaction has inserted a row, it can delete it any time
-without need to wait for locks.
-
-PROOF: The transaction has an implicit x-lock on every index record inserted
-for the row, and can thus modify each record without the need to wait. Q.E.D.
-
-FACT B: If a transaction has read some result set with a cursor, it can read
-it again, and retrieves the same result set, if it has not modified the
-result set in the meantime. Hence, there is no phantom problem. If the
-biggest record, in the alphabetical order, touched by the cursor is removed,
-a lock wait may occur, otherwise not.
-
-PROOF: When a read cursor proceeds, it sets an s-lock on each user record
-it passes, and a gap type s-lock on each page supremum. The cursor must
-wait until it has these locks granted. Then no other transaction can
-have a granted x-lock on any of the user records, and therefore cannot
-modify the user records. Neither can any other transaction insert into
-the gaps which were passed over by the cursor. Page splits and merges,
-and removal of obsolete versions of records do not affect this, because
-when a user record or a page supremum is removed, the next record inherits
-its locks as gap type locks, and therefore blocks inserts to the same gap.
-Also, if a page supremum is inserted, it inherits its locks from the successor
-record. When the cursor is positioned again at the start of the result set,
-the records it will touch on its course are either records it touched
-during the last pass or new inserted page supremums. It can immediately
-access all these records, and when it arrives at the biggest record, it
-notices that the result set is complete. If the biggest record was removed,
-lock wait can occur because the next record only inherits a gap type lock,
-and a wait may be needed. Q.E.D. */
-
-/* If an index record should be changed or a new one inserted, we must check
-the lock on the record or the next. When a read cursor starts reading,
-we will set a record level s-lock on each record it passes, except on the
-initial record on which the cursor is positioned before we start to fetch
-records. Our index tree search has the convention that the B-tree
-cursor is positioned BEFORE the first possibly matching record in
-the search. Optimizations are possible here: if the record is searched
-on an equality condition to a unique key, we could actually set a special
-lock on the record, a lock which would not prevent any insert before
-this record. In the next key locking an x-lock set on a record also
-prevents inserts just before that record.
- There are special infimum and supremum records on each page.
-A supremum record can be locked by a read cursor. This record cannot be
-updated but the lock prevents insert of a user record to the end of
-the page.
- Next key locks will prevent the phantom problem where new rows
-could appear to SELECT result sets after the select operation has been
-performed. Prevention of phantoms ensures the serializability of
-transactions.
- What should we check if an insert of a new record is wanted?
-Only the lock on the next record on the same page, because also the
-supremum record can carry a lock. An s-lock prevents insertion, but
-what about an x-lock? If it was set by a searched update, then there
-is implicitly an s-lock, too, and the insert should be prevented.
-What if our transaction owns an x-lock to the next record, but there is
-a waiting s-lock request on the next record? If this s-lock was placed
-by a read cursor moving in the ascending order in the index, we cannot
-do the insert immediately, because when we finally commit our transaction,
-the read cursor should see also the new inserted record. So we should
-move the read cursor backward from the next record for it to pass over
-the new inserted record. This move backward may be too cumbersome to
-implement. If we in this situation just enqueue a second x-lock request
-for our transaction on the next record, then the deadlock mechanism
-notices a deadlock between our transaction and the s-lock request
-transaction. This seems to be an ok solution.
- We could have the convention that granted explicit record locks,
-lock the corresponding records from changing, and also lock the gaps
-before them from inserting. A waiting explicit lock request locks the gap
-before from inserting. Implicit record x-locks, which we derive from the
-transaction id in the clustered index record, only lock the record itself
-from modification, not the gap before it from inserting.
- How should we store update locks? If the search is done by a unique
-key, we could just modify the record trx id. Otherwise, we could put a record
-x-lock on the record. If the update changes ordering fields of the
-clustered index record, the inserted new record needs no record lock in
-lock table, the trx id is enough. The same holds for a secondary index
-record. Searched delete is similar to update.
-
-PROBLEM:
-What about waiting lock requests? If a transaction is waiting to make an
-update to a record which another modified, how does the other transaction
-know to send the end-lock-wait signal to the waiting transaction? If we have
-the convention that a transaction may wait for just one lock at a time, how
-do we preserve it if lock wait ends?
-
-PROBLEM:
-Checking the trx id label of a secondary index record. In the case of a
-modification, not an insert, is this necessary? A secondary index record
-is modified only by setting or resetting its deleted flag. A secondary index
-record contains fields to uniquely determine the corresponding clustered
-index record. A secondary index record is therefore only modified if we
-also modify the clustered index record, and the trx id checking is done
-on the clustered index record, before we come to modify the secondary index
-record. So, in the case of delete marking or unmarking a secondary index
-record, we do not have to care about trx ids, only the locks in the lock
-table must be checked. In the case of a select from a secondary index, the
-trx id is relevant, and in this case we may have to search the clustered
-index record.
-
-PROBLEM: How to update record locks when page is split or merged, or
---------------------------------------------------------------------
-a record is deleted or updated?
-If the size of fields in a record changes, we perform the update by
-a delete followed by an insert. How can we retain the locks set or
-waiting on the record? Because a record lock is indexed in the bitmap
-by the heap number of the record, when we remove the record from the
-record list, it is possible still to keep the lock bits. If the page
-is reorganized, we could make a table of old and new heap numbers,
-and permute the bitmaps in the locks accordingly. We can add to the
-table a row telling where the updated record ended. If the update does
-not require a reorganization of the page, we can simply move the lock
-bits for the updated record to the position determined by its new heap
-number (we may have to allocate a new lock, if we run out of the bitmap
-in the old one).
- A more complicated case is the one where the reinsertion of the
-updated record is done pessimistically, because the structure of the
-tree may change.
-
-PROBLEM: If a supremum record is removed in a page merge, or a record
----------------------------------------------------------------------
-removed in a purge, what to do to the waiting lock requests? In a split to
-the right, we just move the lock requests to the new supremum. If a record
-is removed, we could move the waiting lock request to its inheritor, the
-next record in the index. But, the next record may already have lock
-requests on its own queue. A new deadlock check should be made then. Maybe
-it is easier just to release the waiting transactions. They can then enqueue
-new lock requests on appropriate records.
-
-PROBLEM: When a record is inserted, what locks should it inherit from the
--------------------------------------------------------------------------
-upper neighbor? An insert of a new supremum record in a page split is
-always possible, but an insert of a new user record requires that the upper
-neighbor does not have any lock requests by other transactions, granted or
-waiting, in its lock queue. Solution: We can copy the locks as gap type
-locks, so that also the waiting locks are transformed to granted gap type
-locks on the inserted record. */
-
-/* LOCK COMPATIBILITY MATRIX
- * IS IX S X AI
- * IS + + + - +
- * IX + + - - +
- * S + - + - -
- * X - - - - -
- * AI + + - - -
- *
- * Note that for rows, InnoDB only acquires S or X locks.
- * For tables, InnoDB normally acquires IS or IX locks.
- * S or X table locks are only acquired for LOCK TABLES.
- * Auto-increment (AI) locks are needed because of
- * statement-level MySQL binlog.
- * See also lock_mode_compatible().
- */
-
-#ifdef UNIV_DEBUG
-ibool lock_print_waits = FALSE;
-#endif /* UNIV_DEBUG */
-
-/* The lock system */
-lock_sys_t* lock_sys = NULL;
-
-/* We store info on the latest deadlock error to this temporary file. InnoDB
-Monitor will then fetch it and print it */
-ibool lock_deadlock_found = FALSE;
-FILE* lock_latest_err_file;
-
-/* Flags for recursive deadlock search */
-#define LOCK_VICTIM_IS_START 1
-#define LOCK_VICTIM_IS_OTHER 2
-
-/************************************************************************
-Checks if a lock request results in a deadlock. */
-static
-ibool
-lock_deadlock_occurs(
-/*=================*/
- /* out: TRUE if a deadlock was detected and we
- chose trx as a victim; FALSE if no deadlock, or
- there was a deadlock, but we chose other
- transaction(s) as victim(s) */
- lock_t* lock, /* in: lock the transaction is requesting */
- trx_t* trx); /* in: transaction */
-/************************************************************************
-Looks recursively for a deadlock. */
-static
-ulint
-lock_deadlock_recursive(
-/*====================*/
- /* out: 0 if no deadlock found,
- LOCK_VICTIM_IS_START if there was a deadlock
- and we chose 'start' as the victim,
- LOCK_VICTIM_IS_OTHER if a deadlock
- was found and we chose some other trx as a
- victim: we must do the search again in this
- last case because there may be another
- deadlock! */
- trx_t* start, /* in: recursion starting point */
- trx_t* trx, /* in: a transaction waiting for a lock */
- lock_t* wait_lock, /* in: the lock trx is waiting to be granted */
- ulint* cost, /* in/out: number of calculation steps thus
- far: if this exceeds LOCK_MAX_N_STEPS_...
- we return LOCK_VICTIM_IS_START */
- ulint depth); /* in: recursion depth: if this exceeds
- LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
- return LOCK_VICTIM_IS_START */
-
-/*************************************************************************
-Gets the nth bit of the bitmap stored right after a record lock struct. */
-UNIV_INLINE
-ibool
-lock_rec_get_nth_bit(
-/*=================*/
- /* out: TRUE if bit set; FALSE if i >= n_bits */
- lock_t* lock, /* in: record lock */
- ulint i) /* in: index of the bit */
-{
- ulint byte_index;
- ulint bit_index;
- ulint b;
-
- ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
-
- if (i >= lock->un_member.rec_lock.n_bits) {
-
- return(FALSE);
- }
-
- byte_index = i / 8;
- bit_index = i % 8;
-
- b = (ulint)*((byte*)lock + sizeof(lock_t) + byte_index);
-
- return(ut_bit_get_nth(b, bit_index));
-}
-
-/*************************************************************************/
-
-#define lock_mutex_enter_kernel() mutex_enter(&kernel_mutex)
-#define lock_mutex_exit_kernel() mutex_exit(&kernel_mutex)
-
-/*************************************************************************
-Checks that a transaction id is sensible, i.e., not in the future. */
-
-ibool
-lock_check_trx_id_sanity(
-/*=====================*/
- /* out: TRUE if ok; FALSE after printing an error */
- dulint trx_id, /* in: trx id */
- rec_t* rec, /* in: user record */
- dict_index_t* index, /* in: index */
- const ulint* offsets, /* in: rec_get_offsets(rec, index) */
- ibool has_kernel_mutex)/* in: TRUE if the caller owns the
- kernel mutex; if FALSE, we reserve it */
-{
- ibool is_ok = TRUE;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (!has_kernel_mutex) {
- mutex_enter(&kernel_mutex);
- }
-
- /* A sanity check: the trx_id in rec must be smaller than the global
- trx id counter; otherwise report the record and index to stderr */
-
- if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: transaction id associated"
- " with record\n",
- stderr);
- rec_print_new(stderr, rec, offsets);
- fputs("InnoDB: in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, "\n"
- "InnoDB: is %lu %lu which is higher than the"
- " global trx id counter %lu %lu!\n"
- "InnoDB: The table is corrupt. You have to do"
- " dump + drop + reimport.\n",
- (ulong) ut_dulint_get_high(trx_id),
- (ulong) ut_dulint_get_low(trx_id),
- (ulong) ut_dulint_get_high(trx_sys->max_trx_id),
- (ulong) ut_dulint_get_low(trx_sys->max_trx_id));
-
- is_ok = FALSE;
- }
-
- if (!has_kernel_mutex) {
- mutex_exit(&kernel_mutex);
- }
-
- return(is_ok);
-}
-
-/*************************************************************************
-Checks that a clustered index record is seen in a consistent read. */
-
-ibool
-lock_clust_rec_cons_read_sees(
-/*==========================*/
- /* out: TRUE if sees, or FALSE if an earlier
- version of the record should be retrieved */
- rec_t* rec, /* in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- read_view_t* view) /* in: consistent read view */
-{
- dulint trx_id;
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- /* NOTE that we call this function while holding the search
- system latch. To obey the latching order we must NOT reserve the
- kernel mutex here! */
-
- trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- return(read_view_sees_trx_id(view, trx_id));
-}
-
-/*************************************************************************
-Checks that a non-clustered index record is seen in a consistent read. */
-
-ulint
-lock_sec_rec_cons_read_sees(
-/*========================*/
- /* out: TRUE if certainly sees, or FALSE if an
- earlier version of the clustered index record
- might be needed (always FALSE during recovery):
- NOTE that a non-clustered index page contains
- so little information on its modifications
- that also in the case FALSE, the present
- version of rec may be the right, but we must
- check this from the clustered index record */
- rec_t* rec, /* in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /* in: non-clustered index */
- read_view_t* view) /* in: consistent read view */
-{
- dulint max_trx_id;
-
- UT_NOT_USED(index);
-
- ut_ad(!(index->type & DICT_CLUSTERED));
- ut_ad(page_rec_is_user_rec(rec));
-
- /* NOTE that we might call this function while holding the search
- system latch. To obey the latching order we must NOT reserve the
- kernel mutex here! */
-
- if (recv_recovery_is_on()) {
-
- return(FALSE);
- }
-
- max_trx_id = page_get_max_trx_id(buf_frame_align(rec));
-
- if (ut_dulint_cmp(max_trx_id, view->up_limit_id) >= 0) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*************************************************************************
-Creates the lock system and the lock_latest_err_file tmpfile at db start. */
-
-void
-lock_sys_create(
-/*============*/
- ulint n_cells) /* in: number of slots in lock hash table */
-{
- lock_sys = mem_alloc(sizeof(lock_sys_t));
-
- lock_sys->rec_hash = hash_create(n_cells);
-
- /* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */
-
- lock_latest_err_file = os_file_create_tmpfile();
- ut_a(lock_latest_err_file);
-}
-
-/*************************************************************************
-Gets the size of a lock struct, i.e., sizeof(lock_t). */
-
-ulint
-lock_get_size(void)
-/*===============*/
- /* out: size in bytes */
-{
- return((ulint)sizeof(lock_t));
-}
-
-/*************************************************************************
-Gets the mode of a lock: its type_mode masked with LOCK_MODE_MASK. */
-UNIV_INLINE
-ulint
-lock_get_mode(
-/*==========*/
- /* out: mode */
- const lock_t* lock) /* in: lock */
-{
- ut_ad(lock);
-
- return(lock->type_mode & LOCK_MODE_MASK);
-}
-
-/*************************************************************************
-Gets the wait flag of a lock. */
-UNIV_INLINE
-ibool
-lock_get_wait(
-/*==========*/
- /* out: TRUE if the LOCK_WAIT flag is set */
- lock_t* lock) /* in: lock */
-{
- ut_ad(lock);
-
- if (lock->type_mode & LOCK_WAIT) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock. */
-
-dict_table_t*
-lock_get_src_table(
-/*===============*/
- /* out: the source table of transaction,
- if it is covered by an IX or IS table lock;
- dest if there is no source table, and
- NULL if the transaction is locking more than
- two tables or an inconsistency is found */
- trx_t* trx, /* in: transaction */
- dict_table_t* dest, /* in: destination of ALTER TABLE */
- ulint* mode) /* out: LOCK_IX or LOCK_IS, or LOCK_NONE if neither seen */
-{
- dict_table_t* src;
- lock_t* lock;
-
- src = NULL;
- *mode = LOCK_NONE;
-
- for (lock = UT_LIST_GET_FIRST(trx->trx_locks);
- lock;
- lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
- lock_table_t* tab_lock;
- ulint lock_mode;
- if (!(lock_get_type(lock) & LOCK_TABLE)) {
- /* We are only interested in table locks. */
- continue;
- }
- tab_lock = &lock->un_member.tab_lock;
- if (dest == tab_lock->table) {
- /* We are not interested in the destination table. */
- continue;
- } else if (!src) {
- /* This presumably is the source table. */
- src = tab_lock->table;
- if (UT_LIST_GET_LEN(src->locks) != 1
- || UT_LIST_GET_FIRST(src->locks) != lock) {
- /* We only support the case when
- there is only one lock on this table. */
- return(NULL);
- }
- } else if (src != tab_lock->table) {
- /* The transaction is locking more than
- two tables (src and dest): abort */
- return(NULL);
- }
-
- /* Check that the source table is locked by
- LOCK_IX or LOCK_IS; other modes leave *mode untouched. */
- lock_mode = lock_get_mode(lock);
- switch (lock_mode) {
- case LOCK_IX:
- case LOCK_IS:
- if (*mode != LOCK_NONE && *mode != lock_mode) {
- /* There are multiple locks on src. */
- return(NULL);
- }
- *mode = lock_mode;
- break;
- }
- }
-
- if (!src) {
- /* No source table lock found: flag the situation to caller */
- src = dest;
- }
-
- return(src);
-}
-
-/*************************************************************************
-Determine if the given table is exclusively "owned" by the given
-transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table. The kernel mutex is reserved while the locks are checked. */
-
-ibool
-lock_is_table_exclusive(
-/*====================*/
- /* out: TRUE if table is only locked by trx,
- with LOCK_IX, and possibly LOCK_AUTO_INC */
- dict_table_t* table, /* in: table */
- trx_t* trx) /* in: transaction */
-{
- lock_t* lock;
- ibool ok = FALSE;
-
- ut_ad(table);
- ut_ad(trx);
-
- lock_mutex_enter_kernel();
-
- for (lock = UT_LIST_GET_FIRST(table->locks);
- lock;
- lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
- if (lock->trx != trx) {
- /* A lock on the table is held
- by some other transaction. */
- goto not_ok;
- }
-
- if (!(lock_get_type(lock) & LOCK_TABLE)) {
- /* We are interested in table locks only. */
- continue;
- }
-
- switch (lock_get_mode(lock)) {
- case LOCK_IX:
- ok = TRUE;
- break;
- case LOCK_AUTO_INC:
- /* It is allowed for trx to hold an
- auto_increment lock. */
- break;
- default:
-not_ok:
- /* Other table locks than LOCK_IX are not allowed. */
- ok = FALSE;
- goto func_exit;
- }
- }
-
-func_exit:
- lock_mutex_exit_kernel();
-
- return(ok);
-}
-
-/*************************************************************************
-Sets the wait flag of a lock and the back pointer in trx to lock. */
-UNIV_INLINE
-void
-lock_set_lock_and_trx_wait(
-/*=======================*/
- lock_t* lock, /* in: lock to flag with LOCK_WAIT */
- trx_t* trx) /* in: trx whose wait_lock is set; must be NULL before */
-{
- ut_ad(lock);
- ut_ad(trx->wait_lock == NULL);
-
- trx->wait_lock = lock;
- lock->type_mode = lock->type_mode | LOCK_WAIT;
-}
-
-/**************************************************************************
-The back pointer to a waiting lock request in the transaction is set to NULL
-and the wait bit in lock type_mode is reset. */
-UNIV_INLINE
-void
-lock_reset_lock_and_trx_wait(
-/*=========================*/
- lock_t* lock) /* in: waiting lock request */
-{
- ut_ad((lock->trx)->wait_lock == lock);
- ut_ad(lock_get_wait(lock));
-
- /* Reset the back pointer in trx to this waiting lock request */
-
- (lock->trx)->wait_lock = NULL;
- lock->type_mode = lock->type_mode & ~LOCK_WAIT;
-}
-
-/*************************************************************************
-Gets the gap flag of a record lock. */
-UNIV_INLINE
-ibool
-lock_rec_get_gap(
-/*=============*/
- /* out: TRUE if LOCK_GAP flag set */
- lock_t* lock) /* in: record lock */
-{
- ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
-
- if (lock->type_mode & LOCK_GAP) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Gets the LOCK_REC_NOT_GAP (record only, not the gap) flag of a record lock. */
-UNIV_INLINE
-ibool
-lock_rec_get_rec_not_gap(
-/*=====================*/
- /* out: TRUE if LOCK_REC_NOT_GAP flag set */
- lock_t* lock) /* in: record lock */
-{
- ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
-
- if (lock->type_mode & LOCK_REC_NOT_GAP) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Gets the waiting insert flag of a record lock. */
-UNIV_INLINE
-ibool
-lock_rec_get_insert_intention(
-/*==========================*/
- /* out: TRUE if LOCK_INSERT_INTENTION flag set */
- lock_t* lock) /* in: record lock */
-{
- ut_ad(lock);
- ut_ad(lock_get_type(lock) == LOCK_REC);
-
- if (lock->type_mode & LOCK_INSERT_INTENTION) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Decides whether lock mode 1 is at least as strong as lock mode 2.
-LOCK_X is at least as strong as every mode; LOCK_S and LOCK_IX each cover
-themselves and LOCK_IS; LOCK_IS and LOCK_AUTO_INC cover only themselves. */
-UNIV_INLINE
-ibool
-lock_mode_stronger_or_eq(
-/*=====================*/
-			/* out: TRUE if mode1 stronger or equal to mode2 */
-	ulint	mode1,	/* in: lock mode */
-	ulint	mode2)	/* in: lock mode */
-{
-	ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
-	      || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
-	ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
-	      || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
-
-	if (mode1 == LOCK_X
-	    || (mode1 == LOCK_AUTO_INC && mode2 == LOCK_AUTO_INC)
-	    || (mode1 == LOCK_S && (mode2 == LOCK_S || mode2 == LOCK_IS))
-	    || (mode1 == LOCK_IS && mode2 == LOCK_IS)
-	    || (mode1 == LOCK_IX && (mode2 == LOCK_IX || mode2 == LOCK_IS))) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/*************************************************************************
-Decides whether lock mode 1 can coexist with lock mode 2. The answer is
-looked up per value of mode1; LOCK_X is compatible with nothing. */
-UNIV_INLINE
-ibool
-lock_mode_compatible(
-/*=================*/
-			/* out: TRUE if mode1 compatible with mode2 */
-	ulint	mode1,	/* in: lock mode */
-	ulint	mode2)	/* in: lock mode */
-{
-	ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
-	      || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
-	ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
-	      || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
-
-	if (mode1 == LOCK_S) {
-
-		return((mode2 == LOCK_IS || mode2 == LOCK_S) ? TRUE : FALSE);
-	}
-
-	if (mode1 == LOCK_X) {
-
-		return(FALSE);
-	}
-
-	if (mode1 == LOCK_AUTO_INC) {
-
-		return((mode2 == LOCK_IS || mode2 == LOCK_IX) ? TRUE : FALSE);
-	}
-
-	if (mode1 == LOCK_IS) {
-
-		return((mode2 == LOCK_IS
-			|| mode2 == LOCK_IX
-			|| mode2 == LOCK_AUTO_INC
-			|| mode2 == LOCK_S) ? TRUE : FALSE);
-	}
-
-	if (mode1 == LOCK_IX) {
-
-		return((mode2 == LOCK_IS
-			|| mode2 == LOCK_AUTO_INC
-			|| mode2 == LOCK_IX) ? TRUE : FALSE);
-	}
-
-	return(FALSE);
-}
-
-/*************************************************************************
-Decides whether a new record lock request must wait for the existing record
-lock lock2 to be removed. */
-UNIV_INLINE
-ibool
-lock_rec_has_to_wait(
-/*=================*/
-			/* out: TRUE if new lock has to wait for lock2 to be
-			removed */
-	trx_t*	trx,	/* in: trx of new lock */
-	ulint	type_mode,/* in: precise mode of the new lock to set:
-			LOCK_S or LOCK_X, possibly ORed to
-			LOCK_GAP or LOCK_REC_NOT_GAP, LOCK_INSERT_INTENTION */
-	lock_t*	lock2,	/* in: another record lock; NOTE that it is assumed
-			that this has a lock bit set on the same record as
-			in the new lock we are setting */
-	ibool	lock_is_on_supremum)	/* in: TRUE if we are setting the lock
-			on the 'supremum' record of an index
-			page: we know then that the lock request
-			is really for a 'gap' type lock */
-{
-	ut_ad(trx && lock2);
-	ut_ad(lock_get_type(lock2) == LOCK_REC);
-
-	if (trx == lock2->trx
-	    || lock_mode_compatible(LOCK_MODE_MASK & type_mode,
-				    lock_get_mode(lock2))) {
-
-		/* Own lock, or the basic modes do not conflict */
-
-		return(FALSE);
-	}
-
-	/* The basic modes conflict; the gap related flags below can still
-	make the wait unnecessary */
-
-	if (!(type_mode & LOCK_INSERT_INTENTION)) {
-
-		if (lock_is_on_supremum || (type_mode & LOCK_GAP)) {
-
-			/* A gap type request without the
-			LOCK_INSERT_INTENTION flag never waits: different
-			users may hold conflicting lock types on gaps */
-
-			return(FALSE);
-		}
-
-		if (lock_rec_get_gap(lock2)) {
-
-			/* A record lock (LOCK_ORDINARY or
-			LOCK_REC_NOT_GAP) does not wait for a gap type
-			lock */
-
-			return(FALSE);
-		}
-	}
-
-	if ((type_mode & LOCK_GAP) && lock_rec_get_rec_not_gap(lock2)) {
-
-		/* A lock on the gap does not wait for a LOCK_REC_NOT_GAP
-		type lock */
-
-		return(FALSE);
-	}
-
-	if (lock_rec_get_insert_intention(lock2)) {
-
-		/* Nothing waits for an insert intention lock to be removed.
-		This is ok since our rules allow conflicting locks on gaps.
-		It eliminates a spurious deadlock caused by a next-key lock
-		waiting for an insert intention lock; when the insert
-		intention lock was granted, the insert deadlocked on the
-		waiting next-key lock.
-
-		Insert intention locks do not disturb each other either. */
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Decides whether the lock request lock1 must wait for the request lock2.
-For record locks the decision is delegated to lock_rec_has_to_wait(). */
-
-ibool
-lock_has_to_wait(
-/*=============*/
-			/* out: TRUE if lock1 has to wait for lock2 to be
-			removed */
-	lock_t*	lock1,	/* in: waiting lock */
-	lock_t*	lock2)	/* in: another lock; NOTE that it is assumed that this
-			has a lock bit set on the same record as in lock1 if
-			the locks are record locks */
-{
-	ut_ad(lock1 && lock2);
-
-	if (lock1->trx == lock2->trx
-	    || lock_mode_compatible(lock_get_mode(lock1),
-				    lock_get_mode(lock2))) {
-
-		return(FALSE);
-	}
-
-	if (lock_get_type(lock1) != LOCK_REC) {
-
-		/* Conflicting non-record locks always wait */
-
-		return(TRUE);
-	}
-
-	ut_ad(lock_get_type(lock2) == LOCK_REC);
-
-	/* If this lock request is for a supremum record then the second
-	bit on the lock bitmap is set */
-
-	return(lock_rec_has_to_wait(lock1->trx, lock1->type_mode, lock2,
-				    lock_rec_get_nth_bit(lock1, 1)));
-}
-
-/*============== RECORD LOCK BASIC FUNCTIONS ============================*/
-
-/*************************************************************************
-Returns the size, in bits, of the bitmap of a record lock as stored in
-the lock struct. */
-UNIV_INLINE
-ulint
-lock_rec_get_n_bits(
-/*================*/
-			/* out: number of bits */
-	lock_t*	lock)	/* in: record lock */
-{
-	ulint	n_bits = lock->un_member.rec_lock.n_bits;
-
-	return(n_bits);
-}
-
-/**************************************************************************
-Turns on bit number i in the bitmap of a record lock. */
-UNIV_INLINE
-void
-lock_rec_set_nth_bit(
-/*=================*/
-	lock_t*	lock,	/* in: record lock */
-	ulint	i)	/* in: index of the bit */
-{
-	byte*	bitmap_byte;
-
-	ut_ad(lock);
-	ut_ad(lock_get_type(lock) == LOCK_REC);
-	ut_ad(i < lock->un_member.rec_lock.n_bits);
-
-	/* The bitmap starts right after the lock struct; pick the byte
-	that holds bit i */
-	bitmap_byte = (byte*)lock + sizeof(lock_t) + i / 8;
-
-	*bitmap_byte = (byte)ut_bit_set_nth((ulint)*bitmap_byte, i % 8,
-					    TRUE);
-}
-
-/**************************************************************************
-Scans the bitmap of a record lock from bit 0 upwards and returns the index
-of the first bit that is set, or ULINT_UNDEFINED if no bit is set. */
-
-ulint
-lock_rec_find_set_bit(
-/*==================*/
-			/* out: bit index == heap number of the record, or
-			ULINT_UNDEFINED if none found */
-	lock_t*	lock)	/* in: record lock with at least one bit set */
-{
-	ulint	bit_no = 0;
-
-	while (bit_no < lock_rec_get_n_bits(lock)) {
-
-		if (lock_rec_get_nth_bit(lock, bit_no)) {
-
-			return(bit_no);
-		}
-
-		bit_no++;
-	}
-
-	return(ULINT_UNDEFINED);
-}
-
-/**************************************************************************
-Turns off bit number i in the bitmap of a record lock. */
-UNIV_INLINE
-void
-lock_rec_reset_nth_bit(
-/*===================*/
-	lock_t*	lock,	/* in: record lock */
-	ulint	i)	/* in: index of the bit which must be set to TRUE
-			when this function is called */
-{
-	byte*	bitmap_byte;
-
-	ut_ad(lock);
-	ut_ad(lock_get_type(lock) == LOCK_REC);
-	ut_ad(i < lock->un_member.rec_lock.n_bits);
-
-	/* The bitmap starts right after the lock struct; pick the byte
-	that holds bit i */
-	bitmap_byte = (byte*)lock + sizeof(lock_t) + i / 8;
-
-	*bitmap_byte = (byte)ut_bit_set_nth((ulint)*bitmap_byte, i % 8,
-					    FALSE);
-}
-
-/*************************************************************************
-Follows the hash chain from the given record lock to the next record lock
-that has the same space id and page number. */
-UNIV_INLINE
-lock_t*
-lock_rec_get_next_on_page(
-/*======================*/
-			/* out: next lock, NULL if none exists */
-	lock_t*	lock)	/* in: a record lock */
-{
-	ulint	space;
-	ulint	page_no;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_type(lock) == LOCK_REC);
-
-	space = lock->un_member.rec_lock.space;
-	page_no = lock->un_member.rec_lock.page_no;
-
-	/* Skip chain entries that belong to other pages */
-	do {
-		lock = HASH_GET_NEXT(hash, lock);
-	} while (lock
-		 && (lock->un_member.rec_lock.space != space
-		     || lock->un_member.rec_lock.page_no != page_no));
-
-	return(lock);
-}
-
-/*************************************************************************
-Looks up the first record lock on the page with the given space id and page
-number by walking the matching hash chain of lock_sys->rec_hash. */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first_on_page_addr(
-/*============================*/
-			/* out: first lock, NULL if none exists */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
-{
-	lock_t*	lock;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (lock = HASH_GET_FIRST(lock_sys->rec_hash,
-				   lock_rec_hash(space, page_no));
-	     lock != NULL;
-	     lock = HASH_GET_NEXT(hash, lock)) {
-
-		if ((lock->un_member.rec_lock.space == space)
-		    && (lock->un_member.rec_lock.page_no == page_no)) {
-
-			break;
-		}
-	}
-
-	return(lock);
-}
-
-/*************************************************************************
-Tells whether at least one explicit record lock exists on a page. The
-kernel mutex is acquired for the duration of the lookup. */
-
-ibool
-lock_rec_expl_exist_on_page(
-/*========================*/
-			/* out: TRUE if there are explicit record locks on
-			the page */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
-{
-	ibool	found;
-
-	mutex_enter(&kernel_mutex);
-
-	found = lock_rec_get_first_on_page_addr(space, page_no)
-		? TRUE : FALSE;
-
-	mutex_exit(&kernel_mutex);
-
-	return(found);
-}
-
-/*************************************************************************
-Gets the first record lock on a page, where the page is identified by a
-pointer to it. The space id and page number of the frame are looked up only
-once, and only if the hash chain is not empty. */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first_on_page(
-/*=======================*/
-			/* out: first lock, NULL if none exists */
-	byte*	ptr)	/* in: pointer to somewhere on the page */
-{
-	ulint	hash;
-	lock_t*	lock;
-	ulint	space;
-	ulint	page_no;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	hash = buf_frame_get_lock_hash_val(ptr);
-
-	lock = HASH_GET_FIRST(lock_sys->rec_hash, hash);
-
-	if (lock == NULL) {
-		/* Empty chain: no need to look at the page identity */
-
-		return(NULL);
-	}
-
-	/* These do not change while we walk the chain */
-	space = buf_frame_get_space_id(ptr);
-	page_no = buf_frame_get_page_no(ptr);
-
-	while (lock
-	       && (lock->un_member.rec_lock.space != space
-		   || lock->un_member.rec_lock.page_no != page_no)) {
-
-		lock = HASH_GET_NEXT(hash, lock);
-	}
-
-	return(lock);
-}
-
-/*************************************************************************
-Gets the next explicit lock request on a record: the next record lock on
-the same page whose bitmap has the bit of the record's heap number set. */
-UNIV_INLINE
-lock_t*
-lock_rec_get_next(
-/*==============*/
-			/* out: next lock, NULL if none exists */
-	rec_t*	rec,	/* in: record on a page */
-	lock_t*	lock)	/* in: lock */
-{
-	ulint	heap_no;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_type(lock) == LOCK_REC);
-
-	/* The heap number of rec does not change during the scan, so
-	compute it once, the same way lock_rec_get_first() does */
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
-	do {
-		lock = lock_rec_get_next_on_page(lock);
-	} while (lock && !lock_rec_get_nth_bit(lock, heap_no));
-
-	return(lock);
-}
-
-/*************************************************************************
-Gets the first explicit lock request on a record. */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first(
-/*===============*/
- /* out: first lock, NULL if none exists */
- rec_t* rec) /* in: record on a page */
-{
- lock_t* lock;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- lock = lock_rec_get_first_on_page(rec);
- if (UNIV_LIKELY_NULL(lock)) {
- ulint heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
- while (lock && !lock_rec_get_nth_bit(lock, heap_no)) {
- lock = lock_rec_get_next_on_page(lock);
- }
- }
-
- return(lock);
-}
-
-/*************************************************************************
-Clears every byte of the bitmap of a record lock. NOTE: the wait_lock
-pointer in the transaction is not touched! Used in lock object creation
-and resetting. */
-static
-void
-lock_rec_bitmap_reset(
-/*==================*/
-	lock_t*	lock)	/* in: record lock */
-{
-	byte*	bitmap;
-	ulint	n_bytes;
-	ulint	pos;
-
-	ut_ad(lock_get_type(lock) == LOCK_REC);
-
-	/* The bitmap resides immediately after the lock struct */
-
-	bitmap = (byte*)lock + sizeof(lock_t);
-
-	n_bytes = lock_rec_get_n_bits(lock) / 8;
-
-	ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
-
-	for (pos = 0; pos < n_bytes; pos++) {
-
-		bitmap[pos] = 0;
-	}
-}
-
-/*************************************************************************
-Duplicates a record lock, i.e. the lock struct together with its trailing
-bitmap, into memory allocated from the given heap. */
-static
-lock_t*
-lock_rec_copy(
-/*==========*/
-				/* out: copy of lock */
-	lock_t*		lock,	/* in: record lock */
-	mem_heap_t*	heap)	/* in: memory heap */
-{
-	ulint	n_bytes;
-	lock_t*	copy;
-
-	ut_ad(lock_get_type(lock) == LOCK_REC);
-
-	/* Struct plus bitmap */
-	n_bytes = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
-
-	copy = mem_heap_alloc(heap, n_bytes);
-
-	ut_memcpy(copy, lock, n_bytes);
-
-	return(copy);
-}
-
-/*************************************************************************
-Finds the record lock that precedes in_lock among the locks set on the
-record with the given heap number. The locks on the page are walked from the
-first one until in_lock is reached, remembering the latest lock that has the
-bit for heap_no set. */
-
-lock_t*
-lock_rec_get_prev(
-/*==============*/
-			/* out: previous lock on the same record, NULL if
-			none exists */
-	lock_t*	in_lock,/* in: record lock */
-	ulint	heap_no)/* in: heap number of the record */
-{
-	lock_t*	lock;
-	lock_t*	prev_on_rec = NULL;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_type(in_lock) == LOCK_REC);
-
-	lock = lock_rec_get_first_on_page_addr(
-		in_lock->un_member.rec_lock.space,
-		in_lock->un_member.rec_lock.page_no);
-
-	ut_ad(lock);
-
-	while (lock != in_lock) {
-
-		if (lock_rec_get_nth_bit(lock, heap_no)) {
-
-			prev_on_rec = lock;
-		}
-
-		lock = lock_rec_get_next_on_page(lock);
-
-		ut_ad(lock);
-	}
-
-	return(prev_on_rec);
-}
-
-/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/
-
-/*************************************************************************
-Searches the lock list of a table, from the last lock backwards, for a lock
-owned by trx whose mode is stronger than or equal to the given mode. */
-UNIV_INLINE
-lock_t*
-lock_table_has(
-/*===========*/
-				/* out: lock or NULL */
-	trx_t*		trx,	/* in: transaction */
-	dict_table_t*	table,	/* in: table */
-	ulint		mode)	/* in: lock mode */
-{
-	lock_t*	lock;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (lock = UT_LIST_GET_LAST(table->locks);
-	     lock != NULL;
-	     lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
-
-		if (lock->trx != trx
-		    || !lock_mode_stronger_or_eq(lock_get_mode(lock),
-						 mode)) {
-
-			continue;
-		}
-
-		/* The same trx already has locked the table in
-		a mode stronger or equal to the mode given */
-
-		ut_ad(!lock_get_wait(lock));
-
-		return(lock);
-	}
-
-	return(NULL);
-}
-
-/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
-
-/*************************************************************************
-Searches the explicit locks on rec for a GRANTED lock of trx that is
-stronger than or equal to precise_mode. Insert intention locks are never
-accepted. */
-UNIV_INLINE
-lock_t*
-lock_rec_has_expl(
-/*==============*/
-			/* out: lock or NULL */
-	ulint	precise_mode,/* in: LOCK_S or LOCK_X possibly ORed to
-			LOCK_GAP or LOCK_REC_NOT_GAP,
-			for a supremum record we regard this always a gap
-			type request */
-	rec_t*	rec,	/* in: record */
-	trx_t*	trx)	/* in: transaction */
-{
-	lock_t*	lock;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
-	      || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
-	ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
-
-	for (lock = lock_rec_get_first(rec);
-	     lock != NULL;
-	     lock = lock_rec_get_next(rec, lock)) {
-
-		if (lock->trx != trx) {
-
-			continue;
-		}
-
-		if (!lock_mode_stronger_or_eq(lock_get_mode(lock),
-					      precise_mode & LOCK_MODE_MASK)) {
-
-			continue;
-		}
-
-		if (lock_get_wait(lock)) {
-			/* Not granted */
-
-			continue;
-		}
-
-		if (lock_rec_get_rec_not_gap(lock)
-		    && !(precise_mode & LOCK_REC_NOT_GAP)
-		    && !page_rec_is_supremum(rec)) {
-			/* A LOCK_REC_NOT_GAP lock only covers a
-			LOCK_REC_NOT_GAP request, except on the supremum */
-
-			continue;
-		}
-
-		if (lock_rec_get_gap(lock)
-		    && !(precise_mode & LOCK_GAP)
-		    && !page_rec_is_supremum(rec)) {
-			/* A gap lock only covers a gap request, except on
-			the supremum */
-
-			continue;
-		}
-
-		if (lock_rec_get_insert_intention(lock)) {
-
-			continue;
-		}
-
-		return(lock);
-	}
-
-	return(NULL);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Searches the explicit lock requests on rec for one that belongs to a
-transaction other than trx and whose mode is stronger than or equal to the
-given mode. Gap type and waiting requests are considered only if asked for. */
-static
-lock_t*
-lock_rec_other_has_expl_req(
-/*========================*/
-			/* out: lock or NULL */
-	ulint	mode,	/* in: LOCK_S or LOCK_X */
-	ulint	gap,	/* in: LOCK_GAP if also gap locks are taken
-			into account, or 0 if not */
-	ulint	wait,	/* in: LOCK_WAIT if also waiting locks are
-			taken into account, or 0 if not */
-	rec_t*	rec,	/* in: record to look at */
-	trx_t*	trx)	/* in: transaction, or NULL if requests by all
-			transactions are taken into account */
-{
-	lock_t*	lock;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(mode == LOCK_X || mode == LOCK_S);
-	ut_ad(gap == 0 || gap == LOCK_GAP);
-	ut_ad(wait == 0 || wait == LOCK_WAIT);
-
-	for (lock = lock_rec_get_first(rec);
-	     lock != NULL;
-	     lock = lock_rec_get_next(rec, lock)) {
-
-		if (lock->trx != trx
-		    && (gap
-			|| (!lock_rec_get_gap(lock)
-			    && !page_rec_is_supremum(rec)))
-		    && (wait || !lock_get_wait(lock))
-		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
-
-			return(lock);
-		}
-	}
-
-	return(NULL);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************************
-Searches the explicit lock requests on rec for one that a new request of
-trx in the given mode would have to wait for, as decided by
-lock_rec_has_to_wait(). */
-static
-lock_t*
-lock_rec_other_has_conflicting(
-/*===========================*/
-			/* out: lock or NULL */
-	ulint	mode,	/* in: LOCK_S or LOCK_X,
-			possibly ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
-			LOCK_INSERT_INTENTION */
-	rec_t*	rec,	/* in: record to look at */
-	trx_t*	trx)	/* in: our transaction */
-{
-	lock_t*	lock;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (lock = lock_rec_get_first(rec);
-	     lock != NULL;
-	     lock = lock_rec_get_next(rec, lock)) {
-
-		if (lock_rec_has_to_wait(trx, mode, lock,
-					 page_rec_is_supremum(rec))) {
-
-			return(lock);
-		}
-	}
-
-	return(NULL);
-}
-
-/*************************************************************************
-Searches the record locks on the page of rec for a lock struct of trx that
-has exactly the given type_mode and a bitmap large enough to hold the heap
-number of rec. Such a struct can be reused, which saves space when a new
-record lock should be set on the page. */
-UNIV_INLINE
-lock_t*
-lock_rec_find_similar_on_page(
-/*==========================*/
-				/* out: lock or NULL */
-	ulint	type_mode,	/* in: lock type_mode field */
-	rec_t*	rec,		/* in: record */
-	trx_t*	trx)		/* in: transaction */
-{
-	lock_t*	candidate;
-	ulint	heap_no;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
-	for (candidate = lock_rec_get_first_on_page(rec);
-	     candidate != NULL;
-	     candidate = lock_rec_get_next_on_page(candidate)) {
-
-		if (candidate->trx == trx
-		    && candidate->type_mode == type_mode
-		    && lock_rec_get_n_bits(candidate) > heap_no) {
-
-			return(candidate);
-		}
-	}
-
-	return(NULL);
-}
-
-/*************************************************************************
-Finds out whether some transaction holds an implicit x-lock on a record in
-a secondary index. */
-
-trx_t*
-lock_sec_rec_some_has_impl_off_kernel(
-/*==================================*/
-				/* out: transaction which has the x-lock, or
-				NULL */
-	rec_t*		rec,	/* in: user record */
-	dict_index_t*	index,	/* in: secondary index */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
-{
-	page_t*	page;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(!(index->type & DICT_CLUSTERED));
-	ut_ad(page_rec_is_user_rec(rec));
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	page = buf_frame_align(rec);
-
-	/* An implicit x-lock is possible only if the max trx id of the page
-	is >= the min trx id of the trx list, or if database recovery is
-	running. Changes of the page max trx id are not written to the log,
-	so during recovery the value on the page may be incorrect. */
-
-	if (ut_dulint_cmp(page_get_max_trx_id(page),
-			  trx_list_get_min_trx_id()) < 0
-	    && !recv_recovery_is_on()) {
-
-		return(NULL);
-	}
-
-	/* Some transaction may have an implicit x-lock: the answer has to
-	be looked up in the clustered index, provided that the trx id on
-	the page is sane. */
-
-	if (lock_check_trx_id_sanity(page_get_max_trx_id(page),
-				     rec, index, offsets, TRUE)) {
-
-		return(row_vers_impl_x_locked_off_kernel(rec, index,
-							 offsets));
-	}
-
-	buf_page_print(page);
-
-	/* The page is corrupt: try to avoid a crash by returning NULL */
-
-	return(NULL);
-}
-
-/*************************************************************************
-Returns the approximate number of record locks held by a transaction,
-counted as the bits set in the bitmaps of all its record locks. Since
-delete-marked records may be removed, the count will not be precise. */
-
-ulint
-lock_number_of_rows_locked(
-/*=======================*/
-	trx_t*	trx)	/* in: transaction */
-{
-	lock_t*	lock;
-	ulint	n_set = 0;
-	ulint	n_bits;
-	ulint	bit_no;
-
-	for (lock = UT_LIST_GET_FIRST(trx->trx_locks);
-	     lock != NULL;
-	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
-
-		if (lock_get_type(lock) != LOCK_REC) {
-			/* Only record locks have a bitmap */
-
-			continue;
-		}
-
-		n_bits = lock_rec_get_n_bits(lock);
-
-		for (bit_no = 0; bit_no < n_bits; bit_no++) {
-			if (lock_rec_get_nth_bit(lock, bit_no)) {
-				n_set++;
-			}
-		}
-	}
-
-	return(n_set);
-}
-
-/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
-
-/*************************************************************************
-Creates a new record lock and inserts it to the lock queue. Does NOT check
-for deadlocks or lock compatibility! The lock struct and its bitmap are
-allocated from trx->lock_heap; the lock is appended to trx->trx_locks and
-inserted into lock_sys->rec_hash. If LOCK_WAIT is set in type_mode, the
-lock is also registered as the lock the transaction waits for. */
-static
-lock_t*
-lock_rec_create(
-/*============*/
- /* out: created lock */
- ulint type_mode,/* in: lock mode and wait flag, type is
- ignored and replaced by LOCK_REC */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: index of record */
- trx_t* trx) /* in: transaction */
-{
- page_t* page;
- lock_t* lock;
- ulint page_no;
- ulint heap_no;
- ulint space;
- ulint n_bits;
- ulint n_bytes;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /* Identify the page of rec and the position of rec in the bitmap */
- page = buf_frame_align(rec);
- space = buf_frame_get_space_id(page);
- page_no = buf_frame_get_page_no(page);
- heap_no = rec_get_heap_no(rec, page_is_comp(page));
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- /* If rec is the supremum record, then we reset the gap and
- LOCK_REC_NOT_GAP bits, as all locks on the supremum are
- automatically of the gap type */
-
- if (rec == page_get_supremum_rec(page)) {
- ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
-
- type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
- }
-
- /* Make lock bitmap bigger by a safety margin */
- n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
- /* Whole bytes only: 1 + n_bits / 8 bytes always hold at least
- n_bits bits */
- n_bytes = 1 + n_bits / 8;
-
- /* The bitmap is allocated in the same chunk, right after the
- struct */
- lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes);
-
- UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
-
- lock->trx = trx;
-
- /* Any type bits passed by the caller are replaced by LOCK_REC */
- lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
- lock->index = index;
-
- lock->un_member.rec_lock.space = space;
- lock->un_member.rec_lock.page_no = page_no;
- /* Stored in bits; always a multiple of 8, as
- lock_rec_bitmap_reset() asserts */
- lock->un_member.rec_lock.n_bits = n_bytes * 8;
-
- /* Reset to zero the bitmap which resides immediately after the
- lock struct */
-
- lock_rec_bitmap_reset(lock);
-
- /* Set the bit corresponding to rec */
- lock_rec_set_nth_bit(lock, heap_no);
-
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), lock);
- if (type_mode & LOCK_WAIT) {
-
- /* Sets trx->wait_lock to point to the new lock */
- lock_set_lock_and_trx_wait(lock, trx);
- }
-
- return(lock);
-}
-
-/*************************************************************************
-Enqueues a waiting request for a lock which cannot be granted immediately.
-Checks for deadlocks. If no deadlock is found and the lock has not been
-granted in the meantime, the transaction is put to the TRX_QUE_LOCK_WAIT
-state and the query thread is stopped. */
-static
-ulint
-lock_rec_enqueue_waiting(
-/*=====================*/
- /* out: DB_LOCK_WAIT, DB_DEADLOCK, or
- DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
- DB_SUCCESS means that there was a deadlock,
- but another transaction was chosen as a
- victim, and we got the lock immediately:
- no need to wait then */
- ulint type_mode,/* in: lock mode this transaction is
- requesting: LOCK_S or LOCK_X, possibly ORed
- with LOCK_GAP or LOCK_REC_NOT_GAP, ORed
- with LOCK_INSERT_INTENTION if this waiting
- lock request is set when performing an
- insert of an index record */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index of record */
- que_thr_t* thr) /* in: query thread */
-{
- lock_t* lock;
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /* Test if there already is some other reason to suspend thread:
- we do not enqueue a lock request if the query thread should be
- stopped anyway */
-
- if (que_thr_stop(thr)) {
-
- /* NOTE(review): ut_error presumably aborts the server, which
- would make the return below unreachable -- confirm */
- ut_error;
-
- return(DB_QUE_THR_SUSPENDED);
- }
-
- trx = thr_get_trx(thr);
-
- /* Only a diagnostic message is printed in this case; the lock
- request is still enqueued below */
- if (trx->dict_operation) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: a record lock wait happens"
- " in a dictionary operation!\n"
- "InnoDB: Table name ", stderr);
- ut_print_name(stderr, trx, TRUE, index->table_name);
- fputs(".\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n",
- stderr);
- }
-
- /* Enqueue the lock request that will wait to be granted */
- lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx);
-
- /* Check if a deadlock occurs: if yes, remove the lock request and
- return an error code */
-
- if (lock_deadlock_occurs(lock, trx)) {
-
- /* The request is cancelled by clearing the wait state and
- the bit of rec in the bitmap; the lock struct itself is not
- removed from the queue here */
- lock_reset_lock_and_trx_wait(lock);
- lock_rec_reset_nth_bit(lock, rec_get_heap_no(
- rec, page_rec_is_comp(rec)));
-
- return(DB_DEADLOCK);
- }
-
- /* If there was a deadlock but we chose another transaction as a
- victim, it is possible that we already have the lock now granted! */
-
- if (trx->wait_lock == NULL) {
-
- return(DB_SUCCESS);
- }
-
- /* We really have to wait: record the wait state in the transaction */
- trx->que_state = TRX_QUE_LOCK_WAIT;
- trx->was_chosen_as_deadlock_victim = FALSE;
- trx->wait_started = time(NULL);
-
- /* NOTE(review): que_thr_stop() is presumably expected to return
- TRUE now that que_state is TRX_QUE_LOCK_WAIT -- confirm */
- ut_a(que_thr_stop(thr));
-
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Lock wait for trx %lu in index ",
- (ulong) ut_dulint_get_low(trx->id));
- ut_print_name(stderr, trx, FALSE, index->name);
- }
-#endif /* UNIV_DEBUG */
-
- return(DB_LOCK_WAIT);
-}
-
-/*************************************************************************
-Adds a record lock request in the record queue. The request is normally
-added as the last in the queue, but if there are no waiting lock requests
-on the record, and the request to be added is not a waiting request, we
-can reuse a suitable record lock object already existing on the same page,
-just setting the appropriate bit in its bitmap. This is a low-level function
-which does NOT check for deadlocks or lock compatibility! */
-static
-lock_t*
-lock_rec_add_to_queue(
-/*==================*/
-				/* out: lock where the bit was set */
-	ulint		type_mode,/* in: lock mode, wait, gap etc. flags;
-				type is ignored and replaced by LOCK_REC */
-	rec_t*		rec,	/* in: record on page */
-	dict_index_t*	index,	/* in: index of record */
-	trx_t*		trx)	/* in: transaction */
-{
-	lock_t*	lock;
-	lock_t*	similar_lock;
-	ulint	heap_no;
-	ibool	somebody_waits	= FALSE;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
-	      || ((type_mode & LOCK_MODE_MASK) != LOCK_S)
-	      || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT,
-					      rec, trx));
-	ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
-	      || ((type_mode & LOCK_MODE_MASK) != LOCK_X)
-	      || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
-					      rec, trx));
-
-	type_mode = type_mode | LOCK_REC;
-
-	/* If rec is the supremum record, then we can reset the gap bit, as
-	all locks on the supremum are automatically of the gap type, and we
-	try to avoid unnecessary memory consumption of a new record lock
-	struct for a gap type lock */
-
-	if (page_rec_is_supremum(rec)) {
-		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
-
-		/* There should never be LOCK_REC_NOT_GAP on a supremum
-		record, but let us play safe */
-
-		type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
-	}
-
-	/* Look for a waiting lock request on the same record or on a gap.
-	One waiter is enough to rule out reuse, so stop at the first. */
-
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-	lock = lock_rec_get_first_on_page(rec);
-
-	while (lock != NULL) {
-		if (lock_get_wait(lock)
-		    && (lock_rec_get_nth_bit(lock, heap_no))) {
-
-			somebody_waits = TRUE;
-
-			break;
-		}
-
-		lock = lock_rec_get_next_on_page(lock);
-	}
-
-	/* Look for a similar record lock on the same page: if one is found
-	and there are no waiting lock requests, we can just set the bit.
-	The search is skipped when its result could not be used anyway. */
-
-	if (!somebody_waits && !(type_mode & LOCK_WAIT)) {
-
-		similar_lock = lock_rec_find_similar_on_page(type_mode,
-							     rec, trx);
-		if (similar_lock != NULL) {
-
-			lock_rec_set_nth_bit(similar_lock, heap_no);
-
-			return(similar_lock);
-		}
-	}
-
-	return(lock_rec_create(type_mode, rec, index, trx));
-}
-
-/*************************************************************************
-Fast path for locking a record. It handles the most common cases: either
-there are no explicit locks on the page, or there is exactly one lock, owned
-by this transaction and of the right type_mode. This is a low-level function
-which does NOT look at implicit locks! Checks lock compatibility within
-explicit locks. This function sets a normal next-key lock, or in the case of
-a page supremum record, a gap type lock. */
-UNIV_INLINE
-ibool
-lock_rec_lock_fast(
-/*===============*/
-				/* out: TRUE if locking succeeded */
-	ibool		impl,	/* in: if TRUE, no lock is set if no wait
-				is necessary: we assume that the caller will
-				set an implicit lock */
-	ulint		mode,	/* in: lock mode: LOCK_X or LOCK_S possibly
-				ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index of record */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	lock_t*	first_lock;
-	ulint	heap_no;
-	trx_t*	trx;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
-	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
-	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
-	      || (LOCK_MODE_MASK & mode) == LOCK_X);
-	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
-	      || mode - (LOCK_MODE_MASK & mode) == 0
-	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
-
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
-	first_lock = lock_rec_get_first_on_page(rec);
-
-	trx = thr_get_trx(thr);
-
-	if (first_lock == NULL) {
-		/* No explicit locks on the page at all */
-
-		if (!impl) {
-			lock_rec_create(mode, rec, index, trx);
-		}
-
-		return(TRUE);
-	}
-
-	/* The fast path applies only if the single lock on the page is
-	ours, has the right type_mode and a big enough bitmap */
-
-	if (lock_rec_get_next_on_page(first_lock)
-	    || first_lock->trx != trx
-	    || first_lock->type_mode != (mode | LOCK_REC)
-	    || lock_rec_get_n_bits(first_lock) <= heap_no) {
-
-		return(FALSE);
-	}
-
-	/* Set the bit of the record unless it is set already */
-
-	if (!impl && !lock_rec_get_nth_bit(first_lock, heap_no)) {
-		lock_rec_set_nth_bit(first_lock, heap_no);
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-General, slower path for locking a record. This is a low-level function
-which does NOT look at implicit locks! Checks lock compatibility within
-explicit locks. This function sets a normal next-key lock, or in the case
-of a page supremum record, a gap type lock. */
-static
-ulint
-lock_rec_lock_slow(
-/*===============*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
-				code */
-	ibool		impl,	/* in: if TRUE, no lock is set if no wait is
-				necessary: we assume that the caller will set
-				an implicit lock */
-	ulint		mode,	/* in: lock mode: LOCK_X or LOCK_S possibly
-				ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index of record */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	trx_t*	trx;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
-	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
-	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
-	      || (LOCK_MODE_MASK & mode) == LOCK_X);
-	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
-	      || mode - (LOCK_MODE_MASK & mode) == 0
-	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
-
-	trx = thr_get_trx(thr);
-
-	if (lock_rec_has_expl(mode, rec, trx)) {
-		/* The trx already has a strong enough lock on rec: do
-		nothing */
-
-		return(DB_SUCCESS);
-	}
-
-	if (lock_rec_other_has_conflicting(mode, rec, trx)) {
-
-		/* If another transaction has a non-gap conflicting request
-		in the queue, as this transaction does not have a lock
-		strong enough already granted on the record, we have to
-		wait. */
-
-		return(lock_rec_enqueue_waiting(mode, rec, index, thr));
-	}
-
-	if (!impl) {
-		/* Set the requested lock on the record */
-
-		lock_rec_add_to_queue(LOCK_REC | mode, rec, index, trx);
-	}
-
-	return(DB_SUCCESS);
-}
-
-/*************************************************************************
-Attempts to lock the given record in the requested mode, enqueueing a
-waiting lock request when the lock cannot be granted at once. This is a
-low-level function that does NOT consider implicit locks; compatibility is
-checked only against explicit locks. The lock set is a normal next-key
-lock, or, for a page supremum record, a gap type lock. */
-static
-ulint
-lock_rec_lock(
-/*==========*/
-                /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
-                code */
-    ibool       impl,   /* in: if TRUE, no lock is set if no wait is
-                necessary: we assume that the caller will set
-                an implicit lock */
-    ulint       mode,   /* in: lock mode: LOCK_X or LOCK_S possibly
-                ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
-    rec_t*      rec,    /* in: record */
-    dict_index_t*   index,  /* in: index of record */
-    que_thr_t*  thr)    /* in: query thread */
-{
-    ut_ad(mutex_own(&kernel_mutex));
-    ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
-          || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-    ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
-          || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-    ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
-          || (LOCK_MODE_MASK & mode) == LOCK_X);
-    ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
-          || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
-          || mode - (LOCK_MODE_MASK & mode) == 0);
-
-    /* First try the simplified, faster subroutine that handles the
-    most common cases */
-
-    if (lock_rec_lock_fast(impl, mode, rec, index, thr)) {
-
-        return(DB_SUCCESS);
-    }
-
-    /* The fast routine could not handle the request: use the general
-    one */
-
-    return(lock_rec_lock_slow(impl, mode, rec, index, thr));
-}
-
-/*************************************************************************
-Tells whether a waiting record lock request must keep waiting, i.e.
-whether some lock ahead of it on the same page has the same heap number
-bit set and has to be waited for. */
-static
-ibool
-lock_rec_has_to_wait_in_queue(
-/*==========================*/
-                /* out: TRUE if still has to wait */
-    lock_t* wait_lock)  /* in: waiting record lock */
-{
-    lock_t* ahead;
-    ulint   heap_no;
-
-    ut_ad(mutex_own(&kernel_mutex));
-    ut_ad(lock_get_wait(wait_lock));
-    ut_ad(lock_get_type(wait_lock) == LOCK_REC);
-
-    heap_no = lock_rec_find_set_bit(wait_lock);
-
-    /* Scan the locks on the page from the first one up to, but not
-    including, wait_lock itself */
-
-    for (ahead = lock_rec_get_first_on_page_addr(
-             wait_lock->un_member.rec_lock.space,
-             wait_lock->un_member.rec_lock.page_no);
-         ahead != wait_lock;
-         ahead = lock_rec_get_next_on_page(ahead)) {
-
-        if (lock_rec_get_nth_bit(ahead, heap_no)
-            && lock_has_to_wait(wait_lock, ahead)) {
-
-            return(TRUE);
-        }
-    }
-
-    return(FALSE);
-}
-
-/*****************************************************************
-Turns a waiting lock request into a granted one and releases the
-transaction that was waiting for it. */
-static
-void
-lock_grant(
-/*=======*/
-    lock_t* lock)   /* in: waiting lock request */
-{
-    trx_t*  owner;
-
-    ut_ad(mutex_own(&kernel_mutex));
-
-    lock_reset_lock_and_trx_wait(lock);
-
-    owner = lock->trx;
-
-    if (lock_get_mode(lock) == LOCK_AUTO_INC) {
-
-        if (owner->auto_inc_lock != NULL) {
-            fprintf(stderr,
-                "InnoDB: Error: trx already had"
-                " an AUTO-INC lock!\n");
-        }
-
-        /* Remember the lock in the trx, so that it is known to be
-        released when the SQL statement ends */
-
-        owner->auto_inc_lock = lock;
-    }
-
-#ifdef UNIV_DEBUG
-    if (lock_print_waits) {
-        fprintf(stderr, "Lock wait for trx %lu ends\n",
-            (ulong) ut_dulint_get_low(owner->id));
-    }
-#endif /* UNIV_DEBUG */
-
-    /* When a deadlock is resolved by picking another transaction as
-    the victim, the original transaction may not be in the
-    TRX_QUE_LOCK_WAIT state; in that case there is no lock wait to
-    end for it */
-
-    if (owner->que_state == TRX_QUE_LOCK_WAIT) {
-        trx_end_lock_wait(owner);
-    }
-}
-
-/*****************************************************************
-Cancels a waiting record lock request and releases the transaction that
-was waiting for it. NOTE: no check is made whether waiting lock requests
-queued behind this one could now be granted! */
-static
-void
-lock_rec_cancel(
-/*============*/
-    lock_t* lock)   /* in: waiting record lock request */
-{
-    ulint   waited_bit;
-
-    ut_ad(mutex_own(&kernel_mutex));
-    ut_ad(lock_get_type(lock) == LOCK_REC);
-
-    /* A waiting request has only one bit set in its bitmap: clear it */
-
-    waited_bit = lock_rec_find_set_bit(lock);
-
-    lock_rec_reset_nth_bit(lock, waited_bit);
-
-    /* Clear the wait flag and the trx's back pointer to the lock */
-
-    lock_reset_lock_and_trx_wait(lock);
-
-    /* Release the trx from the lock wait */
-
-    trx_end_lock_wait(lock->trx);
-}
-
-/*****************************************************************
-Takes a record lock object, waiting or granted, out of the queue and then
-grants the waiting requests on the same page that no longer have to wait.
-NOTE: all record locks contained in in_lock are removed. */
-static
-void
-lock_rec_dequeue_from_page(
-/*=======================*/
-    lock_t* in_lock)/* in: record lock object: all record locks which
-            are contained in this lock object are removed;
-            transactions waiting behind will get their lock
-            requests granted, if they are now qualified to it */
-{
-    ulint   space;
-    ulint   page_no;
-    lock_t* queued;
-    trx_t*  owner;
-
-    ut_ad(mutex_own(&kernel_mutex));
-    ut_ad(lock_get_type(in_lock) == LOCK_REC);
-
-    owner = in_lock->trx;
-    space = in_lock->un_member.rec_lock.space;
-    page_no = in_lock->un_member.rec_lock.page_no;
-
-    /* Unlink the lock from the record lock hash table and from the
-    lock list of its transaction */
-
-    HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
-            lock_rec_fold(space, page_no), in_lock);
-
-    UT_LIST_REMOVE(trx_locks, owner->trx_locks, in_lock);
-
-    /* Go through the remaining locks on the page and grant every
-    waiting request that has no conflicting lock ahead of it */
-
-    for (queued = lock_rec_get_first_on_page_addr(space, page_no);
-         queued != NULL;
-         queued = lock_rec_get_next_on_page(queued)) {
-
-        if (!lock_get_wait(queued)) {
-
-            continue;
-        }
-
-        if (!lock_rec_has_to_wait_in_queue(queued)) {
-
-            lock_grant(queued);
-        }
-    }
-}
-
-/*****************************************************************
-Takes a record lock object, waiting or granted, out of the queue. Unlike
-lock_rec_dequeue_from_page, no waiting requests are granted here. */
-static
-void
-lock_rec_discard(
-/*=============*/
-    lock_t* in_lock)/* in: record lock object: all record locks which
-            are contained in this lock object are removed */
-{
-    ulint   space;
-    ulint   page_no;
-    trx_t*  owner;
-
-    ut_ad(mutex_own(&kernel_mutex));
-    ut_ad(lock_get_type(in_lock) == LOCK_REC);
-
-    owner = in_lock->trx;
-    space = in_lock->un_member.rec_lock.space;
-    page_no = in_lock->un_member.rec_lock.page_no;
-
-    /* Unlink from the record lock hash table ... */
-    HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
-            lock_rec_fold(space, page_no), in_lock);
-
-    /* ... and from the lock list of the owning transaction */
-    UT_LIST_REMOVE(trx_locks, owner->trx_locks, in_lock);
-}
-
-/*****************************************************************
-Discards every record lock object set on an index page that is being
-discarded. Locks are neither moved nor checked for waiters here, so the
-lock bitmaps must already have been reset by the caller. */
-static
-void
-lock_rec_free_all_from_discard_page(
-/*================================*/
-    page_t* page)   /* in: page to be discarded */
-{
-    lock_t* cur;
-    lock_t* following;
-
-    ut_ad(mutex_own(&kernel_mutex));
-
-    for (cur = lock_rec_get_first_on_page_addr(
-             buf_frame_get_space_id(page),
-             buf_frame_get_page_no(page));
-         cur != NULL;
-         cur = following) {
-
-        ut_ad(lock_rec_find_set_bit(cur) == ULINT_UNDEFINED);
-        ut_ad(!lock_get_wait(cur));
-
-        /* Fetch the successor before cur is taken out of the
-        queue */
-
-        following = lock_rec_get_next_on_page(cur);
-
-        lock_rec_discard(cur);
-    }
-}
-
-/*============= RECORD LOCK MOVING AND INHERITING ===================*/
-
-/*****************************************************************
-Clears the lock bits of one record in all the locks set on it. Waiting
-requests on the record are cancelled, which releases the transactions
-waiting for them. */
-static
-void
-lock_rec_reset_and_release_wait(
-/*============================*/
-    rec_t*  rec)    /* in: record whose lock bits should be reset */
-{
-    lock_t* cur;
-    ulint   heap_no;
-
-    ut_ad(mutex_own(&kernel_mutex));
-
-    heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
-    for (cur = lock_rec_get_first(rec);
-         cur != NULL;
-         cur = lock_rec_get_next(rec, cur)) {
-
-        if (!lock_get_wait(cur)) {
-            /* Granted lock: just clear the bit of rec */
-            lock_rec_reset_nth_bit(cur, heap_no);
-        } else {
-            /* Waiting request: cancel it */
-            lock_rec_cancel(cur);
-        }
-    }
-}
-
-/*****************************************************************
-Lets heir inherit, as gap type locks, the locks set on rec, leaving the
-lock bits of rec untouched. LOCK_INSERT_INTENTION locks are not inherited.
-Waiting lock requests on rec are inherited too, as GRANTED gap locks. */
-
-void
-lock_rec_inherit_to_gap(
-/*====================*/
-    rec_t*  heir,   /* in: record which inherits */
-    rec_t*  rec)    /* in: record from which inherited; does NOT reset
-            the locks on this record */
-{
-    lock_t* cur;
-
-    ut_ad(mutex_own(&kernel_mutex));
-
-    for (cur = lock_rec_get_first(rec);
-         cur != NULL;
-         cur = lock_rec_get_next(rec, cur)) {
-
-        if (lock_rec_get_insert_intention(cur)) {
-            /* Insert intention locks are never inherited */
-
-            continue;
-        }
-
-        /* With srv_locks_unsafe_for_binlog set, or with the READ
-        COMMITTED isolation level, the X-locks set by an UPDATE or
-        a DELETE must not be inherited as gap type locks. S-locks
-        set by a consistency constraint ARE still inherited. */
-
-        if ((srv_locks_unsafe_for_binlog
-             || lock->trx->isolation_level
-             == TRX_ISO_READ_COMMITTED)
-            && lock_get_mode(cur) == LOCK_X) {
-
-            continue;
-        }
-
-        lock_rec_add_to_queue(LOCK_REC | lock_get_mode(cur)
-                      | LOCK_GAP,
-                      heir, cur->index, cur->trx);
-    }
-}
-
-/*****************************************************************
-Lets heir inherit, as gap type locks, the gap-covering locks set on rec,
-leaving the lock bits of rec untouched. LOCK_INSERT_INTENTION locks are
-not inherited. Waiting lock requests are inherited too, as GRANTED gap
-locks. */
-static
-void
-lock_rec_inherit_to_gap_if_gap_lock(
-/*================================*/
-    rec_t*  heir,   /* in: record which inherits */
-    rec_t*  rec)    /* in: record from which inherited; does NOT reset
-            the locks on this record */
-{
-    lock_t* cur;
-
-    ut_ad(mutex_own(&kernel_mutex));
-
-    for (cur = lock_rec_get_first(rec);
-         cur != NULL;
-         cur = lock_rec_get_next(rec, cur)) {
-
-        if (lock_rec_get_insert_intention(cur)) {
-
-            continue;
-        }
-
-        /* A LOCK_REC_NOT_GAP lock is skipped, except when rec is
-        the page supremum */
-
-        if (!page_rec_is_supremum(rec)
-            && lock_rec_get_rec_not_gap(cur)) {
-
-            continue;
-        }
-
-        lock_rec_add_to_queue(LOCK_REC | lock_get_mode(cur)
-                      | LOCK_GAP,
-                      heir, cur->index, cur->trx);
-    }
-}
-
-/*****************************************************************
-Moves the locks of a record to another record and resets the lock bits of
-the donating record.
-The caller must hold the kernel mutex (asserted in debug builds). For each
-lock found on donator, the bit of donator is cleared, the wait state is
-reset if the lock is a waiting one, and the lock is then added on receiver
-with the type_mode value that was read before those resets.
-NOTE(review): the comment inside the loop says the function also works when
-donator == receiver, while the debug assertions require receiver to carry
-no locks on entry and donator to carry none on exit; confirm which of the
-two is the intended contract. */
-static
-void
-lock_rec_move(
-/*==========*/
- rec_t* receiver, /* in: record which gets locks; this record
- must have no lock requests on it! */
- rec_t* donator, /* in: record which gives locks */
- ulint comp) /* in: nonzero=compact page format */
-{
- lock_t* lock;
- ulint heap_no;
- ulint type_mode;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- heap_no = rec_get_heap_no(donator, comp);
-
- lock = lock_rec_get_first(donator);
-
- ut_ad(lock_rec_get_first(receiver) == NULL);
-
- while (lock != NULL) {
- type_mode = lock->type_mode; /* read before the wait state is reset below */
-
- lock_rec_reset_nth_bit(lock, heap_no);
-
- if (lock_get_wait(lock)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- /* Note that we FIRST reset the bit, and then set the lock:
- the function works also if donator == receiver */
-
- lock_rec_add_to_queue(type_mode, receiver, lock->index,
- lock->trx);
- lock = lock_rec_get_next(donator, lock);
- }
-
- ut_ad(lock_rec_get_first(donator) == NULL);
-}
-
-/*****************************************************************
-Updates the lock table when we have reorganized a page. NOTE: we copy
-also the locks set on the infimum of the page; the infimum may carry
-locks if an update of a record is occurring on the page, and its locks
-were temporarily stored on the infimum.
-The function acquires the kernel mutex itself and releases it before
-returning; it returns at once if there are no record locks on the page.
-Otherwise it works in two phases:
-1) every lock struct on the page is copied to a temporary memory heap, the
-bitmap of the original is reset and, for a waiting lock, its wait state is
-reset;
-2) for every copy, page and old_page are walked in parallel, starting from
-page_cur_set_before_first() and ending when the cursor on page is at the
-supremum; wherever the copy has the bit of the old_page record's heap
-number set, the lock is added on the corresponding record of page. */
-
-void
-lock_move_reorganize_page(
-/*======================*/
- page_t* page, /* in: old index page, now reorganized */
- page_t* old_page) /* in: copy of the old, not reorganized page */
-{
- lock_t* lock;
- lock_t* old_lock;
- page_cur_t cur1;
- page_cur_t cur2;
- ulint old_heap_no;
- UT_LIST_BASE_NODE_T(lock_t) old_locks;
- mem_heap_t* heap = NULL;
- rec_t* sup;
- ulint comp;
-
- lock_mutex_enter_kernel();
-
- lock = lock_rec_get_first_on_page(page);
-
- if (lock == NULL) {
- lock_mutex_exit_kernel();
-
- return;
- }
-
- heap = mem_heap_create(256); /* holds the lock copies; freed at the end of this function */
-
- /* Copy first all the locks on the page to heap and reset the
- bitmaps in the original locks; chain the copies of the locks
- using the trx_locks field in them. */
-
- UT_LIST_INIT(old_locks);
-
- while (lock != NULL) {
-
- /* Make a copy of the lock */
- old_lock = lock_rec_copy(lock, heap);
-
- UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);
-
- /* Reset bitmap of lock */
- lock_rec_bitmap_reset(lock);
-
- if (lock_get_wait(lock)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- lock = lock_rec_get_next_on_page(lock);
- }
-
- sup = page_get_supremum_rec(page);
-
- lock = UT_LIST_GET_FIRST(old_locks); /* from here on, lock iterates over the copies */
-
- comp = page_is_comp(page);
- ut_ad(comp == page_is_comp(old_page));
-
- while (lock) {
- /* NOTE: we copy also the locks set on the infimum and
- supremum of the page; the infimum may carry locks if an
- update of a record is occurring on the page, and its locks
- were temporarily stored on the infimum */
-
- page_cur_set_before_first(page, &cur1);
- page_cur_set_before_first(old_page, &cur2);
-
- /* Set locks according to old locks */
- for (;;) {
- ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2),
- comp);
-
- if (lock_rec_get_nth_bit(lock, old_heap_no)) {
-
- /* NOTE that the old lock bitmap could be too
- small for the new heap number! */
-
- lock_rec_add_to_queue(lock->type_mode,
- page_cur_get_rec(&cur1),
- lock->index, lock->trx);
-
- /* if ((page_cur_get_rec(&cur1) == sup)
- && lock_get_wait(lock)) {
- fprintf(stderr,
- "---\n--\n!!!Lock reorg: supr type %lu\n",
- lock->type_mode);
- } */
- }
-
- if (page_cur_get_rec(&cur1) == sup) {
-
- break; /* the supremum itself has been handled above */
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
-
- /* Remember that we chained old locks on the trx_locks field */
-
- lock = UT_LIST_GET_NEXT(trx_locks, lock);
- }
-
- lock_mutex_exit_kernel();
-
- mem_heap_free(heap);
-
-#if 0
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
- buf_frame_get_page_no(page)));
-#endif
-}
-
-/*****************************************************************
-Moves the explicit locks on user records to another page if a record
-list end is moved to another page.
-The function acquires and releases the kernel mutex itself. For each lock
-struct on page, one cursor walks page starting at rec (advanced once if it
-is positioned before the first record) and a second cursor walks new_page
-starting at the record after the before-first position. The walk stops
-when the first cursor reaches the supremum of page, so the supremum is not
-processed. Where the lock has the bit of the current page record set, the
-bit is reset, the wait state is reset for a waiting lock, and the lock is
-added on the current new_page record with the type_mode read beforehand.
-NOTE(review): unlike lock_move_rec_list_start, this function does not
-assert that page and new_page have the same page_is_comp() value. */
-
-void
-lock_move_rec_list_end(
-/*===================*/
- page_t* new_page, /* in: index page to move to */
- page_t* page, /* in: index page */
- rec_t* rec) /* in: record on page: this is the
- first record moved */
-{
- lock_t* lock;
- page_cur_t cur1;
- page_cur_t cur2;
- ulint heap_no;
- rec_t* sup;
- ulint type_mode;
- ulint comp;
- ut_ad(page == buf_frame_align(rec));
-
- lock_mutex_enter_kernel();
-
- /* Note: when we move locks from record to record, waiting locks
- and possible granted gap type locks behind them are enqueued in
- the original order, because new elements are inserted to a hash
- table to the end of the hash chain, and lock_rec_add_to_queue
- does not reuse locks if there are waiters in the queue. */
-
- sup = page_get_supremum_rec(page);
-
- lock = lock_rec_get_first_on_page(page);
-
- comp = page_is_comp(page);
-
- while (lock != NULL) {
-
- page_cur_position(rec, &cur1);
-
- if (page_cur_is_before_first(&cur1)) {
- page_cur_move_to_next(&cur1);
- }
-
- page_cur_set_before_first(new_page, &cur2);
- page_cur_move_to_next(&cur2);
-
- /* Copy lock requests on user records to new page and
- reset the lock bits on the old */
-
- while (page_cur_get_rec(&cur1) != sup) {
- ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
- comp);
-
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- type_mode = lock->type_mode; /* read before the wait state is reset below */
-
- lock_rec_reset_nth_bit(lock, heap_no);
-
- if (lock_get_wait(lock)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- lock_rec_add_to_queue(type_mode,
- page_cur_get_rec(&cur2),
- lock->index, lock->trx);
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
-
- lock = lock_rec_get_next_on_page(lock);
- }
-
- lock_mutex_exit_kernel();
-
-#if 0
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
- buf_frame_get_page_no(page)));
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
- buf_frame_get_page_no(new_page)));
-#endif
-}
-
-/*****************************************************************
-Moves the explicit locks on user records to another page if a record
-list start is moved to another page.
-The function acquires and releases the kernel mutex itself. For each lock
-struct on page, one cursor walks page from the record after the
-before-first position up to, but not including, rec, while a second cursor
-walks new_page starting at the record that follows old_end. Where the lock
-has the bit of the current page record set, the bit is reset, the wait
-state is reset for a waiting lock, and the lock is added on the current
-new_page record with the type_mode read beforehand. */
-
-void
-lock_move_rec_list_start(
-/*=====================*/
- page_t* new_page, /* in: index page to move to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page: this is the
- first record NOT copied */
- rec_t* old_end) /* in: old previous-to-last record on
- new_page before the records were copied */
-{
- lock_t* lock;
- page_cur_t cur1;
- page_cur_t cur2;
- ulint heap_no;
- ulint type_mode;
- ulint comp;
-
- ut_a(new_page);
-
- lock_mutex_enter_kernel();
-
- lock = lock_rec_get_first_on_page(page);
- comp = page_is_comp(page);
- ut_ad(comp == page_is_comp(new_page));
- ut_ad(page == buf_frame_align(rec));
-
- while (lock != NULL) {
-
- page_cur_set_before_first(page, &cur1);
- page_cur_move_to_next(&cur1);
-
- page_cur_position(old_end, &cur2);
- page_cur_move_to_next(&cur2);
-
- /* Copy lock requests on user records to new page and
- reset the lock bits on the old */
-
- while (page_cur_get_rec(&cur1) != rec) {
- ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
- comp);
-
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- type_mode = lock->type_mode; /* read before the wait state is reset below */
-
- lock_rec_reset_nth_bit(lock, heap_no);
-
- if (lock_get_wait(lock)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- lock_rec_add_to_queue(type_mode,
- page_cur_get_rec(&cur2),
- lock->index, lock->trx);
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
-
- lock = lock_rec_get_next_on_page(lock);
- }
-
- lock_mutex_exit_kernel();
-#if 0
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
- buf_frame_get_page_no(page)));
- ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
- buf_frame_get_page_no(new_page)));
-#endif
-}
-
-/*****************************************************************
-Brings the lock table up to date after a page has been split to the
-right. */
-
-void
-lock_update_split_right(
-/*====================*/
-    page_t* right_page, /* in: right page */
-    page_t* left_page)  /* in: left page */
-{
-    rec_t*  left_sup;
-    rec_t*  right_sup;
-    ulint   comp;
-
-    lock_mutex_enter_kernel();
-    comp = page_is_comp(left_page);
-    ut_ad(comp == page_is_comp(right_page));
-
-    left_sup = page_get_supremum_rec(left_page);
-    right_sup = page_get_supremum_rec(right_page);
-
-    /* The locks on the left page supremum move over to the right
-    page supremum */
-
-    lock_rec_move(right_sup, left_sup, comp);
-
-    /* The left page supremum then inherits locks from the record
-    that follows the infimum on the right page */
-
-    lock_rec_inherit_to_gap(left_sup,
-                page_rec_get_next(
-                    page_get_infimum_rec(right_page)));
-
-    lock_mutex_exit_kernel();
-}
-
-/*****************************************************************
-Brings the lock table up to date after a page has been merged to the
-right. */
-
-void
-lock_update_merge_right(
-/*====================*/
-    rec_t*  orig_succ,  /* in: original successor of infimum
-                on the right page before merge */
-    page_t* left_page)  /* in: merged index page which will be
-                discarded */
-{
-    rec_t*  left_sup;
-
-    lock_mutex_enter_kernel();
-
-    left_sup = page_get_supremum_rec(left_page);
-
-    /* The original successor of the infimum on the right page (the
-    page the left page was merged to) inherits the locks of the left
-    page supremum */
-
-    lock_rec_inherit_to_gap(orig_succ, left_sup);
-
-    /* Clear the locks on the left page supremum; this releases the
-    transactions waiting on it */
-
-    lock_rec_reset_and_release_wait(left_sup);
-
-    lock_rec_free_all_from_discard_page(left_page);
-
-    lock_mutex_exit_kernel();
-}
-
-/*****************************************************************
-Brings the lock table up to date after the root page has been copied to
-another page in btr_root_raise_and_insert. The lock structs are left on
-the root page although they make no sense on a non-leaf page: in a
-pessimistic update the infimum record of the root page serves as a dummy
-carrier of the locks of the record being updated. */
-
-void
-lock_update_root_raise(
-/*===================*/
-    page_t* new_page,   /* in: index page to which copied */
-    page_t* root)       /* in: root page */
-{
-    rec_t*  root_sup;
-    rec_t*  new_sup;
-    ulint   comp;
-
-    lock_mutex_enter_kernel();
-    comp = page_is_comp(root);
-    ut_ad(comp == page_is_comp(new_page));
-
-    new_sup = page_get_supremum_rec(new_page);
-    root_sup = page_get_supremum_rec(root);
-
-    /* The locks on the root supremum move to the supremum of
-    new_page */
-
-    lock_rec_move(new_sup, root_sup, comp);
-    lock_mutex_exit_kernel();
-}
-
-/*****************************************************************
-Brings the lock table up to date after a page has been copied to another
-page and the original has been removed from the chain of leaf pages. Must
-not be used when page is the root! */
-
-void
-lock_update_copy_and_discard(
-/*=========================*/
-    page_t* new_page,   /* in: index page to which copied */
-    page_t* page)       /* in: index page; NOT the root! */
-{
-    rec_t*  old_sup;
-    rec_t*  new_sup;
-    ulint   comp;
-
-    lock_mutex_enter_kernel();
-    comp = page_is_comp(page);
-    ut_ad(comp == page_is_comp(new_page));
-
-    new_sup = page_get_supremum_rec(new_page);
-    old_sup = page_get_supremum_rec(page);
-
-    /* The locks on the old page supremum move to the supremum of
-    new_page; after that the lock structs of the old page are freed */
-
-    lock_rec_move(new_sup, old_sup, comp);
-    lock_rec_free_all_from_discard_page(page);
-
-    lock_mutex_exit_kernel();
-}
-
-/*****************************************************************
-Brings the lock table up to date after a page has been split to the
-left. */
-
-void
-lock_update_split_left(
-/*===================*/
-    page_t* right_page, /* in: right page */
-    page_t* left_page)  /* in: left page */
-{
-    rec_t*  right_first;
-
-    lock_mutex_enter_kernel();
-
-    /* The left page supremum inherits locks from the record that
-    follows the infimum on the right page */
-
-    right_first = page_rec_get_next(page_get_infimum_rec(right_page));
-
-    lock_rec_inherit_to_gap(page_get_supremum_rec(left_page),
-                right_first);
-
-    lock_mutex_exit_kernel();
-}
-
-/*****************************************************************
-Brings the lock table up to date after a page has been merged to the
-left. */
-
-void
-lock_update_merge_left(
-/*===================*/
-    page_t* left_page,  /* in: left page to which merged */
-    rec_t*  orig_pred,  /* in: original predecessor of supremum
-                on the left page before merge */
-    page_t* right_page) /* in: merged index page which will be
-                discarded */
-{
-    rec_t*  first_moved;
-    rec_t*  left_sup;
-    ulint   comp;
-
-    lock_mutex_enter_kernel();
-    comp = page_is_comp(left_page);
-    ut_ad(comp == page_is_comp(right_page));
-    ut_ad(left_page == buf_frame_align(orig_pred));
-
-    left_sup = page_get_supremum_rec(left_page);
-    first_moved = page_rec_get_next(orig_pred);
-
-    if (UNIV_LIKELY(first_moved != left_sup)) {
-
-        /* The first record moved over from the right page
-        inherits the locks of the left page supremum */
-
-        lock_rec_inherit_to_gap(first_moved, left_sup);
-
-        /* Clear the locks on the left page supremum; this
-        releases the transactions waiting on it */
-
-        lock_rec_reset_and_release_wait(left_sup);
-    }
-
-    /* The locks on the right page supremum move to the left page
-    supremum */
-
-    lock_rec_move(left_sup, page_get_supremum_rec(right_page), comp);
-
-    lock_rec_free_all_from_discard_page(right_page);
-
-    lock_mutex_exit_kernel();
-}
-
-/*****************************************************************
-Clears the locks heir originally had and gives it, in their place, gap
-type locks inherited from rec. */
-
-void
-lock_rec_reset_and_inherit_gap_locks(
-/*=================================*/
-    rec_t*  heir,   /* in: heir record */
-    rec_t*  rec)    /* in: record */
-{
-    mutex_enter(&kernel_mutex);
-
-    /* Step 1: drop what heir has now, releasing any waiters */
-    lock_rec_reset_and_release_wait(heir);
-
-    /* Step 2: inherit from rec as gap type locks */
-    lock_rec_inherit_to_gap(heir, rec);
-
-    mutex_exit(&kernel_mutex);
-}
-
-/*****************************************************************
-Brings the lock table up to date when a page is discarded: heir inherits
-the locks of every record on the page, after which the lock structs of
-the page are freed. */
-
-void
-lock_update_discard(
-/*================*/
-    rec_t*  heir,   /* in: record which will inherit the locks */
-    page_t* page)   /* in: index page which will be discarded */
-{
-    rec_t*  cur_rec;
-    ibool   at_supremum;
-
-    lock_mutex_enter_kernel();
-
-    /* When no locks exist on the page there is nothing to do */
-
-    if (NULL != lock_rec_get_first_on_page(page)) {
-
-        /* Walk from the infimum to the supremum, both included:
-        heir inherits the locks of each record, and the locks on
-        the record are then reset, releasing waiting
-        transactions */
-
-        cur_rec = page_get_infimum_rec(page);
-
-        do {
-            lock_rec_inherit_to_gap(heir, cur_rec);
-
-            lock_rec_reset_and_release_wait(cur_rec);
-
-            at_supremum = page_rec_is_supremum(cur_rec);
-
-            if (!at_supremum) {
-                cur_rec = page_rec_get_next(cur_rec);
-            }
-        } while (!at_supremum);
-
-        lock_rec_free_all_from_discard_page(page);
-    }
-
-    lock_mutex_exit_kernel();
-}
-
-/*****************************************************************
-Brings the lock table up to date after a new user record has been
-inserted. */
-
-void
-lock_update_insert(
-/*===============*/
-    rec_t*  rec)    /* in: the inserted record */
-{
-    rec_t*  successor;
-
-    lock_mutex_enter_kernel();
-
-    /* rec inherits, in gap mode, the gap-locking locks of the record
-    that follows it */
-
-    successor = page_rec_get_next(rec);
-
-    lock_rec_inherit_to_gap_if_gap_lock(rec, successor);
-
-    lock_mutex_exit_kernel();
-}
-
-/*****************************************************************
-Brings the lock table up to date when a record is removed. */
-
-void
-lock_update_delete(
-/*===============*/
-    rec_t*  rec)    /* in: the record to be removed */
-{
-    rec_t*  successor;
-
-    lock_mutex_enter_kernel();
-
-    /* The record following rec inherits the locks of rec, in gap
-    mode */
-
-    successor = page_rec_get_next(rec);
-
-    lock_rec_inherit_to_gap(successor, rec);
-
-    /* Then the lock bits of rec are cleared, which releases the
-    transactions waiting on it */
-
-    lock_rec_reset_and_release_wait(rec);
-
-    lock_mutex_exit_kernel();
-}
-
-/*************************************************************************
-Parks the explicit locks of a record on the infimum record of its page.
-Used to keep the lock state of a record while it is updated in a way that
-changes its size: such an update moves the record, perhaps to another
-page. Meanwhile the infimum acts as a dummy carrier record that takes
-care of lock releases. */
-
-void
-lock_rec_store_on_page_infimum(
-/*===========================*/
-    page_t* page,   /* in: page containing the record */
-    rec_t*  rec)    /* in: record whose lock state is stored
-            on the infimum record of the same page; lock
-            bits are reset on the record */
-{
-    rec_t*  infimum;
-    ulint   comp;
-
-    ut_ad(page == buf_frame_align(rec));
-
-    lock_mutex_enter_kernel();
-
-    infimum = page_get_infimum_rec(page);
-    comp = page_is_comp(page);
-
-    lock_rec_move(infimum, rec, comp);
-
-    lock_mutex_exit_kernel();
-}
-
-/*************************************************************************
-Gives back to a single record the explicit lock requests that were parked
-on the infimum of a page. */
-
-void
-lock_rec_restore_from_page_infimum(
-/*===============================*/
-    rec_t*  rec,    /* in: record whose lock state is restored */
-    page_t* page)   /* in: page (rec is not necessarily on this page)
-            whose infimum stored the lock state; lock bits are
-            reset on the infimum */
-{
-    rec_t*  infimum;
-    ulint   comp;
-
-    lock_mutex_enter_kernel();
-    comp = page_is_comp(page);
-    ut_ad(!comp == !page_rec_is_comp(rec));
-
-    infimum = page_get_infimum_rec(page);
-
-    lock_rec_move(rec, infimum, comp);
-
-    lock_mutex_exit_kernel();
-}
-
-/*=========== DEADLOCK CHECKING ======================================*/
-
-/************************************************************************
-Checks if a lock request results in a deadlock. The search of the
-waits-for graph is repeated for as long as it ends with some other
-transaction having been chosen as the victim, because another deadlock
-may still remain. The step counter handed to the search is initialized
-only once, so it accumulates over the repeated searches. The caller must
-hold the kernel mutex. */
-static
-ibool
-lock_deadlock_occurs(
-/*=================*/
-            /* out: TRUE if a deadlock was detected and we
-            chose trx as a victim; FALSE if no deadlock, or
-            there was a deadlock, but we chose other
-            transaction(s) as victim(s) */
-    lock_t* lock,   /* in: lock the transaction is requesting */
-    trx_t*  trx)    /* in: transaction */
-{
-    trx_t*  mark_trx;
-    ulint   ret;
-    ulint   cost = 0;
-
-    ut_ad(trx);
-    ut_ad(lock);
-    ut_ad(mutex_own(&kernel_mutex));
-
-    do {
-        /* We check that adding this trx to the waits-for graph
-        does not produce a cycle. First mark all active
-        transactions with 0: */
-
-        for (mark_trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-             mark_trx != NULL;
-             mark_trx = UT_LIST_GET_NEXT(trx_list, mark_trx)) {
-
-            mark_trx->deadlock_mark = 0;
-        }
-
-        ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
-
-        /* If some other trx was chosen as a victim, search again:
-        there may still be a deadlock */
-    } while (ret == LOCK_VICTIM_IS_OTHER);
-
-    if (ret != LOCK_VICTIM_IS_START) {
-
-        return(FALSE);
-    }
-
-    /* trx itself was chosen as the victim. The table and index of the
-    requested lock used to be looked up here, but the values were
-    never read, so that dead code has been removed. */
-
-    lock_deadlock_found = TRUE;
-
-    fputs("*** WE ROLL BACK TRANSACTION (2)\n",
-          lock_latest_err_file);
-
-    return(TRUE);
-}
-
-/************************************************************************
-Looks recursively for a deadlock. Starting from wait_lock, the function
-walks backwards through the lock requests queued ahead of it. For every
-such request that wait_lock has to wait for, the owning transaction is
-examined: if it is 'start', a cycle in the waits-for graph has been closed;
-if it is itself in the TRX_QUE_LOCK_WAIT state, the search recurses into the
-lock that transaction is waiting for. When a cycle is found, or the search
-grows too deep or too long, a report is written to lock_latest_err_file. */
-static
-ulint
-lock_deadlock_recursive(
-/*====================*/
- /* out: 0 if no deadlock found,
- LOCK_VICTIM_IS_START if there was a deadlock
- and we chose 'start' as the victim,
- LOCK_VICTIM_IS_OTHER if a deadlock
- was found and we chose some other trx as a
- victim: we must do the search again in this
- last case because there may be another
- deadlock! */
- trx_t* start, /* in: recursion starting point */
- trx_t* trx, /* in: a transaction waiting for a lock */
- lock_t* wait_lock, /* in: the lock trx is waiting to be granted */
- ulint* cost, /* in/out: number of calculation steps thus
- far: if this exceeds LOCK_MAX_N_STEPS_...
- we return LOCK_VICTIM_IS_START */
- ulint depth) /* in: recursion depth: if this exceeds
- LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
- return LOCK_VICTIM_IS_START */
-{
- lock_t* lock;
- ulint bit_no = ULINT_UNDEFINED;
- trx_t* lock_trx;
- ulint ret;
-
- ut_a(trx);
- ut_a(start);
- ut_a(wait_lock);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (trx->deadlock_mark == 1) {
- /* We have already exhaustively searched the subtree starting
- from this trx */
-
- return(0);
- }
-
- *cost = *cost + 1; /* every visited trx counts as one step */
-
- lock = wait_lock;
-
- if (lock_get_type(wait_lock) == LOCK_REC) {
-
- /* For a record lock we need the heap number (bit) of the
- record in order to follow the queue of that record only */
- bit_no = lock_rec_find_set_bit(wait_lock);
-
- ut_a(bit_no != ULINT_UNDEFINED);
- }
-
- /* Look at the locks ahead of wait_lock in the lock queue */
-
- for (;;) {
- if (lock_get_type(lock) & LOCK_TABLE) {
-
- lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
- lock);
- } else {
- ut_ad(lock_get_type(lock) == LOCK_REC);
- ut_a(bit_no != ULINT_UNDEFINED);
-
- lock = lock_rec_get_prev(lock, bit_no);
- }
-
- if (lock == NULL) {
- /* We can mark this subtree as searched.
- NOTE(review): FALSE is presumably 0 here, i.e. the
- documented "no deadlock found" value; return(0) would
- match the other exits of this function - confirm. */
- trx->deadlock_mark = 1;
-
- return(FALSE);
- }
-
- if (lock_has_to_wait(wait_lock, lock)) {
-
- ibool too_far
- = depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
- || *cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK;
-
- lock_trx = lock->trx;
-
- if (lock_trx == start || too_far) {
-
- /* We came back to the recursion starting
- point: a deadlock detected; or we have
- searched the waits-for graph too long */
-
- FILE* ef = lock_latest_err_file;
-
- /* Overwrite the previous report from the start
- of the file */
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs("\n*** (1) TRANSACTION:\n", ef);
-
- trx_print(ef, wait_lock->trx, 3000);
-
- fputs("*** (1) WAITING FOR THIS LOCK"
- " TO BE GRANTED:\n", ef);
-
- if (lock_get_type(wait_lock) == LOCK_REC) {
- lock_rec_print(ef, wait_lock);
- } else {
- lock_table_print(ef, wait_lock);
- }
-
- fputs("*** (2) TRANSACTION:\n", ef);
-
- trx_print(ef, lock->trx, 3000);
-
- fputs("*** (2) HOLDS THE LOCK(S):\n", ef);
-
- if (lock_get_type(lock) == LOCK_REC) {
- lock_rec_print(ef, lock);
- } else {
- lock_table_print(ef, lock);
- }
-
- fputs("*** (2) WAITING FOR THIS LOCK"
- " TO BE GRANTED:\n", ef);
-
- if (lock_get_type(start->wait_lock)
- == LOCK_REC) {
- lock_rec_print(ef, start->wait_lock);
- } else {
- lock_table_print(ef, start->wait_lock);
- }
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fputs("Deadlock detected"
- " or too long search\n",
- stderr);
- }
-#endif /* UNIV_DEBUG */
- if (too_far) {
-
- fputs("TOO DEEP OR LONG SEARCH"
- " IN THE LOCK TABLE"
- " WAITS-FOR GRAPH\n", ef);
-
- return(LOCK_VICTIM_IS_START);
- }
-
- if (trx_weight_cmp(wait_lock->trx,
- start) >= 0) {
- /* Our recursion starting point
- transaction is 'smaller', let us
- choose 'start' as the victim and roll
- back it */
-
- return(LOCK_VICTIM_IS_START);
- }
-
- lock_deadlock_found = TRUE;
-
- /* Let us choose the transaction of wait_lock
- as a victim to try to avoid deadlocking our
- recursion starting point transaction */
-
- fputs("*** WE ROLL BACK TRANSACTION (1)\n",
- ef);
-
- wait_lock->trx->was_chosen_as_deadlock_victim
- = TRUE;
-
- lock_cancel_waiting_and_release(wait_lock);
-
- /* Since trx and wait_lock are no longer
- in the waits-for graph, we stop here and
- report LOCK_VICTIM_IS_OTHER, which tells the
- caller to do the search again;
- note that our selective algorithm can choose
- several transactions as victims, but still
- we may end up rolling back also the recursion
- starting point transaction! */
-
- return(LOCK_VICTIM_IS_OTHER);
- }
-
- if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
-
- /* Another trx ahead has requested lock in an
- incompatible mode, and is itself waiting for
- a lock */
-
- ret = lock_deadlock_recursive(
- start, lock_trx,
- lock_trx->wait_lock, cost, depth + 1);
- if (ret != 0) {
-
- /* Propagate the verdict up the
- recursion unchanged */
- return(ret);
- }
- }
- }
- }/* end of the 'for (;;)'-loop */
-}
-
-/*========================= TABLE LOCKS ==============================*/
-
-/*************************************************************************
-Builds a new table lock request for trx and appends it to the tail of the
-lock list of the trx and of the lock queue of the table. Neither deadlock
-detection nor a compatibility check against other requests is done here. */
-UNIV_INLINE
-lock_t*
-lock_table_create(
-/*==============*/
-				/* out, own: new lock object */
-	dict_table_t*	table,	/* in: database table in dictionary cache */
-	ulint		type_mode,/* in: lock mode possibly ORed with
-				LOCK_WAIT */
-	trx_t*		trx)	/* in: trx */
-{
-	lock_t*	new_lock;
-
-	ut_ad(table && trx);
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* Every AUTO-INC request is counted, whether it waits or not */
-	if (LOCK_AUTO_INC == (type_mode & LOCK_MODE_MASK)) {
-		++table->n_waiting_or_granted_auto_inc_locks;
-	}
-
-	if (type_mode != LOCK_AUTO_INC) {
-		new_lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t));
-	} else {
-		/* A plain (non-waiting) AUTO-INC lock: only one trx can
-		have it on the table at a time, so the lock object that is
-		preallocated in the table object is used */
-
-		new_lock = table->auto_inc_lock;
-
-		ut_a(trx->auto_inc_lock == NULL);
-		trx->auto_inc_lock = new_lock;
-	}
-
-	new_lock->type_mode = type_mode | LOCK_TABLE;
-	new_lock->trx = trx;
-	new_lock->un_member.tab_lock.table = table;
-
-	UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, new_lock);
-	UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, new_lock);
-
-	if (type_mode & LOCK_WAIT) {
-
-		/* Register the request as the lock the trx waits for */
-		lock_set_lock_and_trx_wait(new_lock, trx);
-	}
-
-	return(new_lock);
-}
-
-/*****************************************************************
-Removes a table lock request from the queue and the trx list of locks;
-this is a low-level function which does NOT check if waiting requests
-can now be granted.
-
-lock_table_create() increments table->n_waiting_or_granted_auto_inc_locks
-for every AUTO-INC request, also for a waiting one, which is not (yet)
-pointed to by trx->auto_inc_lock. The counter is therefore decremented
-here for every lock whose mode is LOCK_AUTO_INC, not only for the lock
-trx->auto_inc_lock points to: otherwise a waiting AUTO-INC request that is
-removed before it is granted (deadlock, cancelled wait) would leave the
-counter permanently too high. */
-UNIV_INLINE
-void
-lock_table_remove_low(
-/*==================*/
-	lock_t*	lock)	/* in: table lock */
-{
-	dict_table_t*	table;
-	trx_t*		trx;
-	ibool		is_auto_inc;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	table = lock->un_member.tab_lock.table;
-	trx = lock->trx;
-
-	/* The pointer comparison is kept so that every case the counter
-	was decremented in before is still covered */
-	is_auto_inc = (lock == trx->auto_inc_lock)
-		|| (lock_get_mode(lock) == LOCK_AUTO_INC);
-
-	if (lock == trx->auto_inc_lock) {
-		trx->auto_inc_lock = NULL;
-	}
-
-	if (is_auto_inc) {
-		ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
-		--table->n_waiting_or_granted_auto_inc_locks;
-	}
-
-	UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock);
-	UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
-}
-
-/*************************************************************************
-Puts a table lock request, which cannot be granted at once, into the lock
-queue of the table as a waiting request and runs the deadlock check. */
-static
-ulint
-lock_table_enqueue_waiting(
-/*=======================*/
-				/* out: DB_LOCK_WAIT, DB_DEADLOCK, or
-				DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
-				DB_SUCCESS means that there was a deadlock,
-				but another transaction was chosen as a
-				victim, and we got the lock immediately:
-				no need to wait then */
-	ulint		mode,	/* in: lock mode this transaction is
-				requesting */
-	dict_table_t*	table,	/* in: table */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	lock_t*	wait_req;
-	trx_t*	req_trx;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* If the query thread has to stop for some other reason anyway,
-	no lock request is enqueued at all */
-
-	if (que_thr_stop(thr)) {
-		ut_error;
-
-		return(DB_QUE_THR_SUSPENDED);
-	}
-
-	req_trx = thr_get_trx(thr);
-
-	if (req_trx->dict_operation) {
-		/* Only reported; the request is enqueued nevertheless */
-		ut_print_timestamp(stderr);
-		fputs(" InnoDB: Error: a table lock wait happens"
-		      " in a dictionary operation!\n"
-		      "InnoDB: Table name ", stderr);
-		ut_print_name(stderr, req_trx, TRUE, table->name);
-		fputs(".\n"
-		      "InnoDB: Submit a detailed bug report"
-		      " to http://bugs.mysql.com\n",
-		      stderr);
-	}
-
-	/* Create the waiting request at the end of the queue */
-
-	wait_req = lock_table_create(table, mode | LOCK_WAIT, req_trx);
-
-	if (lock_deadlock_occurs(wait_req, req_trx)) {
-
-		/* This trx is the deadlock victim: take the request back
-		and report the error */
-
-		lock_reset_lock_and_trx_wait(wait_req);
-		lock_table_remove_low(wait_req);
-
-		return(DB_DEADLOCK);
-	}
-
-	if (req_trx->wait_lock != NULL) {
-		/* The request really has to wait */
-
-		req_trx->que_state = TRX_QUE_LOCK_WAIT;
-		req_trx->was_chosen_as_deadlock_victim = FALSE;
-		req_trx->wait_started = time(NULL);
-
-		ut_a(que_thr_stop(thr));
-
-		return(DB_LOCK_WAIT);
-	}
-
-	/* Deadlock resolution chose another transaction as a victim,
-	and our lock got granted as a side effect */
-
-	return(DB_SUCCESS);
-}
-
-/*************************************************************************
-Tells whether some transaction other than trx has, in the lock queue of
-the table, a request whose mode is incompatible with the given mode. */
-UNIV_INLINE
-ibool
-lock_table_other_has_incompatible(
-/*==============================*/
-	trx_t*		trx,	/* in: transaction, or NULL if all
-				transactions should be included */
-	ulint		wait,	/* in: LOCK_WAIT if also waiting locks are
-				taken into account, or 0 if not */
-	dict_table_t*	table,	/* in: table */
-	ulint		mode)	/* in: lock mode */
-{
-	lock_t*	other;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* The queue is scanned from its tail towards its head */
-
-	for (other = UT_LIST_GET_LAST(table->locks);
-	     other != NULL;
-	     other = UT_LIST_GET_PREV(un_member.tab_lock.locks, other)) {
-
-		if (other->trx == trx) {
-			/* Own requests never conflict */
-			continue;
-		}
-
-		if (lock_mode_compatible(lock_get_mode(other), mode)) {
-			continue;
-		}
-
-		if (!wait && lock_get_wait(other)) {
-			/* Waiting requests were asked to be ignored */
-			continue;
-		}
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/*************************************************************************
-Acquires a lock of the given mode on a database table for the transaction
-of the query thread. When the lock cannot be granted at once, a waiting
-request is enqueued and the query thread is put to wait. */
-
-ulint
-lock_table(
-/*=======*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
-				does nothing */
-	dict_table_t*	table,	/* in: database table in dictionary cache */
-	ulint		mode,	/* in: lock mode */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	trx_t*	trx;
-	ulint	err = DB_SUCCESS;
-
-	ut_ad(table && thr);
-
-	if (flags & BTR_NO_LOCKING_FLAG) {
-
-		return(DB_SUCCESS);
-	}
-
-	ut_a(flags == 0);
-
-	trx = thr_get_trx(thr);
-
-	lock_mutex_enter_kernel();
-
-	if (lock_table_has(trx, table, mode)) {
-
-		/* The trx already holds this or a stronger lock on the
-		table: nothing to do */
-
-	} else if (lock_table_other_has_incompatible(trx, LOCK_WAIT,
-						     table, mode)) {
-
-		/* Some other trx has a granted or waiting request in an
-		incompatible mode in the queue: this trx may have to
-		wait */
-
-		err = lock_table_enqueue_waiting(mode | flags, table, thr);
-	} else {
-		/* No conflict: the lock is granted right away */
-
-		lock_table_create(table, mode | flags, trx);
-
-		ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
-	}
-
-	lock_mutex_exit_kernel();
-
-	return(err);
-}
-
-/*************************************************************************
-Tells whether the lock queue of the table contains any lock request. */
-
-ibool
-lock_is_on_table(
-/*=============*/
-				/* out: TRUE if there are lock(s) */
-	dict_table_t*	table)	/* in: database table in dictionary cache */
-{
-	ibool	has_locks;
-
-	ut_ad(table);
-
-	lock_mutex_enter_kernel();
-
-	/* A non-empty queue has a last element */
-	has_locks = UT_LIST_GET_LAST(table->locks) ? TRUE : FALSE;
-
-	lock_mutex_exit_kernel();
-
-	return(has_locks);
-}
-
-/*************************************************************************
-Tells whether a waiting table lock request is still blocked by some
-request that is ahead of it in the lock queue of the table. */
-static
-ibool
-lock_table_has_to_wait_in_queue(
-/*============================*/
-				/* out: TRUE if still has to wait */
-	lock_t*	wait_lock)	/* in: waiting table lock */
-{
-	dict_table_t*	table;
-	lock_t*		ahead;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(lock_get_wait(wait_lock));
-
-	table = wait_lock->un_member.tab_lock.table;
-
-	/* Only the requests from the head of the queue up to, but not
-	including, wait_lock itself are examined */
-
-	for (ahead = UT_LIST_GET_FIRST(table->locks);
-	     ahead != wait_lock;
-	     ahead = UT_LIST_GET_NEXT(un_member.tab_lock.locks, ahead)) {
-
-		if (lock_has_to_wait(wait_lock, ahead)) {
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/*****************************************************************
-Takes a table lock request, granted or waiting, out of the lock queue and
-then grants the waiting requests behind it that no longer have to wait. */
-static
-void
-lock_table_dequeue(
-/*===============*/
-	lock_t*	in_lock)/* in: table lock object; transactions waiting
-			behind will get their lock requests granted, if
-			they are now qualified to it */
-{
-	lock_t*	behind;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_a(lock_get_type(in_lock) == LOCK_TABLE);
-
-	/* The successor has to be read before in_lock is unlinked */
-	behind = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
-
-	lock_table_remove_low(in_lock);
-
-	/* Go through the requests that were queued behind in_lock and
-	grant those waiting ones that have no conflicting request ahead */
-
-	for (; behind != NULL;
-	     behind = UT_LIST_GET_NEXT(un_member.tab_lock.locks, behind)) {
-
-		if (!lock_get_wait(behind)) {
-			continue;
-		}
-
-		if (!lock_table_has_to_wait_in_queue(behind)) {
-
-			lock_grant(behind);
-		}
-	}
-}
-
-/*=========================== LOCK RELEASE ==============================*/
-
-/*****************************************************************
-Releases a granted record lock that a transaction holds on a record and
-grants the waiting requests in the queue of the record that no longer
-have to wait. */
-
-void
-lock_rec_unlock(
-/*============*/
-	trx_t*	trx,		/* in: transaction that has set a record
-				lock */
-	rec_t*	rec,		/* in: record */
-	ulint	lock_mode)	/* in: LOCK_S or LOCK_X */
-{
-	lock_t*	cur;
-	lock_t*	found = NULL;
-	ulint	heap_no;
-
-	ut_ad(trx && rec);
-
-	mutex_enter(&kernel_mutex);
-
-	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
-
-	/* Scan the whole queue of the record: the last lock of trx in
-	the requested mode is the one that gets released */
-
-	for (cur = lock_rec_get_first(rec);
-	     cur != NULL;
-	     cur = lock_rec_get_next(rec, cur)) {
-
-		if (cur->trx == trx && lock_get_mode(cur) == lock_mode) {
-			found = cur;
-			ut_a(!lock_get_wait(cur));
-		}
-	}
-
-	if (!UNIV_LIKELY(found != NULL)) {
-		/* Nothing to release: report it and give up */
-
-		mutex_exit(&kernel_mutex);
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Error: unlock row could not"
-			" find a %lu mode lock on the record\n",
-			(ulong) lock_mode);
-
-		return;
-	}
-
-	lock_rec_reset_nth_bit(found, heap_no);
-
-	/* Some waiting requests on the record may now be grantable */
-
-	for (cur = lock_rec_get_first(rec);
-	     cur != NULL;
-	     cur = lock_rec_get_next(rec, cur)) {
-
-		if (lock_get_wait(cur)
-		    && !lock_rec_has_to_wait_in_queue(cur)) {
-
-			lock_grant(cur);
-		}
-	}
-
-	mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************************
-Releases a table lock.
-Releases possible other transactions waiting for this lock.
-This is a wrapper that calls lock_table_dequeue() while holding the
-kernel mutex; the mutex is acquired and released here, so the caller
-is presumably expected not to hold it already. */
-
-void
-lock_table_unlock(
-/*==============*/
- lock_t* lock) /* in: lock */
-{
- mutex_enter(&kernel_mutex);
-
- lock_table_dequeue(lock);
-
- mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************************
-Releases the auto-inc lock of a transaction, if it has one, and with it
-possible other transactions that are waiting for that lock. */
-
-void
-lock_table_unlock_auto_inc(
-/*=======================*/
-	trx_t*	trx)	/* in: transaction */
-{
-	if (!trx->auto_inc_lock) {
-		/* The trx holds no auto-inc lock: nothing to release */
-
-		return;
-	}
-
-	mutex_enter(&kernel_mutex);
-
-	lock_table_dequeue(trx->auto_inc_lock);
-
-	mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************************
-Releases all locks of a transaction, starting from the newest one, and
-with them possible other transactions that wait because of these locks. */
-
-void
-lock_release_off_kernel(
-/*====================*/
-	trx_t*	trx)	/* in: transaction */
-{
-	dict_table_t*	table;
-	lock_t*		lock;
-	ulint		n_released = 0;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* Each round dequeues the current last lock of the trx, so the
-	list is re-read from its tail on every iteration */
-
-	for (lock = UT_LIST_GET_LAST(trx->trx_locks);
-	     lock != NULL;
-	     lock = UT_LIST_GET_LAST(trx->trx_locks)) {
-
-		n_released++;
-
-		if (lock_get_type(lock) != LOCK_REC) {
-			ut_ad(lock_get_type(lock) & LOCK_TABLE);
-
-			if (lock_get_mode(lock) != LOCK_IS
-			    && 0 != ut_dulint_cmp(trx->undo_no,
-						  ut_dulint_zero)) {
-
-				/* The trx may have modified the table. We
-				block the use of the MySQL query cache for
-				all currently active transactions. */
-
-				table = lock->un_member.tab_lock.table;
-
-				table->query_cache_inv_trx_id
-					= trx_sys->max_trx_id;
-			}
-
-			lock_table_dequeue(lock);
-		} else {
-			lock_rec_dequeue_from_page(lock);
-		}
-
-		if (n_released == LOCK_RELEASE_KERNEL_INTERVAL) {
-			/* Let go of the kernel mutex for a moment so that
-			other threads get a chance to take it */
-
-			lock_mutex_exit_kernel();
-
-			lock_mutex_enter_kernel();
-
-			n_released = 0;
-		}
-	}
-
-	mem_heap_empty(trx->lock_heap);
-
-	ut_a(trx->auto_inc_lock == NULL);
-}
-
-/*************************************************************************
-Cancels a waiting lock request, releases possible other transactions
-waiting behind it, and ends the lock wait of the owning transaction. */
-
-void
-lock_cancel_waiting_and_release(
-/*============================*/
-	lock_t*	lock)	/* in: waiting lock request */
-{
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* Take the request out of its queue, record or table */
-
-	if (lock_get_type(lock) != LOCK_REC) {
-		ut_ad(lock_get_type(lock) & LOCK_TABLE);
-
-		lock_table_dequeue(lock);
-	} else {
-		lock_rec_dequeue_from_page(lock);
-	}
-
-	/* Clear the wait flag of the lock and the pointer from the trx
-	back to the lock */
-
-	lock_reset_lock_and_trx_wait(lock);
-
-	/* Release the trx from the lock wait */
-
-	trx_end_lock_wait(lock->trx);
-}
-
-/* True if a lock mode is S or X. The argument is a lock object, whose mode
-is read with lock_get_mode(). The argument is expanded twice, so it should
-be an expression without side effects. */
-#define IS_LOCK_S_OR_X(lock) \
- (lock_get_mode(lock) == LOCK_S \
- || lock_get_mode(lock) == LOCK_X)
-
-
-/*************************************************************************
-Removes the locks that one transaction has on a table that is about to be
-dropped. Record locks on the table are always removed. Table-level locks
-on it are removed as well, except that table-level S and X locks are kept
-unless remove_also_table_sx_locks is TRUE.
-None of the removed locks is allowed to be a waiting one. */
-static
-void
-lock_remove_all_on_table_for_trx(
-/*=============================*/
-	dict_table_t*	table,	/* in: table to be dropped */
-	trx_t*		trx,	/* in: a transaction */
-	ibool		remove_also_table_sx_locks)/* in: also removes
-				table S and X locks */
-{
-	lock_t*	cur;
-	lock_t*	older;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* Walk the lock list of the trx from its tail; the predecessor is
-	saved up front because cur may get unlinked below */
-
-	for (cur = UT_LIST_GET_LAST(trx->trx_locks);
-	     cur != NULL;
-	     cur = older) {
-
-		older = UT_LIST_GET_PREV(trx_locks, cur);
-
-		if (lock_get_type(cur) == LOCK_REC
-		    && cur->index->table == table) {
-			ut_a(!lock_get_wait(cur));
-
-			lock_rec_discard(cur);
-		} else if (lock_get_type(cur) & LOCK_TABLE
-			   && cur->un_member.tab_lock.table == table
-			   && (remove_also_table_sx_locks
-			       || !IS_LOCK_S_OR_X(cur))) {
-
-			ut_a(!lock_get_wait(cur));
-
-			lock_table_remove_low(cur);
-		}
-	}
-}
-
-/*************************************************************************
-Removes locks on a table to be dropped or truncated.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock.
-
-The function iterates over the table lock queue and, for the owner of each
-lock found there, calls lock_remove_all_on_table_for_trx(). Since that call
-may unlink the current lock (and other locks of the same trx) from the
-queue, the next lock to visit is worked out afterwards from the predecessor
-that was saved before the call. */
-
-void
-lock_remove_all_on_table(
-/*=====================*/
- dict_table_t* table, /* in: table to be dropped
- or truncated */
- ibool remove_also_table_sx_locks)/* in: also removes
- table S and X locks */
-{
- lock_t* lock;
- lock_t* prev_lock;
-
- mutex_enter(&kernel_mutex);
-
- lock = UT_LIST_GET_FIRST(table->locks);
-
- while (lock != NULL) {
-
- /* Remember the predecessor: it is used below to detect
- whether lock itself got removed */
- prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
- lock);
-
- /* If we should remove all locks (remove_also_table_sx_locks
- is TRUE), or if the lock is not table-level S or X lock,
- then check we are not going to remove a wait lock. */
- if (remove_also_table_sx_locks
- || !(lock_get_type(lock) == LOCK_TABLE
- && IS_LOCK_S_OR_X(lock))) {
-
- ut_a(!lock_get_wait(lock));
- }
-
- lock_remove_all_on_table_for_trx(table, lock->trx,
- remove_also_table_sx_locks);
-
- if (prev_lock == NULL) {
- /* lock was the head of the queue before the call */
- if (lock == UT_LIST_GET_FIRST(table->locks)) {
- /* lock was not removed, pick its successor */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, lock);
- } else {
- /* lock was removed, pick the first one */
- lock = UT_LIST_GET_FIRST(table->locks);
- }
- } else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
- prev_lock) != lock) {
- /* If lock was removed by
- lock_remove_all_on_table_for_trx() then pick the
- successor of prev_lock ...
- NOTE(review): this relies on prev_lock itself still
- being in the queue after the call - confirm. */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, prev_lock);
- } else {
- /* ... otherwise pick the successor of lock. */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, lock);
- }
- }
-
- mutex_exit(&kernel_mutex);
-}
-
-/*===================== VALIDATION AND DEBUGGING ====================*/
-
-/*************************************************************************
-Writes a one-line description of a table lock to a file: the table name,
-the id of the owning trx, the lock mode and whether the lock is waiting. */
-
-void
-lock_table_print(
-/*=============*/
-	FILE*	file,	/* in: file where to print */
-	lock_t*	lock)	/* in: table type lock */
-{
-	ulint	mode;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_a(lock_get_type(lock) == LOCK_TABLE);
-
-	fputs("TABLE LOCK table ", file);
-	ut_print_name(file, lock->trx, TRUE,
-		      lock->un_member.tab_lock.table->name);
-	fprintf(file, " trx id %lu %lu",
-		(ulong) (lock->trx)->id.high, (ulong) (lock->trx)->id.low);
-
-	/* The mode is looked up once and then mapped to its name */
-	mode = lock_get_mode(lock);
-
-	if (mode == LOCK_AUTO_INC) {
-		fputs(" lock mode AUTO-INC", file);
-	} else if (mode == LOCK_IX) {
-		fputs(" lock mode IX", file);
-	} else if (mode == LOCK_IS) {
-		fputs(" lock mode IS", file);
-	} else if (mode == LOCK_X) {
-		fputs(" lock mode X", file);
-	} else if (mode == LOCK_S) {
-		fputs(" lock mode S", file);
-	} else {
-		fprintf(file, " unknown lock mode %lu", (ulong) mode);
-	}
-
-	if (lock_get_wait(lock)) {
-		fputs(" waiting", file);
-	}
-
-	putc('\n', file);
-}
-
-/*************************************************************************
-Prints info of a record lock: first a header line with the space id, page
-number, number of bits, index name, trx id, lock mode and flags, and then
-one line for every record (heap number) whose bit is set in the lock. The
-record contents are printed too, but only if the page is in the buffer pool
-and a latch on it can be obtained without waiting. */
-
-void
-lock_rec_print(
-/*===========*/
- FILE* file, /* in: file where to print */
- lock_t* lock) /* in: record type lock */
-{
- page_t* page;
- ulint space;
- ulint page_no;
- ulint i;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 holds the capacity of the array; presumably read by rec_get_offsets() */
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_a(lock_get_type(lock) == LOCK_REC);
-
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
- fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ",
- (ulong) space, (ulong) page_no,
- (ulong) lock_rec_get_n_bits(lock));
- dict_index_name_print(file, lock->trx, lock->index);
- fprintf(file, " trx id %lu %lu",
- (ulong) (lock->trx)->id.high,
- (ulong) (lock->trx)->id.low);
-
- /* NOTE(review): the X case prints "lock_mode" with an underscore
- while the S case prints "lock mode"; this is runtime output and is
- left as it is. Any other mode than S or X is a fatal error here. */
- if (lock_get_mode(lock) == LOCK_S) {
- fputs(" lock mode S", file);
- } else if (lock_get_mode(lock) == LOCK_X) {
- fputs(" lock_mode X", file);
- } else {
- ut_error;
- }
-
- if (lock_rec_get_gap(lock)) {
- fputs(" locks gap before rec", file);
- }
-
- if (lock_rec_get_rec_not_gap(lock)) {
- fputs(" locks rec but not gap", file);
- }
-
- if (lock_rec_get_insert_intention(lock)) {
- fputs(" insert intention", file);
- }
-
- if (lock_get_wait(lock)) {
- fputs(" waiting", file);
- }
-
- mtr_start(&mtr);
-
- putc('\n', file);
-
- /* If the page is not in the buffer pool, we cannot load it
- because we have the kernel mutex and ibuf operations would
- break the latching order */
-
- page = buf_page_get_gen(space, page_no, RW_NO_LATCH,
- NULL, BUF_GET_IF_IN_POOL,
- __FILE__, __LINE__, &mtr);
- if (page) {
- /* The page is in the pool: try to latch it, but without
- waiting. If neither latch is obtained, page becomes NULL
- and only the heap numbers are printed below. */
- page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr);
-
- if (!page) {
- /* Let us try to get an X-latch. If the current thread
- is holding an X-latch on the page, we cannot get an
- S-latch. */
-
- page = buf_page_get_nowait(space, page_no, RW_X_LATCH,
- &mtr);
- }
- }
-
- if (page) {
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- }
-
- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
-
- if (lock_rec_get_nth_bit(lock, i)) {
-
- fprintf(file, "Record lock, heap no %lu ", (ulong) i);
-
- if (page) {
- rec_t* rec
- = page_find_rec_with_heap_no(page, i);
- offsets = rec_get_offsets(
- rec, lock->index, offsets,
- ULINT_UNDEFINED, &heap);
- rec_print_new(file, rec, offsets);
- }
-
- putc('\n', file);
- }
- }
-
- mtr_commit(&mtr);
- /* heap is non-NULL only if rec_get_offsets() created one */
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-
-#ifdef UNIV_DEBUG
-/* Print the number of lock structs from lock_print_info_summary() only
-in non-production builds for performance reasons, see
-http://bugs.mysql.com/36942
-When this symbol is defined, lock_get_n_rec_locks() is compiled in and
-lock_print_info_summary() prints the count it returns. */
-#define PRINT_NUM_OF_LOCK_STRUCTS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_NUM_OF_LOCK_STRUCTS
-/*************************************************************************
-Counts the record lock structs in the record lock hash table by visiting
-every cell of the table and every lock chained to the cell. */
-static
-ulint
-lock_get_n_rec_locks(void)
-/*======================*/
-{
-	lock_t*	cur;
-	ulint	total = 0;
-	ulint	cell;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (cell = 0;
-	     cell < hash_get_n_cells(lock_sys->rec_hash);
-	     cell++) {
-
-		for (cur = HASH_GET_FIRST(lock_sys->rec_hash, cell);
-		     cur;
-		     cur = HASH_GET_NEXT(hash, cur)) {
-
-			total++;
-		}
-	}
-
-	return(total);
-}
-#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
-
-/*************************************************************************
-Prints info of locks for all transactions: the latest detected deadlock
-(if one has been recorded), the trx id counter, the purge limits and the
-history list length.
-NOTE: this function calls innobase_mysql_prepare_print_arbitrary_thd() and
-enters the kernel mutex, but does not undo either of them before returning.
-lock_print_info_all_transactions() releases both on its exit path, so it is
-presumably meant to be called right after this function. */
-
-void
-lock_print_info_summary(
-/*====================*/
- FILE* file) /* in: file where to print */
-{
- /* We must protect the MySQL thd->query field with a MySQL mutex, and
- because the MySQL mutex must be reserved before the kernel_mutex of
- InnoDB, we call innobase_mysql_prepare_print_arbitrary_thd() here. */
-
- innobase_mysql_prepare_print_arbitrary_thd();
- lock_mutex_enter_kernel();
-
- if (lock_deadlock_found) {
- fputs("------------------------\n"
- "LATEST DETECTED DEADLOCK\n"
- "------------------------\n", file);
-
- /* The deadlock report itself was written to
- lock_latest_err_file when the deadlock was detected */
- ut_copy_file(file, lock_latest_err_file);
- }
-
- fputs("------------\n"
- "TRANSACTIONS\n"
- "------------\n", file);
-
- /* The ids below are printed as two numbers: high part, low part */
- fprintf(file, "Trx id counter %lu %lu\n",
- (ulong) ut_dulint_get_high(trx_sys->max_trx_id),
- (ulong) ut_dulint_get_low(trx_sys->max_trx_id));
-
- fprintf(file,
- "Purge done for trx's n:o < %lu %lu undo n:o < %lu %lu\n",
- (ulong) ut_dulint_get_high(purge_sys->purge_trx_no),
- (ulong) ut_dulint_get_low(purge_sys->purge_trx_no),
- (ulong) ut_dulint_get_high(purge_sys->purge_undo_no),
- (ulong) ut_dulint_get_low(purge_sys->purge_undo_no));
-
- fprintf(file,
- "History list length %lu\n",
- (ulong) trx_sys->rseg_history_len);
-
-#ifdef PRINT_NUM_OF_LOCK_STRUCTS
- fprintf(file,
- "Total number of lock structs in row lock hash table %lu\n",
- (ulong) lock_get_n_rec_locks());
-#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
-}
-
-/*************************************************************************
-Prints info of locks for each transaction.
-First the transactions in trx_sys->mysql_trx_list that have not been started
-are printed, then every transaction in trx_sys->trx_list. If
-srv_print_innodb_lock_monitor is set, the locks of each transaction are
-printed as well, until nth_lock reaches 10 for that transaction.
-NOTE: the function releases the kernel mutex and calls
-innobase_mysql_end_print_arbitrary_thd() before returning without having
-acquired them itself: it is presumably entered with both held by
-lock_print_info_summary().
-The function is written as a loop around the label 'loop': the position is
-kept as the pair (nth_trx, nth_lock) instead of as pointers, because the
-kernel mutex is released while a page is being read in. */
-
-void
-lock_print_info_all_transactions(
-/*=============================*/
- FILE* file) /* in: file where to print */
-{
- lock_t* lock;
- ulint space;
- ulint page_no;
- page_t* page; /* NOTE(review): assigned below but never read */
- ibool load_page_first = TRUE;
- ulint nth_trx = 0;
- ulint nth_lock = 0;
- ulint i;
- mtr_t mtr;
- trx_t* trx;
-
- fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
-
- /* First print info on non-active transactions */
-
- trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
-
- while (trx) {
- if (trx->conc_state == TRX_NOT_STARTED) {
- fputs("---", file);
- trx_print(file, trx, 600);
- }
-
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
- }
-
-loop:
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- i = 0;
-
- /* Since we temporarily release the kernel mutex when
- reading a database page in below, variable trx may be
- obsolete now and we must loop through the trx list to
- get probably the same trx, or some other trx. */
-
- while (trx && (i < nth_trx)) {
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- i++;
- }
-
- if (trx == NULL) {
- /* All transactions have been handled: this is the only
- exit of the function */
- lock_mutex_exit_kernel();
- innobase_mysql_end_print_arbitrary_thd();
-
- ut_ad(lock_validate());
-
- return;
- }
-
- if (nth_lock == 0) {
- /* The trx header is printed once per visit of the trx with
- nth_lock == 0 */
- fputs("---", file);
- trx_print(file, trx, 600);
-
- if (trx->read_view) {
- fprintf(file,
- "Trx read view will not see trx with"
- " id >= %lu %lu, sees < %lu %lu\n",
- (ulong) ut_dulint_get_high(
- trx->read_view->low_limit_id),
- (ulong) ut_dulint_get_low(
- trx->read_view->low_limit_id),
- (ulong) ut_dulint_get_high(
- trx->read_view->up_limit_id),
- (ulong) ut_dulint_get_low(
- trx->read_view->up_limit_id));
- }
-
- if (trx->que_state == TRX_QUE_LOCK_WAIT) {
- fprintf(file,
- "------- TRX HAS BEEN WAITING %lu SEC"
- " FOR THIS LOCK TO BE GRANTED:\n",
- (ulong) difftime(time(NULL),
- trx->wait_started));
-
- if (lock_get_type(trx->wait_lock) == LOCK_REC) {
- lock_rec_print(file, trx->wait_lock);
- } else {
- lock_table_print(file, trx->wait_lock);
- }
-
- fputs("------------------\n", file);
- }
- }
-
- if (!srv_print_innodb_lock_monitor) {
- /* The lock monitor is off: no individual locks are
- printed, move on to the next trx */
- nth_trx++;
- goto loop;
- }
-
- i = 0;
-
- /* Look at the note about the trx loop above why we loop here:
- lock may be an obsolete pointer now. */
-
- lock = UT_LIST_GET_FIRST(trx->trx_locks);
-
- while (lock && (i < nth_lock)) {
- lock = UT_LIST_GET_NEXT(trx_locks, lock);
- i++;
- }
-
- if (lock == NULL) {
- /* No more locks for this trx */
- nth_trx++;
- nth_lock = 0;
-
- goto loop;
- }
-
- if (lock_get_type(lock) == LOCK_REC) {
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
- if (load_page_first) {
- /* Fetch the page with the mutexes released, then
- start over from 'loop' with load_page_first cleared
- so that the lock gets printed on the next pass */
- lock_mutex_exit_kernel();
- innobase_mysql_end_print_arbitrary_thd();
-
- mtr_start(&mtr);
-
- page = buf_page_get_with_no_latch(
- space, page_no, &mtr);
-
- mtr_commit(&mtr);
-
- load_page_first = FALSE;
-
- innobase_mysql_prepare_print_arbitrary_thd();
- lock_mutex_enter_kernel();
-
- goto loop;
- }
-
- lock_rec_print(file, lock);
- } else {
- ut_ad(lock_get_type(lock) & LOCK_TABLE);
-
- lock_table_print(file, lock);
- }
-
- load_page_first = TRUE;
-
- nth_lock++;
-
- if (nth_lock >= 10) {
- fputs("10 LOCKS PRINTED FOR THIS TRX:"
- " SUPPRESSING FURTHER PRINTS\n",
- file);
-
- nth_trx++;
- nth_lock = 0;
-
- goto loop;
- }
-
- goto loop;
-}
-
-/*************************************************************************
-Validates the lock queue on a table: the owner of every lock must be in an
-allowed state, a granted lock must not conflict with a granted lock of
-another trx, and a waiting lock must really have a reason to wait. */
-
-ibool
-lock_table_queue_validate(
-/*======================*/
-				/* out: TRUE if ok */
-	dict_table_t*	table)	/* in: table */
-{
-	lock_t*	cur;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (cur = UT_LIST_GET_FIRST(table->locks);
-	     cur;
-	     cur = UT_LIST_GET_NEXT(un_member.tab_lock.locks, cur)) {
-
-		ut_a(((cur->trx)->conc_state == TRX_ACTIVE)
-		     || ((cur->trx)->conc_state == TRX_PREPARED)
-		     || ((cur->trx)->conc_state == TRX_COMMITTED_IN_MEMORY));
-
-		if (lock_get_wait(cur)) {
-
-			ut_a(lock_table_has_to_wait_in_queue(cur));
-		} else {
-			/* Waiting requests of others are not taken into
-			account here (wait == 0) */
-
-			ut_a(!lock_table_other_has_incompatible(
-				     cur->trx, 0, table,
-				     lock_get_mode(cur)));
-		}
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Validates the lock queue on a single record. The kernel mutex is acquired
-and released by this function. Failed checks are reported through ut_a()
-and ut_error, so TRUE is the only value that is ever returned.
-For a record that is not a user record only the explicit locks in the
-queue are checked. For a user record the implicit lock, if some trx has
-one, is checked first, and then the explicit locks in the queue. */
-
-ibool
-lock_rec_queue_validate(
-/*====================*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: record to look at */
- dict_index_t* index, /* in: index, or NULL if not known */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
-{
- trx_t* impl_trx;
- lock_t* lock;
-
- ut_a(rec);
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
-
- lock_mutex_enter_kernel();
-
- if (!page_rec_is_user_rec(rec)) {
-
- /* Not a user record: check every lock in the queue and
- return */
- lock = lock_rec_get_first(rec);
-
- while (lock) {
- switch(lock->trx->conc_state) {
- case TRX_ACTIVE:
- case TRX_PREPARED:
- case TRX_COMMITTED_IN_MEMORY:
- break;
- default:
- ut_error;
- }
-
- ut_a(trx_in_trx_list(lock->trx));
-
- if (lock_get_wait(lock)) {
- ut_a(lock_rec_has_to_wait_in_queue(lock));
- }
-
- if (index) {
- ut_a(lock->index == index);
- }
-
- lock = lock_rec_get_next(rec, lock);
- }
-
- lock_mutex_exit_kernel();
-
- return(TRUE);
- }
-
- if (index && (index->type & DICT_CLUSTERED)) {
-
- impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
-
- /* If some other trx has an explicit request on the record
- (waiting ones included), the trx with the implicit lock must
- also have an explicit X lock on the record */
- if (impl_trx && lock_rec_other_has_expl_req(
- LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) {
-
- ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- rec, impl_trx));
- }
- }
-
- if (index && !(index->type & DICT_CLUSTERED)) {
-
- /* The kernel mutex may get released temporarily in the
- next function call: we have to release lock table mutex
- to obey the latching order.
- NOTE(review): no mutex is released in this function before
- the call; the sentence above presumably describes what the
- callee does - confirm. */
-
- impl_trx = lock_sec_rec_some_has_impl_off_kernel(
- rec, index, offsets);
-
- /* Same check as for a clustered index record above */
- if (impl_trx && lock_rec_other_has_expl_req(
- LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) {
-
- ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- rec, impl_trx));
- }
- }
-
- lock = lock_rec_get_first(rec);
-
- while (lock) {
- ut_a(lock->trx->conc_state == TRX_ACTIVE
- || lock->trx->conc_state == TRX_PREPARED
- || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
- ut_a(trx_in_trx_list(lock->trx));
-
- if (index) {
- ut_a(lock->index == index);
- }
-
- if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
-
- /* A granted lock on the record itself: no other
- trx may have a granted request in the mode chosen
- below (X for an S lock, S otherwise) */
- ulint mode;
-
- if (lock_get_mode(lock) == LOCK_S) {
- mode = LOCK_X;
- } else {
- mode = LOCK_S;
- }
- ut_a(!lock_rec_other_has_expl_req(
- mode, 0, 0, rec, lock->trx));
-
- } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
-
- /* A waiting lock on the record itself must have a
- reason to wait */
- ut_a(lock_rec_has_to_wait_in_queue(lock));
- }
-
- lock = lock_rec_get_next(rec, lock);
- }
-
- lock_mutex_exit_kernel();
-
- return(TRUE);
-}
-
-/*************************************************************************
-Validates the record lock queues on a page. */
-
-ibool
-lock_rec_validate_page(
-/*===================*/
- /* out: TRUE if ok */
- ulint space, /* in: space id */
- ulint page_no)/* in: page number */
-{
- dict_index_t* index;
- page_t* page;
- lock_t* lock;
- rec_t* rec;
- ulint nth_lock = 0;
- ulint nth_bit = 0;
- ulint i;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- mtr_start(&mtr);
-
- page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- lock_mutex_enter_kernel();
-loop:
- lock = lock_rec_get_first_on_page_addr(space, page_no);
-
- if (!lock) {
- goto function_exit;
- }
-
- for (i = 0; i < nth_lock; i++) {
-
- lock = lock_rec_get_next_on_page(lock);
-
- if (!lock) {
- goto function_exit;
- }
- }
-
- ut_a(trx_in_trx_list(lock->trx));
- ut_a(lock->trx->conc_state == TRX_ACTIVE
- || lock->trx->conc_state == TRX_PREPARED
- || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
-
- for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
-
- if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
-
- index = lock->index;
- rec = page_find_rec_with_heap_no(page, i);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- fprintf(stderr,
- "Validating %lu %lu\n",
- (ulong) space, (ulong) page_no);
-
- lock_mutex_exit_kernel();
-
- lock_rec_queue_validate(rec, index, offsets);
-
- lock_mutex_enter_kernel();
-
- nth_bit = i + 1;
-
- goto loop;
- }
- }
-
- nth_bit = 0;
- nth_lock++;
-
- goto loop;
-
-function_exit:
- lock_mutex_exit_kernel();
-
- mtr_commit(&mtr);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(TRUE);
-}
-
-/*************************************************************************
-Validates the lock system. */
-
-ibool
-lock_validate(void)
-/*===============*/
- /* out: TRUE if ok */
-{
- lock_t* lock;
- trx_t* trx;
- dulint limit;
- ulint space;
- ulint page_no;
- ulint i;
-
- lock_mutex_enter_kernel();
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx) {
- lock = UT_LIST_GET_FIRST(trx->trx_locks);
-
- while (lock) {
- if (lock_get_type(lock) & LOCK_TABLE) {
-
- lock_table_queue_validate(
- lock->un_member.tab_lock.table);
- }
-
- lock = UT_LIST_GET_NEXT(trx_locks, lock);
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
-
- limit = ut_dulint_zero;
-
- for (;;) {
- lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
-
- while (lock) {
- ut_a(trx_in_trx_list(lock->trx));
-
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
- if (ut_dulint_cmp(
- ut_dulint_create(space, page_no),
- limit) >= 0) {
- break;
- }
-
- lock = HASH_GET_NEXT(hash, lock);
- }
-
- if (!lock) {
-
- break;
- }
-
- lock_mutex_exit_kernel();
-
- lock_rec_validate_page(space, page_no);
-
- lock_mutex_enter_kernel();
-
- limit = ut_dulint_create(space, page_no + 1);
- }
- }
-
- lock_mutex_exit_kernel();
-
- return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
-
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate insert of
-a record. If they do, first tests if the query thread should anyway
-be suspended for some reason; if not, then puts the transaction and
-the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue. */
-
-ulint
-lock_rec_insert_check_and_lock(
-/*===========================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record after which to insert */
- dict_index_t* index, /* in: index */
- que_thr_t* thr, /* in: query thread */
- ibool* inherit)/* out: set to TRUE if the new inserted
- record maybe should inherit LOCK_GAP type
- locks from the successor record */
-{
- rec_t* next_rec;
- trx_t* trx;
- lock_t* lock;
- ulint err;
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- ut_ad(rec);
-
- trx = thr_get_trx(thr);
- next_rec = page_rec_get_next(rec);
-
- *inherit = FALSE;
-
- lock_mutex_enter_kernel();
-
- ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-
- lock = lock_rec_get_first(next_rec);
-
- if (lock == NULL) {
- /* We optimize CPU time usage in the simplest case */
-
- lock_mutex_exit_kernel();
-
- if (!(index->type & DICT_CLUSTERED)) {
-
- /* Update the page max trx id field */
- page_update_max_trx_id(buf_frame_align(rec),
- thr_get_trx(thr)->id);
- }
-
- return(DB_SUCCESS);
- }
-
- *inherit = TRUE;
-
- /* If another transaction has an explicit lock request which locks
- the gap, waiting or granted, on the successor, the insert has to wait.
-
- An exception is the case where the lock by the another transaction
- is a gap type lock which it placed to wait for its turn to insert. We
- do not consider that kind of a lock conflicting with our insert. This
- eliminates an unnecessary deadlock which resulted when 2 transactions
- had to wait for their insert. Both had waiting gap type lock requests
- on the successor, which produced an unnecessary deadlock. */
-
- if (lock_rec_other_has_conflicting(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, next_rec,
- trx)) {
-
- /* Note that we may get DB_SUCCESS also here! */
- err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
- | LOCK_INSERT_INTENTION,
- next_rec, index, thr);
- } else {
- err = DB_SUCCESS;
- }
-
- lock_mutex_exit_kernel();
-
- if (!(index->type & DICT_CLUSTERED) && (err == DB_SUCCESS)) {
-
- /* Update the page max trx id field */
- page_update_max_trx_id(buf_frame_align(rec),
- thr_get_trx(thr)->id);
- }
-
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- offsets = rec_get_offsets(next_rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
- ut_ad(lock_rec_queue_validate(next_rec, index, offsets));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
-
- return(err);
-}
-
-/*************************************************************************
-If a transaction has an implicit x-lock on a record, but no explicit x-lock
-set on the record, sets one for it. NOTE that in the case of a secondary
-index, the kernel mutex may get temporarily released. */
-static
-void
-lock_rec_convert_impl_to_expl(
-/*==========================*/
- rec_t* rec, /* in: user record on page */
- dict_index_t* index, /* in: index of record */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
-{
- trx_t* impl_trx;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
-
- if (index->type & DICT_CLUSTERED) {
- impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
- } else {
- impl_trx = lock_sec_rec_some_has_impl_off_kernel(
- rec, index, offsets);
- }
-
- if (impl_trx) {
- /* If the transaction has no explicit x-lock set on the
- record, set one for it */
-
- if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
- impl_trx)) {
-
- lock_rec_add_to_queue(
- LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
- rec, index, impl_trx);
- }
- }
-}
-
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate modify (update,
-delete mark, or delete unmark) of a clustered index record. If they do,
-first tests if the query thread should anyway be suspended for some
-reason; if not, then puts the transaction and the query thread to the
-lock wait state and inserts a waiting request for a record x-lock to the
-lock queue. */
-
-ulint
-lock_clust_rec_modify_check_and_lock(
-/*=================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record which should be modified */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(index->type & DICT_CLUSTERED);
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- lock_mutex_enter_kernel();
-
- ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-
- /* If a transaction has no explicit x-lock set on the record, set one
- for it */
-
- lock_rec_convert_impl_to_expl(rec, index, offsets);
-
- err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);
-
- lock_mutex_exit_kernel();
-
- ut_ad(lock_rec_queue_validate(rec, index, offsets));
-
- return(err);
-}
-
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate modify (delete
-mark or delete unmark) of a secondary index record. */
-
-ulint
-lock_sec_rec_modify_check_and_lock(
-/*===============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: record which should be modified;
- NOTE: as this is a secondary index, we
- always have to modify the clustered index
- record first: see the comment below */
- dict_index_t* index, /* in: secondary index */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err;
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- ut_ad(!(index->type & DICT_CLUSTERED));
-
- /* Another transaction cannot have an implicit lock on the record,
- because when we come here, we already have modified the clustered
- index record, and this would not have been possible if another active
- transaction had modified this secondary index record. */
-
- lock_mutex_enter_kernel();
-
- ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-
- err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);
-
- lock_mutex_exit_kernel();
-
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
- ut_ad(lock_rec_queue_validate(rec, index, offsets));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
-
- if (err == DB_SUCCESS) {
- /* Update the page max trx id field */
-
- page_update_max_trx_id(buf_frame_align(rec),
- thr_get_trx(thr)->id);
- }
-
- return(err);
-}
-
-/*************************************************************************
-Like the counterpart for a clustered index below, but now we read a
-secondary index record. */
-
-ulint
-lock_sec_rec_read_check_and_lock(
-/*=============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: secondary index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err;
-
- ut_ad(!(index->type & DICT_CLUSTERED));
- ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- lock_mutex_enter_kernel();
-
- ut_ad(mode != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad(mode != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-
- /* Some transaction may have an implicit x-lock on the record only
- if the max trx id for the page >= min trx id for the trx list or a
- database recovery is running. */
-
- if (((ut_dulint_cmp(page_get_max_trx_id(buf_frame_align(rec)),
- trx_list_get_min_trx_id()) >= 0)
- || recv_recovery_is_on())
- && !page_rec_is_supremum(rec)) {
-
- lock_rec_convert_impl_to_expl(rec, index, offsets);
- }
-
- err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);
-
- lock_mutex_exit_kernel();
-
- ut_ad(lock_rec_queue_validate(rec, index, offsets));
-
- return(err);
-}
-
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. */
-
-ulint
-lock_clust_rec_read_check_and_lock(
-/*===============================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err;
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
- ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
- || gap_mode == LOCK_REC_NOT_GAP);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- lock_mutex_enter_kernel();
-
- ut_ad(mode != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad(mode != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-
- if (!page_rec_is_supremum(rec)) {
-
- lock_rec_convert_impl_to_expl(rec, index, offsets);
- }
-
- err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);
-
- lock_mutex_exit_kernel();
-
- ut_ad(lock_rec_queue_validate(rec, index, offsets));
-
- return(err);
-}
-/*************************************************************************
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. This is an alternative version of
-lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets". */
-
-ulint
-lock_clust_rec_read_check_and_lock_alt(
-/*===================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
- ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- rec_t* rec, /* in: user record or page supremum record
- which should be read or passed over by a read
- cursor */
- dict_index_t* index, /* in: clustered index */
- ulint mode, /* in: mode of the lock which the read cursor
- should set on records: LOCK_S or LOCK_X; the
- latter is possible in SELECT FOR UPDATE */
- ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /* in: query thread */
-{
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ulint ret;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &tmp_heap);
- ret = lock_clust_rec_read_check_and_lock(flags, rec, index,
- offsets, mode, gap_mode, thr);
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
- return(ret);
-}
-
diff --git a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
deleted file mode 100644
index 7eb5e3db422..00000000000
--- a/storage/innobase/log/log0log.c
+++ /dev/null
@@ -1,3354 +0,0 @@
-/******************************************************
-Database log
-
-(c) 1995-1997 Innobase Oy
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#include "log0log.h"
-
-#ifdef UNIV_NONINL
-#include "log0log.ic"
-#endif
-
-#include "mem0mem.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "srv0srv.h"
-#include "log0recv.h"
-#include "fil0fil.h"
-#include "dict0boot.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "trx0sys.h"
-#include "trx0trx.h"
-
-/*
-General philosophy of InnoDB redo-logs:
-
-1) Every change to a contents of a data page must be done
-through mtr, which in mtr_commit() writes log records
-to the InnoDB redo log.
-
-2) Normally these changes are performed using a mlog_write_ulint()
-or similar function.
-
-3) In some page level operations only a code number of a
-c-function and its parameters are written to the log to
-reduce the size of the log.
-
- 3a) You should not add parameters to these kind of functions
- (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
-
- 3b) You should not add such functionality which either change
- working when compared with the old or are dependent on data
- outside of the page. These kind of functions should implement
- self-contained page transformation and it should be unchanged
- if you don't have very essential reasons to change log
- semantics or format.
-
-*/
-
-/* Current free limit of space 0; protected by the log sys mutex; 0 means
-uninitialized */
-ulint log_fsp_current_free_limit = 0;
-
-/* Global log system variable */
-log_t* log_sys = NULL;
-
-#ifdef UNIV_DEBUG
-ibool log_do_write = TRUE;
-
-ibool log_debug_writes = FALSE;
-#endif /* UNIV_DEBUG */
-
-/* These control how often we print warnings if the last checkpoint is too
-old */
-ibool log_has_printed_chkp_warning = FALSE;
-time_t log_last_warning_time;
-
-#ifdef UNIV_LOG_ARCHIVE
-/* Pointer to this variable is used as the i/o-message when we do i/o to an
-archive */
-byte log_archive_io;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/* A margin for free space in the log buffer before a log entry is catenated */
-#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
-
-/* Margins for free space in the log buffer after a log entry is catenated */
-#define LOG_BUF_FLUSH_RATIO 2
-#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
-
-/* Margin for the free space in the smallest log group, before a new query
-step which modifies the database, is started */
-
-#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
-#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
-
-/* This parameter controls asynchronous making of a new checkpoint; the value
-should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
-
-#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32
-
-/* This parameter controls synchronous preflushing of modified buffer pages */
-#define LOG_POOL_PREFLUSH_RATIO_SYNC 16
-
-/* The same ratio for asynchronous preflushing; this value should be less than
-the previous */
-#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
-
-/* Extra margin, in addition to one log file, used in archiving */
-#define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE)
-
-/* This parameter controls asynchronous writing to the archive */
-#define LOG_ARCHIVE_RATIO_ASYNC 16
-
-/* Codes used in unlocking flush latches */
-#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1
-#define LOG_UNLOCK_FLUSH_LOCK 2
-
-/* States of an archiving operation */
-#define LOG_ARCHIVE_READ 1
-#define LOG_ARCHIVE_WRITE 2
-
-/**********************************************************
-Completes a checkpoint write i/o to a log file. */
-static
-void
-log_io_complete_checkpoint(void);
-/*============================*/
-#ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
-Completes an archiving i/o. */
-static
-void
-log_io_complete_archive(void);
-/*=========================*/
-#endif /* UNIV_LOG_ARCHIVE */
-
-/********************************************************************
-Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
-so that we know that the limit has been written to a log checkpoint field
-on disk. */
-
-void
-log_fsp_current_free_limit_set_and_checkpoint(
-/*==========================================*/
- ulint limit) /* in: limit to set */
-{
- ibool success;
-
- mutex_enter(&(log_sys->mutex));
-
- log_fsp_current_free_limit = limit;
-
- mutex_exit(&(log_sys->mutex));
-
- /* Try to make a synchronous checkpoint */
-
- success = FALSE;
-
- while (!success) {
- success = log_checkpoint(TRUE, TRUE);
- }
-}
-
-/********************************************************************
-Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
-exists. */
-static
-dulint
-log_buf_pool_get_oldest_modification(void)
-/*======================================*/
-{
- dulint lsn;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- lsn = buf_pool_get_oldest_modification();
-
- if (ut_dulint_is_zero(lsn)) {
-
- lsn = log_sys->lsn;
- }
-
- return(lsn);
-}
-
-/****************************************************************
-Opens the log for log_write_low. The log must be closed with log_close and
-released with log_release. */
-
-dulint
-log_reserve_and_open(
-/*=================*/
- /* out: start lsn of the log record */
- ulint len) /* in: length of data to be catenated */
-{
- log_t* log = log_sys;
- ulint len_upper_limit;
-#ifdef UNIV_LOG_ARCHIVE
- ulint archived_lsn_age;
- ulint dummy;
-#endif /* UNIV_LOG_ARCHIVE */
-#ifdef UNIV_DEBUG
- ulint count = 0;
-#endif /* UNIV_DEBUG */
-
- ut_a(len < log->buf_size / 2);
-loop:
- mutex_enter(&(log->mutex));
-
- /* Calculate an upper limit for the space the string may take in the
- log buffer */
-
- len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
-
- if (log->buf_free + len_upper_limit > log->buf_size) {
-
- mutex_exit(&(log->mutex));
-
- /* Not enough free space, do a syncronous flush of the log
- buffer */
-
- log_buffer_flush_to_disk();
-
- srv_log_waits++;
-
- ut_ad(++count < 50);
-
- goto loop;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if (log->archiving_state != LOG_ARCH_OFF) {
-
- archived_lsn_age = ut_dulint_minus(log->lsn,
- log->archived_lsn);
- if (archived_lsn_age + len_upper_limit
- > log->max_archived_lsn_age) {
- /* Not enough free archived space in log groups: do a
- synchronous archive write batch: */
-
- mutex_exit(&(log->mutex));
-
- ut_ad(len_upper_limit <= log->max_archived_lsn_age);
-
- log_archive_do(TRUE, &dummy);
-
- ut_ad(++count < 50);
-
- goto loop;
- }
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
-#ifdef UNIV_LOG_DEBUG
- log->old_buf_free = log->buf_free;
- log->old_lsn = log->lsn;
-#endif
- return(log->lsn);
-}
-
-/****************************************************************
-Writes to the log the string given. It is assumed that the caller holds the
-log mutex. */
-
-void
-log_write_low(
-/*==========*/
- byte* str, /* in: string */
- ulint str_len) /* in: string length */
-{
- log_t* log = log_sys;
- ulint len;
- ulint data_len;
- byte* log_block;
-
- ut_ad(mutex_own(&(log->mutex)));
-part_loop:
- /* Calculate a part length */
-
- data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
-
- if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
-
- /* The string fits within the current log block */
-
- len = str_len;
- } else {
- data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
-
- len = OS_FILE_LOG_BLOCK_SIZE
- - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
- - LOG_BLOCK_TRL_SIZE;
- }
-
- ut_memcpy(log->buf + log->buf_free, str, len);
-
- str_len -= len;
- str = str + len;
-
- log_block = ut_align_down(log->buf + log->buf_free,
- OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_data_len(log_block, data_len);
-
- if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
- /* This block became full */
- log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_checkpoint_no(log_block,
- log_sys->next_checkpoint_no);
- len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
-
- log->lsn = ut_dulint_add(log->lsn, len);
-
- /* Initialize the next block header */
- log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
- } else {
- log->lsn = ut_dulint_add(log->lsn, len);
- }
-
- log->buf_free += len;
-
- ut_ad(log->buf_free <= log->buf_size);
-
- if (str_len > 0) {
- goto part_loop;
- }
-
- srv_log_write_requests++;
-}
-
-/****************************************************************
-Closes the log. */
-
-dulint
-log_close(void)
-/*===========*/
- /* out: lsn */
-{
- byte* log_block;
- ulint first_rec_group;
- dulint oldest_lsn;
- dulint lsn;
- log_t* log = log_sys;
- ulint checkpoint_age;
-
- ut_ad(mutex_own(&(log->mutex)));
-
- lsn = log->lsn;
-
- log_block = ut_align_down(log->buf + log->buf_free,
- OS_FILE_LOG_BLOCK_SIZE);
- first_rec_group = log_block_get_first_rec_group(log_block);
-
- if (first_rec_group == 0) {
- /* We initialized a new log block which was not written
- full by the current mtr: the next mtr log record group
- will start within this block at the offset data_len */
-
- log_block_set_first_rec_group(
- log_block, log_block_get_data_len(log_block));
- }
-
- if (log->buf_free > log->max_buf_free) {
-
- log->check_flush_or_checkpoint = TRUE;
- }
-
- checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn);
-
- if (checkpoint_age >= log->log_group_capacity) {
- /* TODO: split btr_store_big_rec_extern_fields() into small
- steps so that we can release all latches in the middle, and
- call log_free_check() to ensure we never write over log written
- after the latest checkpoint. In principle, we should split all
- big_rec operations, but other operations are smaller. */
-
- if (!log_has_printed_chkp_warning
- || difftime(time(NULL), log_last_warning_time) > 15) {
-
- log_has_printed_chkp_warning = TRUE;
- log_last_warning_time = time(NULL);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: the age of the last"
- " checkpoint is %lu,\n"
- "InnoDB: which exceeds the log group"
- " capacity %lu.\n"
- "InnoDB: If you are using big"
- " BLOB or TEXT rows, you must set the\n"
- "InnoDB: combined size of log files"
- " at least 10 times bigger than the\n"
- "InnoDB: largest such row.\n",
- (ulong) checkpoint_age,
- (ulong) log->log_group_capacity);
- }
- }
-
- if (checkpoint_age <= log->max_modified_age_async) {
-
- goto function_exit;
- }
-
- oldest_lsn = buf_pool_get_oldest_modification();
-
- if (ut_dulint_is_zero(oldest_lsn)
- || (ut_dulint_minus(lsn, oldest_lsn)
- > log->max_modified_age_async)
- || checkpoint_age > log->max_checkpoint_age_async) {
-
- log->check_flush_or_checkpoint = TRUE;
- }
-function_exit:
-
-#ifdef UNIV_LOG_DEBUG
- log_check_log_recs(log->buf + log->old_buf_free,
- log->buf_free - log->old_buf_free, log->old_lsn);
-#endif
-
- return(lsn);
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
-Pads the current log block full with dummy log records. Used in producing
-consistent archived log files. */
-static
-void
-log_pad_current_log_block(void)
-/*===========================*/
-{
- byte b = MLOG_DUMMY_RECORD;
- ulint pad_length;
- ulint i;
- dulint lsn;
-
- /* We retrieve lsn only because otherwise gcc crashed on HP-UX */
- lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
-
- pad_length = OS_FILE_LOG_BLOCK_SIZE
- - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
- - LOG_BLOCK_TRL_SIZE;
-
- for (i = 0; i < pad_length; i++) {
- log_write_low(&b, 1);
- }
-
- lsn = log_sys->lsn;
-
- log_close();
- log_release();
-
- ut_a((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
- == LOG_BLOCK_HDR_SIZE);
-}
-#endif /* UNIV_LOG_ARCHIVE */
-
-/**********************************************************
-Calculates the data capacity of a log group, when the log file headers are not
-included. */
-
-ulint
-log_group_get_capacity(
-/*===================*/
- /* out: capacity in bytes */
- log_group_t* group) /* in: log group */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
-}
-
-/**********************************************************
-Calculates the offset within a log group, when the log file headers are not
-included. */
-UNIV_INLINE
-ulint
-log_group_calc_size_offset(
-/*=======================*/
- /* out: size offset (<= offset) */
- ulint offset, /* in: real offset within the log group */
- log_group_t* group) /* in: log group */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
-}
-
-/**********************************************************
-Calculates the offset within a log group, when the log file headers are
-included. */
-UNIV_INLINE
-ulint
-log_group_calc_real_offset(
-/*=======================*/
- /* out: real offset (>= offset) */
- ulint offset, /* in: size offset within the log group */
- log_group_t* group) /* in: log group */
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- return(offset + LOG_FILE_HDR_SIZE
- * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
-}
-
-/**********************************************************
-Calculates the offset of an lsn within a log group. */
-static
-ulint
-log_group_calc_lsn_offset(
-/*======================*/
- /* out: offset within the log group */
- dulint lsn, /* in: lsn, must be within 4 GB of
- group->lsn */
- log_group_t* group) /* in: log group */
-{
- dulint gr_lsn;
- ib_longlong gr_lsn_size_offset;
- ib_longlong difference;
- ib_longlong group_size;
- ib_longlong offset;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- /* If total log file size is > 2 GB we can easily get overflows
- with 32-bit integers. Use 64-bit integers instead. */
-
- gr_lsn = group->lsn;
-
- gr_lsn_size_offset = (ib_longlong)
- log_group_calc_size_offset(group->lsn_offset, group);
-
- group_size = (ib_longlong) log_group_get_capacity(group);
-
- if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
-
- difference = (ib_longlong) ut_dulint_minus(lsn, gr_lsn);
- } else {
- difference = (ib_longlong) ut_dulint_minus(gr_lsn, lsn);
-
- difference = difference % group_size;
-
- difference = group_size - difference;
- }
-
- offset = (gr_lsn_size_offset + difference) % group_size;
-
- ut_a(offset < (((ib_longlong) 1) << 32)); /* offset must be < 4 GB */
-
- /* fprintf(stderr,
- "Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
- (ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference);
- */
-
- return(log_group_calc_real_offset((ulint)offset, group));
-}
-
-/***********************************************************************
-Finds the log file, and the position inside that file, where a given lsn
-is stored. An lsn smaller than first_header_lsn is first advanced by whole
-multiples of the combined capacity of all the files, so that its distance
-from first_header_lsn becomes non-negative. */
-
-ulint
-log_calc_where_lsn_is(
-/*==================*/
-						/* out: log file number */
-	ib_longlong*	log_file_offset,	/* out: offset in that file
-						(including the header) */
-	dulint		first_header_lsn,	/* in: first log file start
-						lsn */
-	dulint		lsn,			/* in: lsn whose position to
-						determine */
-	ulint		n_log_files,		/* in: total number of log
-						files */
-	ib_longlong	log_file_size)		/* in: log file size
-						(including the header) */
-{
-	ib_longlong	capacity = log_file_size - LOG_FILE_HDR_SIZE;
-	ib_longlong	target = ut_conv_dulint_to_longlong(lsn);
-	ib_longlong	origin = ut_conv_dulint_to_longlong(first_header_lsn);
-	ib_longlong	distance;
-
-	if (target < origin) {
-		ib_longlong	n_wraps = 1 + (origin - target)
-			/ (capacity * (ib_longlong) n_log_files);
-
-		target += n_wraps * capacity * (ib_longlong) n_log_files;
-	}
-
-	ut_a(target >= origin);
-
-	distance = target - origin;
-
-	/* The payload of each file starts after its header */
-	*log_file_offset = distance % capacity + LOG_FILE_HDR_SIZE;
-
-	return(((ulint) (distance / capacity)) % n_log_files);
-}
-
-/************************************************************
-Moves the reference point (lsn, lsn_offset) of a log group to a given lsn.
-The new offset is computed from the current reference point, so the fields
-must already correspond to some lsn, for instance, a checkpoint lsn. */
-
-void
-log_group_set_fields(
-/*=================*/
-	log_group_t*	group,	/* in: group */
-	dulint		lsn)	/* in: lsn for which the values should be
-				set */
-{
-	/* The offset must be computed before group->lsn is overwritten,
-	because the calculation reads the old reference point */
-	ulint	new_offset = log_group_calc_lsn_offset(lsn, group);
-
-	group->lsn_offset = new_offset;
-	group->lsn = lsn;
-}
-
-/*********************************************************************
-Derives, from the smallest log group, the recommended highest values for
-lsn - last_checkpoint_lsn, lsn - buf_get_oldest_modification(), and
-lsn - max_archive_lsn_age, and stores them in log_sys. If the smallest
-group cannot hold the free space reserved for srv_thread_concurrency
-threads, an error message is printed and exit(1) is called. */
-static
-ibool
-log_calc_max_ages(void)
-/*===================*/
-			/* out: TRUE; in the failure case, where the
-			smallest log group is too small to accommodate
-			the number of OS threads in the database server,
-			the function calls exit(1) */
-{
-	log_group_t*	grp;
-	ulint		min_capacity		= ULINT_MAX;
-	ulint		min_archive_margin	= ULINT_MAX;
-	ulint		capacity;
-	ulint		arch_margin;
-	ulint		reserved;
-	ulint		margin;
-	ibool		ok;
-
-	ut_ad(!mutex_own(&(log_sys->mutex)));
-
-	mutex_enter(&(log_sys->mutex));
-
-	grp = UT_LIST_GET_FIRST(log_sys->log_groups);
-
-	ut_ad(grp);
-
-	for (; grp; grp = UT_LIST_GET_NEXT(log_groups, grp)) {
-
-		capacity = log_group_get_capacity(grp);
-
-		if (capacity < min_capacity) {
-
-			min_capacity = capacity;
-		}
-
-		arch_margin = capacity
-			- (grp->file_size - LOG_FILE_HDR_SIZE)
-			- LOG_ARCHIVE_EXTRA_MARGIN;
-
-		if (arch_margin < min_archive_margin) {
-
-			min_archive_margin = arch_margin;
-		}
-	}
-
-	/* Keep 10 % of the smallest capacity as extra safety */
-	min_capacity -= min_capacity / 10;
-
-	/* Every OS thread needs enough free space in the smallest log
-	group for the log entries of a single query step: running out of
-	free log space is a serious system error which requires rebooting
-	the database. */
-
-	reserved = LOG_CHECKPOINT_FREE_PER_THREAD
-		* (10 + srv_thread_concurrency)
-		+ LOG_CHECKPOINT_EXTRA_FREE;
-
-	ok = (reserved < min_capacity / 2) ? TRUE : FALSE;
-
-	if (ok) {
-		margin = ut_min(min_capacity - reserved,
-				log_sys->adm_checkpoint_interval);
-
-		/* Some more extra safety */
-		margin -= margin / 10;
-
-		log_sys->log_group_capacity = min_capacity;
-
-		log_sys->max_modified_age_async = margin
-			- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
-		log_sys->max_modified_age_sync = margin
-			- margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
-
-		log_sys->max_checkpoint_age_async = margin
-			- margin / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
-		log_sys->max_checkpoint_age = margin;
-
-#ifdef UNIV_LOG_ARCHIVE
-		log_sys->max_archived_lsn_age = min_archive_margin;
-
-		log_sys->max_archived_lsn_age_async = min_archive_margin
-			- min_archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
-#endif /* UNIV_LOG_ARCHIVE */
-	}
-
-	mutex_exit(&(log_sys->mutex));
-
-	if (!ok) {
-		fprintf(stderr,
-			"InnoDB: Error: ib_logfiles are too small"
-			" for innodb_thread_concurrency %lu.\n"
-			"InnoDB: The combined size of ib_logfiles"
-			" should be bigger than\n"
-			"InnoDB: 200 kB * innodb_thread_concurrency.\n"
-			"InnoDB: To get mysqld to start up, set"
-			" innodb_thread_concurrency in my.cnf\n"
-			"InnoDB: to a lower value, for example, to 8."
-			" After an ERROR-FREE shutdown\n"
-			"InnoDB: of mysqld you can adjust the size of"
-			" ib_logfiles, as explained in\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"adding-and-removing.html\n"
-			"InnoDB: Cannot continue operation."
-			" Calling exit(1).\n",
-			(ulong)srv_thread_concurrency);
-
-		exit(1);
-	}
-
-	return(ok);
-}
-
-/**********************************************************
-Initializes the log. Allocates the global log_sys object and, while holding
-its newly created mutex, sets up the log buffer (aligned to
-OS_FILE_LOG_BLOCK_SIZE), the write/flush bookkeeping fields, the two flush
-events (both created and immediately set), and the checkpoint buffer. The
-first log block of the buffer is initialized, so that on return
-log_sys->lsn equals LOG_START_LSN + LOG_BLOCK_HDR_SIZE and
-log_sys->buf_free equals LOG_BLOCK_HDR_SIZE. No log group is created here. */
-
-void
-log_init(void)
-/*==========*/
-{
- byte* buf;
-
- log_sys = mem_alloc(sizeof(log_t));
-
- mutex_create(&log_sys->mutex, SYNC_LOG);
-
- mutex_enter(&(log_sys->mutex));
-
- /* Start the lsn from one log block from zero: this way every
- log record has a start lsn != zero, a fact which we will use */
-
- log_sys->lsn = LOG_START_LSN;
-
- ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
- ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
-
- buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE); /* one extra block leaves room for the alignment below */
- log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
-
- log_sys->buf_size = LOG_BUFFER_SIZE;
-
- memset(log_sys->buf, '\0', LOG_BUFFER_SIZE);
-
- log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
- - LOG_BUF_FLUSH_MARGIN;
- log_sys->check_flush_or_checkpoint = TRUE;
- UT_LIST_INIT(log_sys->log_groups);
-
- log_sys->n_log_ios = 0;
-
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
- /*----------------------------*/
-
- log_sys->buf_next_to_write = 0;
-
- log_sys->write_lsn = ut_dulint_zero;
- log_sys->current_flush_lsn = ut_dulint_zero;
- log_sys->flushed_to_disk_lsn = ut_dulint_zero;
-
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->n_pending_writes = 0;
-
- log_sys->no_flush_event = os_event_create(NULL);
-
- os_event_set(log_sys->no_flush_event); /* no write is pending at startup */
-
- log_sys->one_flushed_event = os_event_create(NULL);
-
- os_event_set(log_sys->one_flushed_event);
-
- /*----------------------------*/
- log_sys->adm_checkpoint_interval = ULINT_MAX;
-
- log_sys->next_checkpoint_no = ut_dulint_zero;
- log_sys->last_checkpoint_lsn = log_sys->lsn;
- log_sys->n_pending_checkpoint_writes = 0;
-
- rw_lock_create(&log_sys->checkpoint_lock, SYNC_NO_ORDER_CHECK);
-
- log_sys->checkpoint_buf
- = ut_align(mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
- OS_FILE_LOG_BLOCK_SIZE);
- memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
- /*----------------------------*/
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Under MySQL, log archiving is always off */
- log_sys->archiving_state = LOG_ARCH_OFF;
- log_sys->archived_lsn = log_sys->lsn;
- log_sys->next_archived_lsn = ut_dulint_zero;
-
- log_sys->n_pending_archive_ios = 0;
-
- rw_lock_create(&log_sys->archive_lock, SYNC_NO_ORDER_CHECK);
-
- log_sys->archive_buf = NULL;
-
- /* ut_align(
- ut_malloc(LOG_ARCHIVE_BUF_SIZE
- + OS_FILE_LOG_BLOCK_SIZE),
- OS_FILE_LOG_BLOCK_SIZE); */
- log_sys->archive_buf_size = 0;
-
- /* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
-
- log_sys->archiving_on = os_event_create(NULL);
-#endif /* UNIV_LOG_ARCHIVE */
-
- /*----------------------------*/
-
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
-
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE; /* the first free byte follows the block header */
- log_sys->lsn = ut_dulint_add(LOG_START_LSN, LOG_BLOCK_HDR_SIZE);
-
- mutex_exit(&(log_sys->mutex));
-
-#ifdef UNIV_LOG_DEBUG
- recv_sys_create();
- recv_sys_init(FALSE, buf_pool_get_curr_size());
-
- recv_sys->parse_start_lsn = log_sys->lsn;
- recv_sys->scanned_lsn = log_sys->lsn;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = log_sys->lsn;
- recv_sys->limit_lsn = ut_dulint_max;
-#endif
-}
-
-/**********************************************************************
-Creates a log group, allocates and zeroes its block-aligned file header and
-checkpoint buffers, appends it to log_sys->log_groups, and recalculates the
-maximum age values of the log system. */
-
-void
-log_group_init(
-/*===========*/
-	ulint	id,			/* in: group id */
-	ulint	n_files,		/* in: number of log files */
-	ulint	file_size,		/* in: log file size in bytes */
-	ulint	space_id,		/* in: space id of the file space
-					which contains the log files of this
-					group */
-	ulint	archive_space_id __attribute__((unused)))
-					/* in: space id of the file space
-					which contains some archived log
-					files for this group; currently, only
-					for the first log group this is
-					used */
-{
-	log_group_t*	grp;
-	ulint		file_idx;
-
-	grp = mem_alloc(sizeof(log_group_t));
-
-	grp->id			= id;
-	grp->n_files		= n_files;
-	grp->file_size		= file_size;
-	grp->space_id		= space_id;
-	grp->state		= LOG_GROUP_OK;
-	grp->lsn		= LOG_START_LSN;
-	grp->lsn_offset		= LOG_FILE_HDR_SIZE;
-	grp->n_pending_writes	= 0;
-
-	grp->file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
-#ifdef UNIV_LOG_ARCHIVE
-	grp->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
-#endif /* UNIV_LOG_ARCHIVE */
-
-	/* One header buffer per log file; each allocation has one spare
-	block so that the buffer start can be aligned */
-
-	for (file_idx = 0; file_idx < n_files; file_idx++) {
-		grp->file_header_bufs[file_idx] = ut_align(
-			mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
-			OS_FILE_LOG_BLOCK_SIZE);
-
-		memset(grp->file_header_bufs[file_idx], '\0',
-		       LOG_FILE_HDR_SIZE);
-
-#ifdef UNIV_LOG_ARCHIVE
-		grp->archive_file_header_bufs[file_idx] = ut_align(
-			mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
-			OS_FILE_LOG_BLOCK_SIZE);
-
-		memset(grp->archive_file_header_bufs[file_idx], '\0',
-		       LOG_FILE_HDR_SIZE);
-#endif /* UNIV_LOG_ARCHIVE */
-	}
-
-#ifdef UNIV_LOG_ARCHIVE
-	grp->archive_space_id = archive_space_id;
-
-	grp->archived_file_no = 0;
-	grp->archived_offset = 0;
-#endif /* UNIV_LOG_ARCHIVE */
-
-	grp->checkpoint_buf = ut_align(
-		mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE);
-
-	memset(grp->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
-
-	UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, grp);
-
-	ut_a(log_calc_max_ages());
-}
-
-/**********************************************************************
-Sets the events that a completed flush i/o should release, as selected by
-the bits of 'code'. The caller must hold log_sys->mutex. */
-UNIV_INLINE
-void
-log_flush_do_unlocks(
-/*=================*/
-	ulint	code)	/* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
-			and LOG_UNLOCK_NONE_FLUSHED_LOCK */
-{
-	ibool	release_one_flushed;
-	ibool	release_no_flush;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	/* NOTE: the events must be set while the log mutex is held.
-	Transactions wait for these events, and when they wake up the log
-	flush they waited for must have ended. Without the mutex, the
-	i/o-thread calling this function could be preempted, a new flush
-	could start meanwhile, and the thread would then erroneously signal
-	the NEW flush as completed. Hence the event state changes atomically
-	together with log_sys->n_pending_writes etc. */
-
-	release_one_flushed = (code & LOG_UNLOCK_NONE_FLUSHED_LOCK)
-		? TRUE : FALSE;
-	release_no_flush = (code & LOG_UNLOCK_FLUSH_LOCK) ? TRUE : FALSE;
-
-	if (release_one_flushed) {
-		os_event_set(log_sys->one_flushed_event);
-	}
-
-	if (release_no_flush) {
-		os_event_set(log_sys->no_flush_event);
-	}
-}
-
-/**********************************************************************
-Checks whether the writes to a log group have completed. The first group to
-complete during a flush records log_sys->written_to_some_lsn and marks
-log_sys->one_flushed. The caller must hold log_sys->mutex. */
-UNIV_INLINE
-ulint
-log_group_check_flush_completion(
-/*=============================*/
-				/* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
-	log_group_t*	group)	/* in: log group */
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	if (group->n_pending_writes != 0) {
-		/* This group still has writes in progress */
-
-		return(0);
-	}
-
-	if (log_sys->one_flushed) {
-		/* Some group has already been reported as the first one */
-#ifdef UNIV_DEBUG
-		if (log_debug_writes) {
-
-			fprintf(stderr, "Log flushed to group %lu\n",
-				(ulong) group->id);
-		}
-#endif /* UNIV_DEBUG */
-		return(0);
-	}
-
-#ifdef UNIV_DEBUG
-	if (log_debug_writes) {
-		fprintf(stderr,
-			"Log flushed first to group %lu\n",
-			(ulong) group->id);
-	}
-#endif /* UNIV_DEBUG */
-	log_sys->written_to_some_lsn = log_sys->write_lsn;
-	log_sys->one_flushed = TRUE;
-
-	return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
-}
-
-/**********************************************************
-Checks whether all pending log writes have completed. If so, records
-written_to_all_lsn, advances buf_next_to_write, and, when more than half
-of max_buf_free has been written, shifts the unwritten tail of the log
-buffer to the start of the buffer. The caller must hold log_sys->mutex. */
-static
-ulint
-log_sys_check_flush_completion(void)
-/*================================*/
-			/* out: LOG_UNLOCK_FLUSH_LOCK or 0 */
-{
-	ulint	shift;
-	ulint	tail_end;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	if (log_sys->n_pending_writes != 0) {
-
-		return(0);
-	}
-
-	log_sys->written_to_all_lsn = log_sys->write_lsn;
-	log_sys->buf_next_to_write = log_sys->write_end_offset;
-
-	if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
-		/* Slide the remaining content down to the buffer start,
-		in whole log blocks */
-
-		shift = ut_calc_align_down(log_sys->write_end_offset,
-					   OS_FILE_LOG_BLOCK_SIZE);
-		tail_end = ut_calc_align(log_sys->buf_free,
-					 OS_FILE_LOG_BLOCK_SIZE);
-
-		ut_memmove(log_sys->buf, log_sys->buf + shift,
-			   tail_end - shift);
-
-		log_sys->buf_free -= shift;
-		log_sys->buf_next_to_write -= shift;
-	}
-
-	return(LOG_UNLOCK_FLUSH_LOCK);
-}
-
-/**********************************************************
-Completes an i/o to a log file. The kind of i/o is recognized from the
-'group' argument: under UNIV_LOG_ARCHIVE the address of log_archive_io
-marks an archive write, and a pointer whose lowest bit is set marks a
-checkpoint write (the real group address is then the pointer value
-minus 1). Any other value would be an ordinary log write; those are
-currently done synchronously, so that path ends in ut_error. */
-
-void
-log_io_complete(
-/*============*/
-	log_group_t*	group)	/* in: log group or a dummy pointer */
-{
-	ulint	unlock;
-
-#ifdef UNIV_LOG_ARCHIVE
-	if ((byte*)group == &log_archive_io) {
-		/* It was an archive write */
-
-		log_io_complete_archive();
-
-		return;
-	}
-#endif /* UNIV_LOG_ARCHIVE */
-
-	if ((ulint)group & 0x1UL) {
-		/* It was a checkpoint write: strip the tag bit to recover
-		the group address */
-		group = (log_group_t*)((ulint)group - 1);
-
-		if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
-		    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
-
-			fil_flush(group->space_id);
-		}
-
-#ifdef UNIV_DEBUG
-		if (log_debug_writes) {
-			/* group->id is a ulint: cast it to match %lu, as
-			the other debug printouts of this file do */
-			fprintf(stderr,
-				"Checkpoint info written to group %lu\n",
-				(ulong) group->id);
-		}
-#endif /* UNIV_DEBUG */
-		log_io_complete_checkpoint();
-
-		return;
-	}
-
-	ut_error;	/* We currently use synchronous writing of the
-			logs and cannot end up here! */
-
-	/* The code below is kept for asynchronous log writes; it is not
-	expected to be reached while the ut_error above is in place */
-
-	if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
-	    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
-	    && srv_flush_log_at_trx_commit != 2) {
-
-		fil_flush(group->space_id);
-	}
-
-	mutex_enter(&(log_sys->mutex));
-
-	ut_a(group->n_pending_writes > 0);
-	ut_a(log_sys->n_pending_writes > 0);
-
-	group->n_pending_writes--;
-	log_sys->n_pending_writes--;
-
-	unlock = log_group_check_flush_completion(group);
-	unlock = unlock | log_sys_check_flush_completion();
-
-	log_flush_do_unlocks(unlock);
-
-	mutex_exit(&(log_sys->mutex));
-}
-
-/**********************************************************
-Writes a log file header to a log file space. The header buffer of the
-nth file is filled with the group id and the start lsn, the 4-byte
-"created by hot backup" label is overwritten with spaces, and, if
-log_do_write is set, the first OS_FILE_LOG_BLOCK_SIZE bytes of the buffer
-are written to the start of the nth file. The caller must hold
-log_sys->mutex. */
-static
-void
-log_group_file_header_flush(
-/*========================*/
-	log_group_t*	group,		/* in: log group */
-	ulint		nth_file,	/* in: header to the nth file in the
-					log file space */
-	dulint		start_lsn)	/* in: log file data starts at this
-					lsn */
-{
-	byte*	buf;
-	ulint	dest_offset;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-	ut_a(nth_file < group->n_files);
-
-	buf = *(group->file_header_bufs + nth_file);
-
-	mach_write_to_4(buf + LOG_GROUP_ID, group->id);
-	mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
-
-	/* Wipe over possible label of ibbackup --restore. The source
-	literal must hold at least the 4 bytes that are copied, hence four
-	spaces: a shorter literal would be read past its end. */
-	memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "    ", 4);
-
-	/* The files of a group are laid out back to back in the space */
-	dest_offset = nth_file * group->file_size;
-
-#ifdef UNIV_DEBUG
-	if (log_debug_writes) {
-		fprintf(stderr,
-			"Writing log file header to group %lu file %lu\n",
-			(ulong) group->id, (ulong) nth_file);
-	}
-#endif /* UNIV_DEBUG */
-	if (log_do_write) {
-		log_sys->n_log_ios++;
-
-		srv_os_log_pending_writes++;
-
-		fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id,
-		       dest_offset / UNIV_PAGE_SIZE,
-		       dest_offset % UNIV_PAGE_SIZE,
-		       OS_FILE_LOG_BLOCK_SIZE,
-		       buf, group);
-
-		srv_os_log_pending_writes--;
-	}
-}
-
-/**********************************************************
-Computes the checksum of a log block and stores it in the trailer checksum
-field of the block. This is done just before the block is written to a log
-file; recovery uses the checksum to check the consistency of the block. */
-static
-void
-log_block_store_checksum(
-/*=====================*/
-	byte*	block)	/* in/out: pointer to a log block */
-{
-	ulint	checksum = log_block_calc_checksum(block);
-
-	log_block_set_checksum(block, checksum);
-}
-
-/**********************************************************
-Writes a buffer to a log file group. The buffer is written in pieces that
-never cross the end of a log file; whenever a piece starts right after a
-file header position and a header is wanted, the header of that file is
-written first. The checksum of every block is stored before the write.
-The caller must hold log_sys->mutex. */
-
-void
-log_group_write_buf(
-/*================*/
-	log_group_t*	group,		/* in: log group */
-	byte*		buf,		/* in: buffer */
-	ulint		len,		/* in: buffer len; must be divisible
-					by OS_FILE_LOG_BLOCK_SIZE */
-	dulint		start_lsn,	/* in: start lsn of the buffer; must
-					be divisible by
-					OS_FILE_LOG_BLOCK_SIZE */
-	ulint		new_data_offset)/* in: start offset of new data in
-					buf: this parameter is used to decide
-					if we have to write a new log file
-					header */
-{
-	ibool	hdr_wanted;
-	ulint	offset;
-	ulint	file_pos;
-	ulint	chunk;
-	ulint	blk;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-	ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
-	ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
-
-	hdr_wanted = (new_data_offset == 0) ? TRUE : FALSE;
-
-	while (len != 0) {
-
-		offset = log_group_calc_lsn_offset(start_lsn, group);
-		file_pos = offset % group->file_size;
-
-		if (hdr_wanted && file_pos == LOG_FILE_HDR_SIZE) {
-			/* A new log file instance in the group is
-			started */
-
-			log_group_file_header_flush(
-				group, offset / group->file_size, start_lsn);
-			srv_os_log_written+= OS_FILE_LOG_BLOCK_SIZE;
-			srv_log_writes++;
-		}
-
-		/* Never write past the end of the current file */
-		chunk = (file_pos + len > group->file_size)
-			? group->file_size - file_pos
-			: len;
-
-#ifdef UNIV_DEBUG
-		if (log_debug_writes) {
-
-			fprintf(stderr,
-				"Writing log file segment to group %lu"
-				" offset %lu len %lu\n"
-				"start lsn %lu %lu\n"
-				"First block n:o %lu last block n:o %lu\n",
-				(ulong) group->id, (ulong) offset,
-				(ulong) chunk,
-				(ulong) ut_dulint_get_high(start_lsn),
-				(ulong) ut_dulint_get_low(start_lsn),
-				(ulong) log_block_get_hdr_no(buf),
-				(ulong) log_block_get_hdr_no(
-					buf + chunk - OS_FILE_LOG_BLOCK_SIZE));
-			ut_a(log_block_get_hdr_no(buf)
-			     == log_block_convert_lsn_to_no(start_lsn));
-
-			for (blk = 0; blk < chunk / OS_FILE_LOG_BLOCK_SIZE;
-			     blk++) {
-
-				ut_a(log_block_get_hdr_no(buf) + blk
-				     == log_block_get_hdr_no(
-					     buf
-					     + blk * OS_FILE_LOG_BLOCK_SIZE));
-			}
-		}
-#endif /* UNIV_DEBUG */
-		/* Store the checksum of each log block in its trailer */
-
-		for (blk = 0; blk < chunk / OS_FILE_LOG_BLOCK_SIZE; blk++) {
-			log_block_store_checksum(
-				buf + blk * OS_FILE_LOG_BLOCK_SIZE);
-		}
-
-		if (log_do_write) {
-			log_sys->n_log_ios++;
-
-			srv_os_log_pending_writes++;
-
-			fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
-			       group->space_id,
-			       offset / UNIV_PAGE_SIZE,
-			       offset % UNIV_PAGE_SIZE, chunk, buf, group);
-
-			srv_os_log_pending_writes--;
-
-			srv_os_log_written+= chunk;
-			srv_log_writes++;
-		}
-
-		if (chunk >= len) {
-			/* Everything has been written */
-
-			break;
-		}
-
-		/* Continue with the rest in the next file; from now on a
-		file header is always wanted */
-
-		start_lsn = ut_dulint_add(start_lsn, chunk);
-		len -= chunk;
-		buf += chunk;
-
-		hdr_wanted = TRUE;
-	}
-}
-
-/**********************************************************
-This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been written to the log file up to the last log entry written
-by the transaction. If there is a flush running, it waits and checks if the
-flush flushed enough. If not, starts a new flush.
-
-Outline: the function returns at once while recv_no_ibuf_operations is set.
-Otherwise, holding log_sys->mutex, it returns if the requested lsn is already
-covered (by flushed_to_disk_lsn when flush_to_disk is set; else by
-written_to_all_lsn, or by written_to_some_lsn unless wait is
-LOG_WAIT_ALL_GROUPS). If a write is pending and it covers lsn, the function
-waits as requested by 'wait' and returns; if the pending write does not
-cover lsn, it waits for no_flush_event and starts over. With no write
-pending, it writes the block-aligned range of the log buffer between
-buf_next_to_write and buf_free to every log group, calls fil_flush() if
-flush_to_disk is set (and the flush method is not O_DSYNC), and finally
-runs the flush completion routines and sets the corresponding events. */
-
-void
-log_write_up_to(
-/*============*/
- dulint lsn, /* in: log sequence number up to which the log should
- be written, ut_dulint_max if not specified */
- ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk)
- /* in: TRUE if we want the written log also to be
- flushed to disk */
-{
- log_group_t* group;
- ulint start_offset;
- ulint end_offset;
- ulint area_start;
- ulint area_end;
-#ifdef UNIV_DEBUG
- ulint loop_count = 0;
-#endif /* UNIV_DEBUG */
- ulint unlock;
-
- if (recv_no_ibuf_operations) {
- /* Recovery is running and no operations on the log files are
- allowed yet (the variable name .._no_ibuf_.. is misleading) */
-
- return;
- }
-
-loop:
-#ifdef UNIV_DEBUG
- loop_count++;
-
- ut_ad(loop_count < 5); /* debug builds: bound the number of retries */
-
-# if 0
- if (loop_count > 2) {
- fprintf(stderr, "Log loop count %lu\n", loop_count);
- }
-# endif
-#endif
-
- mutex_enter(&(log_sys->mutex));
-
- if (flush_to_disk
- && ut_dulint_cmp(log_sys->flushed_to_disk_lsn, lsn) >= 0) {
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- if (!flush_to_disk
- && (ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0
- || (ut_dulint_cmp(log_sys->written_to_some_lsn, lsn)
- >= 0
- && wait != LOG_WAIT_ALL_GROUPS))) {
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- if (log_sys->n_pending_writes > 0) {
- /* A write (+ possibly flush to disk) is running */
-
- if (flush_to_disk
- && ut_dulint_cmp(log_sys->current_flush_lsn, lsn)
- >= 0) {
- /* The write + flush will write enough: wait for it to
- complete */
-
- goto do_waits;
- }
-
- if (!flush_to_disk
- && ut_dulint_cmp(log_sys->write_lsn, lsn) >= 0) {
- /* The write will write enough: wait for it to
- complete */
-
- goto do_waits;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for the write to complete and try to start a new
- write */
-
- os_event_wait(log_sys->no_flush_event);
-
- goto loop;
- }
-
- if (!flush_to_disk
- && log_sys->buf_free == log_sys->buf_next_to_write) {
- /* Nothing to write and no flush to disk requested */
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Writing log from %lu %lu up to lsn %lu %lu\n",
- (ulong) ut_dulint_get_high(
- log_sys->written_to_all_lsn),
- (ulong) ut_dulint_get_low(
- log_sys->written_to_all_lsn),
- (ulong) ut_dulint_get_high(log_sys->lsn),
- (ulong) ut_dulint_get_low(log_sys->lsn));
- }
-#endif /* UNIV_DEBUG */
- log_sys->n_pending_writes++;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
- group->n_pending_writes++; /* We assume here that we have only
- one log group! */
-
- os_event_reset(log_sys->no_flush_event);
- os_event_reset(log_sys->one_flushed_event);
-
- start_offset = log_sys->buf_next_to_write;
- end_offset = log_sys->buf_free;
-
- area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE); /* the write covers whole log blocks */
- area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_ad(area_end - area_start > 0);
-
- log_sys->write_lsn = log_sys->lsn;
-
- if (flush_to_disk) {
- log_sys->current_flush_lsn = log_sys->lsn;
- }
-
- log_sys->one_flushed = FALSE;
-
- log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
- log_block_set_checkpoint_no(
- log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
- log_sys->next_checkpoint_no);
-
- /* Copy the last, incompletely written, log block a log block length
- up, so that when the flush operation writes from the log buffer, the
- segment to write will not be changed by writers to the log */
-
- ut_memcpy(log_sys->buf + area_end,
- log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
- OS_FILE_LOG_BLOCK_SIZE);
-
- log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE; /* buf_free now points into the copy made above */
- log_sys->write_end_offset = log_sys->buf_free;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- /* Do the write to the log files */
-
- while (group) {
- log_group_write_buf(
- group, log_sys->buf + area_start,
- area_end - area_start,
- ut_dulint_align_down(log_sys->written_to_all_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- start_offset - area_start);
-
- log_group_set_fields(group, log_sys->write_lsn);
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
- /* O_DSYNC means the OS did not buffer the log file at all:
- so we have also flushed to disk what we have written */
-
- log_sys->flushed_to_disk_lsn = log_sys->write_lsn; /* NOTE(review): assigned while log_sys->mutex is not held */
-
- } else if (flush_to_disk) {
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- fil_flush(group->space_id);
- log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
- }
-
- mutex_enter(&(log_sys->mutex));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- ut_a(group->n_pending_writes == 1); /* only the write started above may be pending */
- ut_a(log_sys->n_pending_writes == 1);
-
- group->n_pending_writes--;
- log_sys->n_pending_writes--;
-
- unlock = log_group_check_flush_completion(group);
- unlock = unlock | log_sys_check_flush_completion();
-
- log_flush_do_unlocks(unlock);
-
- mutex_exit(&(log_sys->mutex));
-
- return;
-
-do_waits:
- mutex_exit(&(log_sys->mutex));
-
- if (wait == LOG_WAIT_ONE_GROUP) {
- os_event_wait(log_sys->one_flushed_event);
- } else if (wait == LOG_WAIT_ALL_GROUPS) {
- os_event_wait(log_sys->no_flush_event);
- } else {
- ut_ad(wait == LOG_NO_WAIT);
- }
-}
-
-/********************************************************************
-Does a synchronous flush of the log buffer to disk: reads the current lsn
-under the log mutex and then writes and flushes the log up to that lsn,
-waiting for all log groups. */
-
-void
-log_buffer_flush_to_disk(void)
-/*==========================*/
-{
-	dulint	current_lsn;
-
-	/* Take a consistent snapshot of the lsn */
-	mutex_enter(&(log_sys->mutex));
-	current_lsn = log_sys->lsn;
-	mutex_exit(&(log_sys->mutex));
-
-	log_write_up_to(current_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-}
-
-/********************************************************************
-Writes out the log buffer up to the current lsn, waiting for all log
-groups. The log is additionally forced to disk only when
-innodb_flush_log_at_trx_commit is 1. */
-
-void
-log_buffer_flush_maybe_sync(void)
-/*==========================*/
-{
-	dulint	current_lsn;
-	ibool	force_to_disk;
-
-	mutex_enter(&(log_sys->mutex));
-	current_lsn = log_sys->lsn;
-	mutex_exit(&(log_sys->mutex));
-
-	/* Only the setting 1 asks for a flush to disk here */
-	if (srv_flush_log_at_trx_commit == 1) {
-		force_to_disk = TRUE;
-	} else {
-		force_to_disk = FALSE;
-	}
-
-	log_write_up_to(current_lsn, LOG_WAIT_ALL_GROUPS, force_to_disk);
-}
-/********************************************************************
-Tries to establish a big enough margin of free space in the log buffer, such
-that a new log entry can be catenated without an immediate need for a flush.
-If the used part of the buffer exceeds log->max_buf_free and no write is
-pending, a write up to the current lsn is started without waiting for it
-and without a flush to disk. */
-static
-void
-log_flush_margin(void)
-/*==================*/
-{
-	ibool	do_flush	= FALSE;
-	log_t*	log		= log_sys;
-	/* Initialized so that lsn never holds an indeterminate value on
-	the path where no flush is requested */
-	dulint	lsn		= ut_dulint_zero;
-
-	mutex_enter(&(log->mutex));
-
-	if (log->buf_free > log->max_buf_free) {
-
-		if (log->n_pending_writes > 0) {
-			/* A flush is running: hope that it will provide enough
-			free space */
-		} else {
-			do_flush = TRUE;
-			lsn = log->lsn;
-		}
-	}
-
-	mutex_exit(&(log->mutex));
-
-	/* The write is started only after the mutex has been released:
-	log_write_up_to() acquires the log mutex itself */
-	if (do_flush) {
-		log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
-	}
-}
-
-/********************************************************************
-Advances the smallest lsn for which there are unflushed dirty blocks in the
-buffer pool, by starting a flush list batch. NOTE: this function may only be
-called if the calling thread owns no synchronization objects! */
-
-ibool
-log_preflush_pool_modified_pages(
-/*=============================*/
-				/* out: FALSE if there was a flush batch of
-				the same type running, which means that we
-				could not start this flush batch */
-	dulint	new_oldest,	/* in: try to advance oldest_modified_lsn
-				at least to this lsn */
-	ibool	sync)		/* in: TRUE if synchronous operation is
-				desired */
-{
-	ulint	n_flushed;
-
-	if (recv_recovery_on) {
-		/* During recovery all log records must first be applied
-		to their file pages, so that the pages get the right modify
-		lsn values. Otherwise some pages on disk might not yet be
-		recovered to the current lsn; even after this function we
-		would not know how up-to-date the disk version of the
-		database is, and a new checkpoint could not be based on the
-		buffer pool info alone. */
-
-		recv_apply_hashed_log_recs(TRUE);
-	}
-
-	n_flushed = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest);
-
-	if (sync) {
-		buf_flush_wait_batch_end(BUF_FLUSH_LIST);
-	}
-
-	return((n_flushed == ULINT_UNDEFINED) ? FALSE : TRUE);
-}
-
-/**********************************************************
-Completes a checkpoint: publishes next_checkpoint_lsn as the last
-checkpoint lsn, increments the checkpoint number, and releases the
-checkpoint lock. The caller must hold log_sys->mutex, and no checkpoint
-writes may be pending. */
-static
-void
-log_complete_checkpoint(void)
-/*=========================*/
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-	ut_ad(log_sys->n_pending_checkpoint_writes == 0);
-
-	log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
-
-	log_sys->next_checkpoint_no
-		= ut_dulint_add(log_sys->next_checkpoint_no, 1);
-
-	rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
-}
-
-/**********************************************************
-Completes an asynchronous checkpoint info write i/o to a log file: counts
-the write as done and, if it was the last pending one, completes the
-checkpoint. */
-static
-void
-log_io_complete_checkpoint(void)
-/*============================*/
-{
-	mutex_enter(&(log_sys->mutex));
-
-	ut_ad(log_sys->n_pending_checkpoint_writes > 0);
-
-	if (--log_sys->n_pending_checkpoint_writes == 0) {
-		/* This was the last outstanding checkpoint write */
-
-		log_complete_checkpoint();
-	}
-
-	mutex_exit(&(log_sys->mutex));
-}
-
-/***********************************************************************
-Writes info to a checkpoint about a log group. Each group occupies an
-8-byte slot in the group array of the checkpoint buffer, holding the
-archived file number and the archived file offset as 4-byte values. */
-static
-void
-log_checkpoint_set_nth_group_info(
-/*==============================*/
-	byte*	buf,	/* in: buffer for checkpoint info */
-	ulint	n,	/* in: nth slot */
-	ulint	file_no,/* in: archived file number */
-	ulint	offset)	/* in: archived file offset */
-{
-	byte*	slot = buf + LOG_CHECKPOINT_GROUP_ARRAY + 8 * n;
-
-	ut_ad(n < LOG_MAX_N_GROUPS);
-
-	mach_write_to_4(slot + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
-	mach_write_to_4(slot + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
-}
-
-/***********************************************************************
-Gets info from a checkpoint about a log group. Reads the two 4-byte values
-of the group's 8-byte slot in the group array of the checkpoint buffer. */
-
-void
-log_checkpoint_get_nth_group_info(
-/*==============================*/
-	byte*	buf,	/* in: buffer containing checkpoint info */
-	ulint	n,	/* in: nth slot */
-	ulint*	file_no,/* out: archived file number */
-	ulint*	offset)	/* out: archived file offset */
-{
-	byte*	slot = buf + LOG_CHECKPOINT_GROUP_ARRAY + 8 * n;
-
-	ut_ad(n < LOG_MAX_N_GROUPS);
-
-	*file_no = mach_read_from_4(slot + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
-	*offset = mach_read_from_4(slot + LOG_CHECKPOINT_ARCHIVED_OFFSET);
-}
-
-/**********************************************************
-Writes the checkpoint info to a log group header. The checkpoint buffer of
-the group is filled with the next checkpoint number and lsn, the offset of
-that lsn within the group, the log buffer size, the archived lsn, the
-archive info slot of every log group, and two checksums; the tablespace
-free limit and its magic number are stored after the checksums have been
-computed. If log_do_write is set, one log block is then written to
-LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2, chosen by the parity of the
-checkpoint number. The caller must hold log_sys->mutex. */
-static
-void
-log_group_checkpoint(
-/*=================*/
- log_group_t* group) /* in: log group */
-{
- log_group_t* group2;
-#ifdef UNIV_LOG_ARCHIVE
- dulint archived_lsn;
- dulint next_archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
- ulint write_offset;
- ulint fold;
- byte* buf;
- ulint i;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
-# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
-#endif
-
- buf = group->checkpoint_buf;
-
- mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
- mach_write_to_8(buf + LOG_CHECKPOINT_LSN,
- log_sys->next_checkpoint_lsn);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
- log_group_calc_lsn_offset(
- log_sys->next_checkpoint_lsn, group));
-
- mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
-
-#ifdef UNIV_LOG_ARCHIVE
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- archived_lsn = ut_dulint_max;
- } else {
- archived_lsn = log_sys->archived_lsn;
-
- if (0 != ut_dulint_cmp(archived_lsn,
- log_sys->next_archived_lsn)) {
- next_archived_lsn = log_sys->next_archived_lsn;
- /* For debugging only: the value is not used further
- in this function */
- }
- }
-
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
-#else /* UNIV_LOG_ARCHIVE */
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);
-#endif /* UNIV_LOG_ARCHIVE */
-
- for (i = 0; i < LOG_MAX_N_GROUPS; i++) { /* clear every slot first */
- log_checkpoint_set_nth_group_info(buf, i, 0, 0);
- }
-
- group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group2) { /* then fill in the slots of the existing groups */
- log_checkpoint_set_nth_group_info(buf, group2->id,
-#ifdef UNIV_LOG_ARCHIVE
- group2->archived_file_no,
- group2->archived_offset
-#else /* UNIV_LOG_ARCHIVE */
- 0, 0
-#endif /* UNIV_LOG_ARCHIVE */
- );
-
- group2 = UT_LIST_GET_NEXT(log_groups, group2);
- }
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); /* covers the bytes before the first checksum field */
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
-
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
-
- /* Starting from InnoDB-3.23.50, we also write info on allocated
- size in the tablespace */
-
- mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT,
- log_fsp_current_free_limit);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N,
- LOG_CHECKPOINT_FSP_MAGIC_N_VAL);
-
- /* We alternate the physical place of the checkpoint info in the first
- log file */
-
- if (ut_dulint_get_low(log_sys->next_checkpoint_no) % 2 == 0) {
- write_offset = LOG_CHECKPOINT_1;
- } else {
- write_offset = LOG_CHECKPOINT_2;
- }
-
- if (log_do_write) {
- if (log_sys->n_pending_checkpoint_writes == 0) {
-
- rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
- LOG_CHECKPOINT);
- }
-
- log_sys->n_pending_checkpoint_writes++;
-
- log_sys->n_log_ios++;
-
- /* We send as the last parameter the group machine address
- added with 1, as we want to distinguish between a normal log
- file write and a checkpoint field write */
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id,
- write_offset / UNIV_PAGE_SIZE,
- write_offset % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE,
- buf, ((byte*)group + 1));
-
- ut_ad(((ulint)group & 0x1UL) == 0); /* the tag bit requires an even group address */
- }
-}
-
-/**********************************************************
-Fills in a log file header buffer, including a checkpoint block in the
-LOG_CHECKPOINT_1 slot, for log files which are created in backup
-restoration. */
-
-void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
-	byte*	hdr_buf,/* in: buffer which will be written to the start
-			of the first log file */
-	dulint	start)	/* in: lsn of the start of the first log file;
-			we pretend that there is a checkpoint at
-			start + LOG_BLOCK_HDR_SIZE */
-{
-	static const char	label[] = "ibbackup ";
-	char*			created_by;
-	byte*			cp;
-	dulint			checkpoint_lsn;
-	ulint			checksum;
-
-	created_by = (char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP;
-	cp = hdr_buf + LOG_CHECKPOINT_1;
-	checkpoint_lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE);
-
-	/* File header fields */
-	mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
-	mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
-
-	/* The label of ibbackup --restore, followed by a timestamp which
-	overwrites the terminating null character of the label */
-	strcpy(created_by, label);
-	ut_sprintf_timestamp(created_by + (sizeof label) - 1);
-
-	/* Checkpoint fields */
-	mach_write_to_8(cp + LOG_CHECKPOINT_NO, ut_dulint_zero);
-	mach_write_to_8(cp + LOG_CHECKPOINT_LSN, checkpoint_lsn);
-	mach_write_to_4(cp + LOG_CHECKPOINT_OFFSET,
-			LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
-	mach_write_to_4(cp + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
-	mach_write_to_8(cp + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);
-
-	/* Checkpoint checksums */
-	checksum = ut_fold_binary(cp, LOG_CHECKPOINT_CHECKSUM_1);
-	mach_write_to_4(cp + LOG_CHECKPOINT_CHECKSUM_1, checksum);
-
-	checksum = ut_fold_binary(cp + LOG_CHECKPOINT_LSN,
-				  LOG_CHECKPOINT_CHECKSUM_2
-				  - LOG_CHECKPOINT_LSN);
-	mach_write_to_4(cp + LOG_CHECKPOINT_CHECKSUM_2, checksum);
-
-	/* Starting from InnoDB-3.23.50, we should also write info on
-	allocated size in the tablespace, but unfortunately we do not
-	know it here */
-}
-
-/**********************************************************
-Reads one checkpoint block of a log group header to
-log_sys->checkpoint_buf, using a synchronous read. The caller must own
-log_sys->mutex. */
-
-void
-log_group_read_checkpoint_info(
-/*===========================*/
-	log_group_t*	group,	/* in: log group */
-	ulint		field)	/* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-{
-	ulint	page_no		= field / UNIV_PAGE_SIZE;
-	ulint	byte_offset	= field % UNIV_PAGE_SIZE;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	log_sys->n_log_ios++;
-
-	fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id,
-	       page_no, byte_offset,
-	       OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
-}
-
-/**********************************************************
-Writes the checkpoint info to the header of every log group in
-log_sys->log_groups. The caller must own log_sys->mutex. */
-
-void
-log_groups_write_checkpoint_info(void)
-/*==================================*/
-{
-	log_group_t*	cur;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	for (cur = UT_LIST_GET_FIRST(log_sys->log_groups);
-	     cur;
-	     cur = UT_LIST_GET_NEXT(log_groups, cur)) {
-
-		log_group_checkpoint(cur);
-	}
-}
-
-/**********************************************************
-Makes a checkpoint. Note that this function does not flush dirty
-blocks from the buffer pool: it only checks what is lsn of the oldest
-modification in the pool, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool.
-The function acquires and releases log_sys->mutex itself. */
-
-ibool
-log_checkpoint(
-/*===========*/
- /* out: TRUE if success, FALSE if a checkpoint
- write was already running */
- ibool sync, /* in: TRUE if synchronous operation is
- desired */
- ibool write_always) /* in: the function normally checks if
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
-{
- dulint oldest_lsn;
-
- /* If recovery is still running, apply the hashed log records
- first */
- if (recv_recovery_is_on()) {
- recv_apply_hashed_log_recs(TRUE);
- }
-
- /* Flush the tablespace files, unless the flush method says that
- no syncing is to be done */
- if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
- fil_flush_file_spaces(FIL_TABLESPACE);
- }
-
- mutex_enter(&(log_sys->mutex));
-
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- mutex_exit(&(log_sys->mutex));
-
- /* Because log also contains headers and dummy log records,
- if the buffer pool contains no dirty buffers, oldest_lsn
- gets the value log_sys->lsn from the previous function,
- and we must make sure that the log is flushed up to that
- lsn. If there are dirty buffers in the buffer pool, then our
- write-ahead-logging algorithm ensures that the log has been flushed
- up to oldest_lsn. */
-
- log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-
- mutex_enter(&(log_sys->mutex));
-
- /* The previous checkpoint is already at oldest_lsn or later and a
- write was not forced: there is nothing to do, and this is reported
- as a success */
- if (!write_always
- && ut_dulint_cmp(log_sys->last_checkpoint_lsn, oldest_lsn) >= 0) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(TRUE);
- }
-
- ut_ad(ut_dulint_cmp(log_sys->written_to_all_lsn, oldest_lsn) >= 0);
-
- if (log_sys->n_pending_checkpoint_writes > 0) {
- /* A checkpoint write is running */
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- /* Wait for the checkpoint write to complete: the
- s-lock can be granted only when checkpoint_lock is not
- held in x-mode (it is x-locked in
- log_group_checkpoint when the first pending write
- is started) */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
- }
-
- return(FALSE);
- }
-
- log_sys->next_checkpoint_lsn = oldest_lsn;
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr, "Making checkpoint no %lu at lsn %lu %lu\n",
- (ulong) ut_dulint_get_low(log_sys->next_checkpoint_no),
- (ulong) ut_dulint_get_high(oldest_lsn),
- (ulong) ut_dulint_get_low(oldest_lsn));
- }
-#endif /* UNIV_DEBUG */
-
- /* Start the checkpoint writes to all log groups while still
- holding the log mutex */
- log_groups_write_checkpoint_info();
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- /* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
- }
-
- return(TRUE);
-}
-
-/********************************************************************
-Makes a checkpoint at a given lsn or later: first preflushes the modified
-pages of the buffer pool synchronously, then writes the checkpoint
-synchronously. Both steps are retried until they report success. */
-
-void
-log_make_checkpoint_at(
-/*===================*/
-	dulint	lsn,		/* in: make a checkpoint at this or a later
-				lsn, if ut_dulint_max, makes a checkpoint at
-				the latest lsn */
-	ibool	write_always)	/* in: the function normally checks if
-				the new checkpoint would have a greater
-				lsn than the previous one: if not, then no
-				physical write is done; by setting this
-				parameter TRUE, a physical write will always be
-				made to log files */
-{
-	ibool	done;
-
-	/* Step 1: preflush pages synchronously */
-
-	do {
-		done = log_preflush_pool_modified_pages(lsn, TRUE);
-	} while (!done);
-
-	/* Step 2: write the checkpoint synchronously */
-
-	do {
-		done = log_checkpoint(TRUE, write_always);
-	} while (!done);
-}
-
-/********************************************************************
-Tries to establish a big enough margin of free space in the log groups, such
-that a new log entry can be catenated without an immediate need for a
-checkpoint. NOTE: this function may only be called if the calling thread
-owns no synchronization objects!
-The function returns at once if log->check_flush_or_checkpoint is not set.
-Otherwise it decides, from the age of the oldest modification and the age
-of the last checkpoint, whether to preflush buffer pool pages and whether
-to make a checkpoint, and whether to do these synchronously. */
-static
-void
-log_checkpoint_margin(void)
-/*=======================*/
-{
- log_t* log = log_sys;
- ulint age;
- ulint checkpoint_age;
- ulint advance;
- dulint oldest_lsn;
- ibool sync;
- ibool checkpoint_sync;
- ibool do_checkpoint;
- ibool success;
-loop:
- /* The decisions are recomputed from scratch on every round */
- sync = FALSE;
- checkpoint_sync = FALSE;
- do_checkpoint = FALSE;
-
- mutex_enter(&(log->mutex));
-
- if (log->check_flush_or_checkpoint == FALSE) {
- mutex_exit(&(log->mutex));
-
- return;
- }
-
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- /* age = distance from the oldest modification to the current lsn */
- age = ut_dulint_minus(log->lsn, oldest_lsn);
-
- if (age > log->max_modified_age_sync) {
-
- /* A flush is urgent: we have to do a synchronous preflush */
-
- sync = TRUE;
- /* Advance by twice the amount by which the limit is exceeded */
- advance = 2 * (age - log->max_modified_age_sync);
- } else if (age > log->max_modified_age_async) {
-
- /* A flush is not urgent: we do an asynchronous preflush */
- advance = age - log->max_modified_age_async;
- } else {
- advance = 0;
- }
-
- checkpoint_age = ut_dulint_minus(log->lsn, log->last_checkpoint_lsn);
-
- if (checkpoint_age > log->max_checkpoint_age) {
- /* A checkpoint is urgent: we do it synchronously */
-
- checkpoint_sync = TRUE;
-
- do_checkpoint = TRUE;
-
- /* Note that check_flush_or_checkpoint is left set in this
- branch: the function loops back after the checkpoint */
-
- } else if (checkpoint_age > log->max_checkpoint_age_async) {
- /* A checkpoint is not urgent: do it asynchronously */
-
- do_checkpoint = TRUE;
-
- log->check_flush_or_checkpoint = FALSE;
- } else {
- log->check_flush_or_checkpoint = FALSE;
- }
-
- mutex_exit(&(log->mutex));
-
- if (advance) {
- dulint new_oldest = ut_dulint_add(oldest_lsn, advance);
-
- success = log_preflush_pool_modified_pages(new_oldest, sync);
-
- /* If the flush succeeded, this thread has done its part
- and can proceed. If it did not succeed, there was another
- thread doing a flush at the same time. If sync was FALSE,
- the flush was not urgent, and we let this thread proceed.
- Otherwise, we let it start from the beginning again. */
-
- if (sync && !success) {
- mutex_enter(&(log->mutex));
-
- /* Set the flag again, as it may have been cleared
- above, so that the next round does not return at once */
- log->check_flush_or_checkpoint = TRUE;
-
- mutex_exit(&(log->mutex));
- goto loop;
- }
- }
-
- if (do_checkpoint) {
- /* The return value is not checked: in the synchronous case
- the situation is evaluated again from the beginning */
- log_checkpoint(checkpoint_sync, FALSE);
-
- if (checkpoint_sync) {
-
- goto loop;
- }
- }
-}
-
-/**********************************************************
-Reads a specified log segment to a buffer. The segment is read in pieces
-so that no single read crosses a log file boundary. The read is
-synchronous if type is LOG_RECOVER. The caller must own log_sys->mutex. */
-
-void
-log_group_read_log_seg(
-/*===================*/
-	ulint		type,		/* in: LOG_ARCHIVE or LOG_RECOVER */
-	byte*		buf,		/* in: buffer where to read */
-	log_group_t*	group,		/* in: log group */
-	dulint		start_lsn,	/* in: read area start */
-	dulint		end_lsn)	/* in: read area end */
-{
-	ibool	sync;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	sync = (type == LOG_RECOVER) ? TRUE : FALSE;
-
-	do {
-		ulint	source_offset;
-		ulint	offset_in_file;
-		ulint	len;
-
-		source_offset = log_group_calc_lsn_offset(start_lsn, group);
-		offset_in_file = source_offset % group->file_size;
-
-		len = ut_dulint_minus(end_lsn, start_lsn);
-
-		ut_ad(len != 0);
-
-		/* Cut the piece at the end of the current log file */
-
-		if (offset_in_file + len > group->file_size) {
-
-			len = group->file_size - offset_in_file;
-		}
-
-#ifdef UNIV_LOG_ARCHIVE
-		if (type == LOG_ARCHIVE) {
-
-			log_sys->n_pending_archive_ios++;
-		}
-#endif /* UNIV_LOG_ARCHIVE */
-
-		log_sys->n_log_ios++;
-
-		fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id,
-		       source_offset / UNIV_PAGE_SIZE,
-		       source_offset % UNIV_PAGE_SIZE,
-		       len, buf, NULL);
-
-		/* Move on to the part which is still unread */
-
-		start_lsn = ut_dulint_add(start_lsn, len);
-		buf += len;
-
-	} while (ut_dulint_cmp(start_lsn, end_lsn) != 0);
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
-Generates an archived log file name: srv_arch_dir, followed by
-"ib_arch_log_" and the file number as 10 zero-padded decimal digits.
-NOTE(review): the name is written with an unbounded sprintf; the caller
-has to provide a buffer which is big enough for it. */
-
-void
-log_archived_file_name_gen(
-/*=======================*/
-	char*	buf,	/* in: buffer where to write */
-	ulint	id __attribute__((unused)),
-			/* in: group id;
-			currently we only archive the first group */
-	ulint	file_no)/* in: file number */
-{
-	const ulong	no = (ulong) file_no;
-
-	sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, no);
-}
-
-/**********************************************************
-Fills in the header of an archived log file, marked as not completed,
-and writes it synchronously to the start of the nth file in the archive
-log file space. The caller must own log_sys->mutex. */
-static
-void
-log_group_archive_file_header_write(
-/*================================*/
-	log_group_t*	group,		/* in: log group */
-	ulint		nth_file,	/* in: header to the nth file in the
-					archive log file space */
-	ulint		file_no,	/* in: archived file number */
-	dulint		start_lsn)	/* in: log file data starts at this
-					lsn */
-{
-	byte*	hdr;
-	ulint	file_start;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	ut_a(nth_file < group->n_files);
-
-	hdr = group->archive_file_header_bufs[nth_file];
-
-	mach_write_to_4(hdr + LOG_GROUP_ID, group->id);
-	mach_write_to_8(hdr + LOG_FILE_START_LSN, start_lsn);
-	mach_write_to_4(hdr + LOG_FILE_NO, file_no);
-
-	/* The file is not complete yet */
-	mach_write_to_4(hdr + LOG_FILE_ARCH_COMPLETED, FALSE);
-
-	/* Byte offset of the nth file within the archive space */
-	file_start = nth_file * group->file_size;
-
-	log_sys->n_log_ios++;
-
-	fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
-	       file_start / UNIV_PAGE_SIZE,
-	       file_start % UNIV_PAGE_SIZE,
-	       2 * OS_FILE_LOG_BLOCK_SIZE,
-	       hdr, &log_archive_io);
-}
-
-/**********************************************************
-Marks an archived log file as completed in its header: sets the
-LOG_FILE_ARCH_COMPLETED flag and the end lsn in the header buffer, and
-writes synchronously one log block starting from the flag field to the
-nth file in the archive log file space. The caller must own
-log_sys->mutex. */
-static
-void
-log_group_archive_completed_header_write(
-/*=====================================*/
-	log_group_t*	group,		/* in: log group */
-	ulint		nth_file,	/* in: header to the nth file in the
-					archive log file space */
-	dulint		end_lsn)	/* in: end lsn of the file */
-{
-	byte*	hdr;
-	ulint	field_offset;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-	ut_a(nth_file < group->n_files);
-
-	hdr = group->archive_file_header_bufs[nth_file];
-
-	mach_write_to_4(hdr + LOG_FILE_ARCH_COMPLETED, TRUE);
-	mach_write_to_8(hdr + LOG_FILE_END_LSN, end_lsn);
-
-	/* Byte offset of the completed flag within the archive space */
-	field_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
-
-	log_sys->n_log_ios++;
-
-	fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
-	       field_offset / UNIV_PAGE_SIZE,
-	       field_offset % UNIV_PAGE_SIZE,
-	       OS_FILE_LOG_BLOCK_SIZE,
-	       hdr + LOG_FILE_ARCH_COMPLETED,
-	       &log_archive_io);
-}
-
-/**********************************************************
-Does the archive writes for a single log group. The data between
-log_sys->archived_lsn and log_sys->next_archived_lsn is written from
-log_sys->archive_buf to the archive file space of the group, creating
-or opening archive files and writing their headers as needed. The
-writes are issued with the sync flag FALSE. On return
-group->next_archived_file_no and group->next_archived_offset tell where
-the archive will end when the writes have completed. The caller must
-own log_sys->mutex. */
-static
-void
-log_group_archive(
-/*==============*/
- log_group_t* group) /* in: log group */
-{
- os_file_t file_handle;
- dulint start_lsn;
- dulint end_lsn;
- char name[1024]; /* NOTE(review): filled by
- log_archived_file_name_gen with an unbounded
- sprintf of srv_arch_dir and the file number */
- byte* buf;
- ulint len;
- ibool ret;
- ulint next_offset;
- ulint n_files;
- ulint open_mode;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- start_lsn = log_sys->archived_lsn;
-
- /* Both ends of the archived area must be on a log block boundary */
- ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- end_lsn = log_sys->next_archived_lsn;
-
- ut_a(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- buf = log_sys->archive_buf;
-
- /* n_files counts the archive files which are written full in
- this call */
- n_files = 0;
-
- next_offset = group->archived_offset;
-loop:
- /* A file has to be added to the archive space if we are at the
- start of a new file, or if the archive space is empty */
- if ((next_offset % group->file_size == 0)
- || (fil_space_get_size(group->archive_space_id) == 0)) {
-
- /* Add the file to the archive file space; create or open the
- file */
-
- if (next_offset % group->file_size == 0) {
- open_mode = OS_FILE_CREATE;
- } else {
- open_mode = OS_FILE_OPEN;
- }
-
- log_archived_file_name_gen(name, group->id,
- group->archived_file_no + n_files);
-
- file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
- OS_DATA_FILE, &ret);
-
- /* If creating failed, fall back to opening the file */
- if (!ret && (open_mode == OS_FILE_CREATE)) {
- file_handle = os_file_create(
- name, OS_FILE_OPEN, OS_FILE_AIO,
- OS_DATA_FILE, &ret);
- }
-
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Cannot create or open"
- " archive log file %s.\n"
- "InnoDB: Cannot continue operation.\n"
- "InnoDB: Check that the log archive"
- " directory exists,\n"
- "InnoDB: you have access rights to it, and\n"
- "InnoDB: there is space available.\n", name);
- exit(1);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr, "Created archive file %s\n", name);
- }
-#endif /* UNIV_DEBUG */
-
- /* The handle is closed at once: the i/o below addresses the
- file through the archive space */
- ret = os_file_close(file_handle);
-
- ut_a(ret);
-
- /* Add the archive file as a node to the space */
-
- fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
- group->archive_space_id, FALSE);
-
- if (next_offset % group->file_size == 0) {
- /* A new file: write its header and skip over it */
- log_group_archive_file_header_write(
- group, n_files,
- group->archived_file_no + n_files,
- start_lsn);
-
- next_offset += LOG_FILE_HDR_SIZE;
- }
- }
-
- len = ut_dulint_minus(end_lsn, start_lsn);
-
- /* Do not write past the end of the current archive file */
- if (group->file_size < (next_offset % group->file_size) + len) {
-
- len = group->file_size - (next_offset % group->file_size);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Archiving starting at lsn %lu %lu, len %lu"
- " to group %lu\n",
- (ulong) ut_dulint_get_high(start_lsn),
- (ulong) ut_dulint_get_low(start_lsn),
- (ulong) len, (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- log_sys->n_pending_archive_ios++;
-
- log_sys->n_log_ios++;
-
- /* The write length is aligned up to a whole number of log blocks */
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id,
- next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE,
- ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
- &log_archive_io);
-
- start_lsn = ut_dulint_add(start_lsn, len);
- next_offset += len;
- buf += len;
-
- if (next_offset % group->file_size == 0) {
- /* The current archive file became full */
- n_files++;
- }
-
- if (ut_dulint_cmp(end_lsn, start_lsn) != 0) {
-
- /* There is still data left to archive */
- goto loop;
- }
-
- group->next_archived_file_no = group->archived_file_no + n_files;
- group->next_archived_offset = next_offset % group->file_size;
-
- ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
-}
-
-/*********************************************************
-Does the archive writes. Only the first log group in
-log_sys->log_groups is archived. The caller must own log_sys->mutex. */
-static
-void
-log_archive_groups(void)
-/*====================*/
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	log_group_archive(UT_LIST_GET_FIRST(log_sys->log_groups));
-}
-
-/*********************************************************
-Completes the archiving write phase for (each log group), currently,
-the first log group. The archived position of the group is advanced to
-the position computed when the writes were started, the headers of the
-archive files which have been written full are marked completed, and
-those files are truncated from the start of the archive file space.
-The caller must own log_sys->mutex. */
-static
-void
-log_archive_write_complete_groups(void)
-/*===================================*/
-{
- log_group_t* group;
- ulint end_offset;
- ulint trunc_files;
- ulint n_files;
- dulint start_lsn;
- dulint end_lsn;
- ulint i;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- /* Make the position reached by the completed writes the official
- archived position */
- group->archived_file_no = group->next_archived_file_no;
- group->archived_offset = group->next_archived_offset;
-
- /* Truncate from the archive file space all but the last
- file, or if it has been written full, all files */
-
- /* Number of files in the archive space; presumably
- fil_space_get_size returns the size in pages -- TODO confirm */
- n_files = (UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id))
- / group->file_size;
- ut_ad(n_files > 0);
-
- end_offset = group->archived_offset;
-
- if (end_offset % group->file_size == 0) {
-
- /* The last file is full, too */
- trunc_files = n_files;
- } else {
- trunc_files = n_files - 1;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes && trunc_files) {
- fprintf(stderr,
- "Complete file(s) archived to group %lu\n",
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- /* Calculate the archive file space start lsn */
- /* Only the data part of each file, file_size - LOG_FILE_HDR_SIZE
- bytes, is counted in the subtraction */
- start_lsn = ut_dulint_subtract(
- log_sys->next_archived_lsn,
- end_offset - LOG_FILE_HDR_SIZE + trunc_files
- * (group->file_size - LOG_FILE_HDR_SIZE));
- end_lsn = start_lsn;
-
- for (i = 0; i < trunc_files; i++) {
-
- /* end_lsn becomes the end lsn of the ith file */
- end_lsn = ut_dulint_add(end_lsn,
- group->file_size - LOG_FILE_HDR_SIZE);
-
- /* Write a notice to the headers of archived log
- files that the file write has been completed */
-
- log_group_archive_completed_header_write(group, i, end_lsn);
- }
-
- fil_space_truncate_start(group->archive_space_id,
- trunc_files * group->file_size);
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fputs("Archiving writes completed\n", stderr);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/**********************************************************
-Checks if the current archiving phase has completed, i.e., if there are
-no pending archive i/o's left. When the read phase has completed, the
-write phase is started. When the write phase has completed, the
-archived lsn is advanced and the x-lock on log_sys->archive_lock is
-released. The caller must own log_sys->mutex. */
-static
-void
-log_archive_check_completion_low(void)
-/*==================================*/
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (log_sys->n_pending_archive_ios == 0
- && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fputs("Archiving read completed\n", stderr);
- }
-#endif /* UNIV_DEBUG */
-
- /* Archive buffer has now been read in: start archive writes */
-
- log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
-
- log_archive_groups();
- }
-
- /* This test is deliberately made after the previous one: the
- writes started above increment n_pending_archive_ios in
- log_group_archive, so the condition below is re-evaluated with
- the updated count and phase */
-
- if (log_sys->n_pending_archive_ios == 0
- && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
-
- log_archive_write_complete_groups();
-
- log_sys->archived_lsn = log_sys->next_archived_lsn;
-
- /* Release the x-lock which log_archive_do took when it
- started the archiving operation */
- rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
- }
-}
-
-/**********************************************************
-Completes an archiving i/o: flushes the archive file space of the first
-log group, decrements the count of pending archive i/o's, and checks if
-the archiving phase was completed. */
-static
-void
-log_io_complete_archive(void)
-/*=========================*/
-{
-	log_t*		log = log_sys;
-	log_group_t*	first_group;
-
-	/* Look up the first log group under the mutex */
-
-	mutex_enter(&(log->mutex));
-	first_group = UT_LIST_GET_FIRST(log->log_groups);
-	mutex_exit(&(log->mutex));
-
-	/* The flush is done without holding the log mutex */
-
-	fil_flush(first_group->archive_space_id);
-
-	mutex_enter(&(log->mutex));
-
-	ut_ad(log->n_pending_archive_ios > 0);
-	log->n_pending_archive_ios--;
-
-	log_archive_check_completion_low();
-
-	mutex_exit(&(log->mutex));
-}
-
-/************************************************************************
-Starts an archiving operation: reads the log between
-log_sys->archived_lsn and a limit lsn to log_sys->archive_buf. The limit
-is at most log_sys->archive_buf_size bytes ahead of the archived lsn, and
-not past the current lsn aligned down to a log block boundary. If
-archiving is stopped, or being stopped for the second phase, the function
-waits for the event log_sys->archiving_on before it continues. */
-
-ibool
-log_archive_do(
-/*===========*/
-			/* out: TRUE if succeed, FALSE if an archiving
-			operation was already running */
-	ibool	sync,	/* in: TRUE if synchronous operation is desired */
-	ulint*	n_bytes)/* out: archive log buffer size, 0 if nothing to
-			archive */
-{
-	ibool	calc_new_limit;
-	dulint	start_lsn;
-	dulint	limit_lsn;
-
-	calc_new_limit = TRUE;
-loop:
-	mutex_enter(&(log_sys->mutex));
-
-	if (log_sys->archiving_state == LOG_ARCH_OFF) {
-		mutex_exit(&(log_sys->mutex));
-
-		*n_bytes = 0;
-
-		return(TRUE);
-
-	} else if (log_sys->archiving_state == LOG_ARCH_STOPPED
-		   || log_sys->archiving_state == LOG_ARCH_STOPPING2) {
-
-		mutex_exit(&(log_sys->mutex));
-
-		os_event_wait(log_sys->archiving_on);
-
-		/* The log mutex is acquired again at 'loop': it must not
-		be taken here, or this thread would enter the same mutex
-		twice */
-
-		goto loop;
-	}
-
-	start_lsn = log_sys->archived_lsn;
-
-	if (calc_new_limit) {
-		ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
-		limit_lsn = ut_dulint_add(start_lsn,
-					  log_sys->archive_buf_size);
-
-		*n_bytes = log_sys->archive_buf_size;
-
-		if (ut_dulint_cmp(limit_lsn, log_sys->lsn) >= 0) {
-
-			/* Less than a full archive buffer is available:
-			archive up to the start of the current block */
-
-			limit_lsn = ut_dulint_align_down(
-				log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
-		}
-	}
-
-	if (ut_dulint_cmp(log_sys->archived_lsn, limit_lsn) >= 0) {
-
-		/* Nothing to archive */
-
-		mutex_exit(&(log_sys->mutex));
-
-		*n_bytes = 0;
-
-		return(TRUE);
-	}
-
-	if (ut_dulint_cmp(log_sys->written_to_all_lsn, limit_lsn) < 0) {
-
-		/* The log has not been written to all groups up to the
-		limit yet: write it, and retry with the same limit */
-
-		mutex_exit(&(log_sys->mutex));
-
-		log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-
-		calc_new_limit = FALSE;
-
-		goto loop;
-	}
-
-	if (log_sys->n_pending_archive_ios > 0) {
-		/* An archiving operation is running */
-
-		mutex_exit(&(log_sys->mutex));
-
-		if (sync) {
-			/* Wait until the x-lock on archive_lock has been
-			released */
-			rw_lock_s_lock(&(log_sys->archive_lock));
-			rw_lock_s_unlock(&(log_sys->archive_lock));
-		}
-
-		*n_bytes = log_sys->archive_buf_size;
-
-		return(FALSE);
-	}
-
-	/* The x-lock is released in log_archive_check_completion_low
-	when the write phase has completed */
-
-	rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
-
-	log_sys->archiving_phase = LOG_ARCHIVE_READ;
-
-	log_sys->next_archived_lsn = limit_lsn;
-
-#ifdef UNIV_DEBUG
-	if (log_debug_writes) {
-		fprintf(stderr,
-			"Archiving from lsn %lu %lu to lsn %lu %lu\n",
-			(ulong) ut_dulint_get_high(log_sys->archived_lsn),
-			(ulong) ut_dulint_get_low(log_sys->archived_lsn),
-			(ulong) ut_dulint_get_high(limit_lsn),
-			(ulong) ut_dulint_get_low(limit_lsn));
-	}
-#endif /* UNIV_DEBUG */
-
-	/* Read the log segment to the archive buffer */
-
-	log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
-			       UT_LIST_GET_FIRST(log_sys->log_groups),
-			       start_lsn, limit_lsn);
-
-	mutex_exit(&(log_sys->mutex));
-
-	if (sync) {
-		/* Wait for the archiving operation to complete */
-		rw_lock_s_lock(&(log_sys->archive_lock));
-		rw_lock_s_unlock(&(log_sys->archive_lock));
-	}
-
-	*n_bytes = log_sys->archive_buf_size;
-
-	return(TRUE);
-}
-
-/********************************************************************
-Writes the log contents to the archive at least up to the lsn when this
-function was called. Does nothing if archiving is off. */
-static
-void
-log_archive_all(void)
-/*=================*/
-{
-	dulint	target_lsn;
-	ulint	n_bytes;
-	ibool	done;
-
-	mutex_enter(&(log_sys->mutex));
-
-	if (log_sys->archiving_state == LOG_ARCH_OFF) {
-		mutex_exit(&(log_sys->mutex));
-
-		return;
-	}
-
-	target_lsn = log_sys->lsn;
-
-	mutex_exit(&(log_sys->mutex));
-
-	log_pad_current_log_block();
-
-	/* Run synchronous archiving operations until the archived lsn
-	has reached the lsn which was recorded above */
-
-	for (;;) {
-		mutex_enter(&(log_sys->mutex));
-
-		done = (ut_dulint_cmp(target_lsn, log_sys->archived_lsn)
-			<= 0);
-
-		mutex_exit(&(log_sys->mutex));
-
-		if (done) {
-
-			break;
-		}
-
-		log_archive_do(TRUE, &n_bytes);
-	}
-}
-
-/*********************************************************
-Closes the possible open archive log file (for each group) the first group,
-and if it was open, increments the group file count by 2, if desired.
-Does nothing if archiving is off, or if the archive file space of the
-group is empty. The caller must own log_sys->mutex. */
-static
-void
-log_archive_close_groups(
-/*=====================*/
-	ibool	increment_file_count)	/* in: TRUE if we want to increment
-					the file count */
-{
-	log_group_t*	group;
-	ulint		trunc_len;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	if (log_sys->archiving_state == LOG_ARCH_OFF) {
-
-		return;
-	}
-
-	group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
-	trunc_len = UNIV_PAGE_SIZE
-		* fil_space_get_size(group->archive_space_id);
-	if (trunc_len > 0) {
-		/* Exactly one archive file is expected in the space */
-		ut_a(trunc_len == group->file_size);
-
-		/* Write a notice to the headers of archived log
-		files that the file write has been completed */
-
-		log_group_archive_completed_header_write(
-			group, 0, log_sys->archived_lsn);
-
-		fil_space_truncate_start(group->archive_space_id,
-					 trunc_len);
-		if (increment_file_count) {
-			group->archived_offset = 0;
-			group->archived_file_no += 2;
-
-#ifdef UNIV_DEBUG
-			/* Report the new file number, and only if it
-			really was incremented */
-
-			if (log_debug_writes) {
-				fprintf(stderr,
-					"Incrementing arch file no to %lu"
-					" in log group %lu\n",
-					(ulong) group->archived_file_no,
-					(ulong) group->id);
-			}
-#endif /* UNIV_DEBUG */
-		}
-	}
-}
-
-/********************************************************************
-Writes the log contents to the archive up to the lsn when this function was
-called, and stops the archiving. When archiving is started again, the archived
-log file numbers start from 2 higher, so that the archiving will not write
-again to the archived log files which exist when this function returns.
-The archiving state goes through LOG_ARCH_STOPPING and LOG_ARCH_STOPPING2
-to LOG_ARCH_STOPPED. */
-
-ulint
-log_archive_stop(void)
-/*==================*/
-			/* out: DB_SUCCESS, or DB_ERROR if archiving was
-			not on */
-{
-	log_t*	log = log_sys;
-	ibool	checkpoint_done;
-
-	mutex_enter(&(log->mutex));
-
-	if (log->archiving_state != LOG_ARCH_ON) {
-
-		mutex_exit(&(log->mutex));
-
-		return(DB_ERROR);
-	}
-
-	log->archiving_state = LOG_ARCH_STOPPING;
-
-	mutex_exit(&(log->mutex));
-
-	/* Phase 1: archive everything written so far */
-
-	log_archive_all();
-
-	/* Phase 2: new archiving operations have to wait for the
-	event from now on */
-
-	mutex_enter(&(log->mutex));
-
-	log->archiving_state = LOG_ARCH_STOPPING2;
-	os_event_reset(log->archiving_on);
-
-	mutex_exit(&(log->mutex));
-
-	/* Wait for a possible archiving operation to end */
-
-	rw_lock_s_lock(&(log->archive_lock));
-	rw_lock_s_unlock(&(log->archive_lock));
-
-	/* Close all archived log files, incrementing the file count by 2,
-	if appropriate */
-
-	mutex_enter(&(log->mutex));
-
-	log_archive_close_groups(TRUE);
-
-	mutex_exit(&(log->mutex));
-
-	/* Make a checkpoint, so that if recovery is needed, the file numbers
-	of new archived log files will start from the right value */
-
-	do {
-		checkpoint_done = log_checkpoint(TRUE, TRUE);
-	} while (!checkpoint_done);
-
-	mutex_enter(&(log->mutex));
-
-	log->archiving_state = LOG_ARCH_STOPPED;
-
-	mutex_exit(&(log->mutex));
-
-	return(DB_SUCCESS);
-}
-
-/********************************************************************
-Starts again archiving which has been stopped: sets the archiving state
-to LOG_ARCH_ON and sets the event log_sys->archiving_on. */
-
-ulint
-log_archive_start(void)
-/*===================*/
-			/* out: DB_SUCCESS, or DB_ERROR if the archiving
-			state was not LOG_ARCH_STOPPED */
-{
-	ulint	err = DB_ERROR;
-
-	mutex_enter(&(log_sys->mutex));
-
-	if (log_sys->archiving_state == LOG_ARCH_STOPPED) {
-
-		log_sys->archiving_state = LOG_ARCH_ON;
-
-		os_event_set(log_sys->archiving_on);
-
-		err = DB_SUCCESS;
-	}
-
-	mutex_exit(&(log_sys->mutex));
-
-	return(err);
-}
-
-/********************************************************************
-Stop archiving the log so that a gap may occur in the archived log files.
-The function calls log_archive_stop and sleeps, repeatedly, until the
-archiving state is LOG_ARCH_STOPPED or LOG_ARCH_OFF, and then sets the
-state to LOG_ARCH_OFF. */
-
-ulint
-log_archive_noarchivelog(void)
-/*==========================*/
-			/* out: DB_SUCCESS; the function does not
-			return DB_ERROR */
-{
-	for (;;) {
-		mutex_enter(&(log_sys->mutex));
-
-		if (log_sys->archiving_state == LOG_ARCH_STOPPED
-		    || log_sys->archiving_state == LOG_ARCH_OFF) {
-
-			/* Leave the loop holding the log mutex */
-
-			break;
-		}
-
-		mutex_exit(&(log_sys->mutex));
-
-		/* The return value is not checked: the state is looked
-		at again after the sleep */
-
-		log_archive_stop();
-
-		os_thread_sleep(500000);
-	}
-
-	log_sys->archiving_state = LOG_ARCH_OFF;
-
-	os_event_set(log_sys->archiving_on);
-
-	mutex_exit(&(log_sys->mutex));
-
-	return(DB_SUCCESS);
-}
-
-/********************************************************************
-Start archiving the log so that a gap may occur in the archived log files.
-The archived lsn is set to the current lsn aligned down to a log block
-boundary. */
-
-ulint
-log_archive_archivelog(void)
-/*========================*/
-			/* out: DB_SUCCESS, or DB_ERROR if the archiving
-			state was not LOG_ARCH_OFF */
-{
-	ulint	err = DB_ERROR;
-
-	mutex_enter(&(log_sys->mutex));
-
-	if (log_sys->archiving_state == LOG_ARCH_OFF) {
-
-		log_sys->archiving_state = LOG_ARCH_ON;
-
-		log_sys->archived_lsn
-			= ut_dulint_align_down(log_sys->lsn,
-					       OS_FILE_LOG_BLOCK_SIZE);
-		err = DB_SUCCESS;
-	}
-
-	mutex_exit(&(log_sys->mutex));
-
-	return(err);
-}
-
-/********************************************************************
-Tries to establish a big enough margin of free space in the log groups, such
-that a new log entry can be catenated without an immediate need for
-archiving. Does nothing if archiving is off, or if the unarchived log is
-not older than log->max_archived_lsn_age_async. */
-static
-void
-log_archive_margin(void)
-/*====================*/
-{
-	log_t*	log = log_sys;
-	ulint	unarchived;
-	ulint	n_bytes;
-	ibool	urgent;
-
-	do {
-		mutex_enter(&(log->mutex));
-
-		if (log->archiving_state == LOG_ARCH_OFF) {
-			mutex_exit(&(log->mutex));
-
-			return;
-		}
-
-		unarchived = ut_dulint_minus(log->lsn, log->archived_lsn);
-
-		/* If an archiving is urgent, we have to do synchronous
-		i/o */
-
-		urgent = (unarchived > log->max_archived_lsn_age)
-			? TRUE : FALSE;
-
-		if (!urgent
-		    && !(unarchived > log->max_archived_lsn_age_async)) {
-
-			/* No archiving required yet */
-
-			mutex_exit(&(log->mutex));
-
-			return;
-		}
-
-		mutex_exit(&(log->mutex));
-
-		/* A non-urgent archiving is done with asynchronous i/o */
-
-		log_archive_do(urgent, &n_bytes);
-
-		/* After a synchronous archiving, check again that enough
-		was written to the archive */
-
-	} while (urgent);
-}
-#endif /* UNIV_LOG_ARCHIVE */
-
-/************************************************************************
-Checks that there is enough free space in the log to start a new query step.
-Flushes the log buffer or makes a new checkpoint if necessary. The checks
-are repeated for as long as log_sys->check_flush_or_checkpoint is set
-after them. NOTE: this function may only be called if the calling thread
-owns no synchronization objects! */
-
-void
-log_check_margins(void)
-/*===================*/
-{
-	ibool	check_again;
-
-	do {
-		log_flush_margin();
-
-		log_checkpoint_margin();
-
-#ifdef UNIV_LOG_ARCHIVE
-		log_archive_margin();
-#endif /* UNIV_LOG_ARCHIVE */
-
-		/* Read the flag under the log mutex */
-
-		mutex_enter(&(log_sys->mutex));
-
-		check_again = log_sys->check_flush_or_checkpoint;
-
-		mutex_exit(&(log_sys->mutex));
-
-	} while (check_again);
-}
-
-/********************************************************************
-Makes a checkpoint at the latest lsn and writes it to first page of each
-data file in the database, so that we know that the file spaces contain
-all modifications up to that lsn. This can only be called at database
-shutdown. This function also writes all log in log files to the log archive. */
-
-void
-logs_empty_and_mark_files_at_shutdown(void)
-/*=======================================*/
-{
- dulint lsn;
- ulint arch_log_no;
-
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Starting shutdown...\n");
- }
- /* Wait until the master thread and all other operations are idle: our
- algorithm only works if the server is idle at shutdown */
-
- srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
-loop:
- os_thread_sleep(100000);
-
- mutex_enter(&kernel_mutex);
-
- /* We need the monitor threads to stop before we proceed with a
- normal shutdown. In case of very fast shutdown, however, we can
- proceed without waiting for monitor threads. */
-
- if (srv_fast_shutdown < 2
- && (srv_error_monitor_active
- || srv_lock_timeout_and_monitor_active)) {
-
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- /* Check that there are no longer transactions. We need this wait even
- for the 'very fast' shutdown, because the InnoDB layer may have
- committed or prepared transactions and we don't want to lose them. */
-
- if (trx_n_mysql_transactions > 0
- || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
-
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- if (srv_fast_shutdown == 2) {
- /* In this fastest shutdown we do not flush the buffer pool:
- it is essentially a 'crash' of the InnoDB server. Make sure
- that the log is all flushed to disk, so that we can recover
- all committed transactions in a crash recovery. We must not
- write the lsn stamps to the data files, since at a startup
- InnoDB deduces from the stamps if the previous shutdown was
- clean. */
-
- log_buffer_flush_to_disk();
-
- return; /* We SKIP ALL THE REST !! */
- }
-
- /* Check that the master thread is suspended */
-
- if (srv_n_threads_active[SRV_MASTER] != 0) {
-
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- mutex_exit(&kernel_mutex);
-
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->n_pending_checkpoint_writes
-#ifdef UNIV_LOG_ARCHIVE
- || log_sys->n_pending_archive_ios
-#endif /* UNIV_LOG_ARCHIVE */
- || log_sys->n_pending_writes) {
-
- mutex_exit(&(log_sys->mutex));
-
- goto loop;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (!buf_pool_check_no_pending_io()) {
-
- goto loop;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- log_archive_all();
-#endif /* UNIV_LOG_ARCHIVE */
-
- log_make_checkpoint_at(ut_dulint_max, TRUE);
-
- mutex_enter(&(log_sys->mutex));
-
- lsn = log_sys->lsn;
-
- if ((ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0)
-#ifdef UNIV_LOG_ARCHIVE
- || (srv_log_archive_on
- && ut_dulint_cmp(lsn,
- ut_dulint_add(log_sys->archived_lsn,
- LOG_BLOCK_HDR_SIZE))
- != 0)
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
-
- mutex_exit(&(log_sys->mutex));
-
- goto loop;
- }
-
- arch_log_no = 0;
-
-#ifdef UNIV_LOG_ARCHIVE
- UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
-
- if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
-
- arch_log_no--;
- }
-
- log_archive_close_groups(TRUE);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_exit(&(log_sys->mutex));
-
- mutex_enter(&kernel_mutex);
- /* Check that the master thread has stayed suspended */
- if (srv_n_threads_active[SRV_MASTER] != 0) {
- fprintf(stderr,
- "InnoDB: Warning: the master thread woke up"
- " during shutdown\n");
-
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- fil_flush_file_spaces(FIL_TABLESPACE);
- fil_flush_file_spaces(FIL_LOG);
-
- /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
- pool: therefore it is essential that the buffer pool has been
- completely flushed to disk! (We do not call fil_write... if the
- 'very fast' shutdown is enabled.) */
-
- if (!buf_all_freed()) {
-
- goto loop;
- }
-
- srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
-
- /* Make some checks that the server really is quiet */
- ut_a(srv_n_threads_active[SRV_MASTER] == 0);
- ut_a(buf_all_freed());
- ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
-
- if (ut_dulint_cmp(lsn, srv_start_lsn) < 0) {
- fprintf(stderr,
- "InnoDB: Error: log sequence number"
- " at shutdown %lu %lu\n"
- "InnoDB: is lower than at startup %lu %lu!\n",
- (ulong) ut_dulint_get_high(lsn),
- (ulong) ut_dulint_get_low(lsn),
- (ulong) ut_dulint_get_high(srv_start_lsn),
- (ulong) ut_dulint_get_low(srv_start_lsn));
- }
-
- srv_shutdown_lsn = lsn;
-
- fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
-
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- fil_close_all_files();
-
- /* Make some checks that the server really is quiet */
- ut_a(srv_n_threads_active[SRV_MASTER] == 0);
- ut_a(buf_all_freed());
- ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
-}
-
-/**********************************************************
-Checks by parsing that the catenated log segment for a single mtr is
-consistent. */
-
-ibool
-log_check_log_recs(
-/*===============*/
- byte* buf, /* in: pointer to the start of the log segment
- in the log_sys->buf log buffer */
- ulint len, /* in: segment length in bytes */
- dulint buf_start_lsn) /* in: buffer start lsn */
-{
- dulint contiguous_lsn;
- dulint scanned_lsn;
- byte* start;
- byte* end;
- byte* buf1;
- byte* scan_buf;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (len == 0) {
-
- return(TRUE);
- }
-
- start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
- end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
-
- buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
- scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_memcpy(scan_buf, start, end - start);
-
- recv_scan_log_recs(TRUE,
- (buf_pool->n_frames
- - recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
- FALSE, scan_buf, end - start,
- ut_dulint_align_down(buf_start_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- &contiguous_lsn, &scanned_lsn);
-
- ut_a(ut_dulint_cmp(scanned_lsn, ut_dulint_add(buf_start_lsn, len))
- == 0);
- ut_a(ut_dulint_cmp(recv_sys->recovered_lsn, scanned_lsn) == 0);
-
- mem_free(buf1);
-
- return(TRUE);
-}
-
-/**********************************************************
-Peeks the current lsn. */
-
-ibool
-log_peek_lsn(
-/*=========*/
- /* out: TRUE if success, FALSE if could not get the
- log system mutex */
- dulint* lsn) /* out: if returns TRUE, current lsn is here */
-{
- if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
- *lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************
-Prints info of the log. */
-
-void
-log_print(
-/*======*/
- FILE* file) /* in: file where to print */
-{
- double time_elapsed;
- time_t current_time;
-
- mutex_enter(&(log_sys->mutex));
-
- fprintf(file,
- "Log sequence number %lu %lu\n"
- "Log flushed up to %lu %lu\n"
- "Last checkpoint at %lu %lu\n",
- (ulong) ut_dulint_get_high(log_sys->lsn),
- (ulong) ut_dulint_get_low(log_sys->lsn),
- (ulong) ut_dulint_get_high(log_sys->flushed_to_disk_lsn),
- (ulong) ut_dulint_get_low(log_sys->flushed_to_disk_lsn),
- (ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
- (ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));
-
- current_time = time(NULL);
-
- time_elapsed = 0.001 + difftime(current_time,
- log_sys->last_printout_time);
- fprintf(file,
- "%lu pending log writes, %lu pending chkp writes\n"
- "%lu log i/o's done, %.2f log i/o's/second\n",
- (ulong) log_sys->n_pending_writes,
- (ulong) log_sys->n_pending_checkpoint_writes,
- (ulong) log_sys->n_log_ios,
- ((log_sys->n_log_ios - log_sys->n_log_ios_old)
- / time_elapsed));
-
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = current_time;
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/**************************************************************************
-Refreshes the statistics used to print per-second averages. */
-
-void
-log_refresh_stats(void)
-/*===================*/
-{
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
-}
diff --git a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
deleted file mode 100644
index aef58b7b576..00000000000
--- a/storage/innobase/log/log0recv.c
+++ /dev/null
@@ -1,3398 +0,0 @@
-/******************************************************
-Recovery
-
-(c) 1997 Innobase Oy
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "log0recv.h"
-
-#ifdef UNIV_NONINL
-#include "log0recv.ic"
-#endif
-
-#include "mem0mem.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0rea.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "ibuf0ibuf.h"
-#include "trx0undo.h"
-#include "trx0rec.h"
-#include "trx0roll.h"
-#include "btr0cur.h"
-#include "btr0cur.h"
-#include "btr0cur.h"
-#include "dict0boot.h"
-#include "fil0fil.h"
-#include "sync0sync.h"
-
-#ifdef UNIV_HOTBACKUP
-/* This is set to FALSE if the backup was originally taken with the
-ibbackup --include regexp option: then we do not want to create tables in
-directories which were not included */
-ibool recv_replay_file_ops = TRUE;
-#endif /* UNIV_HOTBACKUP */
-
-/* Log records are stored in the hash table in chunks at most of this size;
-this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
-#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
-
-/* Read-ahead area in applying log records to file pages */
-#define RECV_READ_AHEAD_AREA 32
-
-recv_sys_t* recv_sys = NULL;
-ibool recv_recovery_on = FALSE;
-ibool recv_recovery_from_backup_on = FALSE;
-
-ibool recv_needed_recovery = FALSE;
-
-ibool recv_lsn_checks_on = FALSE;
-
-/* There are two conditions under which we scan the logs, the first
-is normal startup and the second is when we do a recovery from an
-archive.
-This flag is set if we are doing a scan from the last checkpoint during
-startup. If we find log entries that were written after the last checkpoint
-we know that the server was not cleanly shutdown. We must then initialize
-the crash recovery environment before attempting to store these entries in
-the log hash table. */
-ibool recv_log_scan_is_startup_type = FALSE;
-
-/* If the following is TRUE, the buffer pool file pages must be invalidated
-after recovery and no ibuf operations are allowed; this becomes TRUE if
-the log record hash table becomes too full, and log records must be merged
-to file pages already before the recovery is finished: in this case no
-ibuf operations are allowed, as they could modify the pages read in the
-buffer pool before the pages have been recovered to the up-to-date state */
-
-/* Recovery is running and no operations on the log files are allowed
-yet: the variable name is misleading */
-
-ibool recv_no_ibuf_operations = FALSE;
-
-/* The following counter is used to decide when to print info on
-log scan */
-ulint recv_scan_print_counter = 0;
-
-ibool recv_is_from_backup = FALSE;
-#ifdef UNIV_HOTBACKUP
-ibool recv_is_making_a_backup = FALSE;
-#else
-# define recv_is_making_a_backup FALSE
-#endif /* UNIV_HOTBACKUP */
-
-ulint recv_previous_parsed_rec_type = 999999;
-ulint recv_previous_parsed_rec_offset = 0;
-ulint recv_previous_parsed_rec_is_multi = 0;
-
-ulint recv_max_parsed_page_no = 0;
-
-/* This many frames must be left free in the buffer pool when we scan
-the log and store the scanned log records in the buffer pool: we will
-use these free frames to read in pages when we start applying the
-log records to the database. */
-
-ulint recv_n_pool_free_frames = 256;
-
-/* The maximum lsn we see for a page during the recovery process. If this
-is bigger than the lsn we are able to scan up to, that is an indication that
-the recovery failed and the database may be corrupt. */
-
-dulint recv_max_page_lsn;
-
-/* prototypes */
-
-/***********************************************************
-Initialize crash recovery environment. Can be called iff
-recv_needed_recovery == FALSE. */
-static
-void
-recv_init_crash_recovery(void);
-/*===========================*/
-
-/************************************************************
-Creates the recovery system. */
-
-void
-recv_sys_create(void)
-/*=================*/
-{
- if (recv_sys != NULL) {
-
- return;
- }
-
- recv_sys = mem_alloc(sizeof(recv_sys_t));
-
- mutex_create(&recv_sys->mutex, SYNC_RECV);
-
- recv_sys->heap = NULL;
- recv_sys->addr_hash = NULL;
-}
-
-/************************************************************
-Inits the recovery system for a recovery operation. */
-
-void
-recv_sys_init(
-/*==========*/
- ibool recover_from_backup, /* in: TRUE if this is called
- to recover from a hot backup */
- ulint available_memory) /* in: available memory in bytes */
-{
- if (recv_sys->heap != NULL) {
-
- return;
- }
-
- mutex_enter(&(recv_sys->mutex));
-
- if (!recover_from_backup) {
- recv_sys->heap = mem_heap_create_in_buffer(256);
- } else {
- recv_sys->heap = mem_heap_create(256);
- recv_is_from_backup = TRUE;
- }
-
- recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
- recv_sys->len = 0;
- recv_sys->recovered_offset = 0;
-
- recv_sys->addr_hash = hash_create(available_memory / 64);
- recv_sys->n_addrs = 0;
-
- recv_sys->apply_log_recs = FALSE;
- recv_sys->apply_batch_on = FALSE;
-
- recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
-
- recv_sys->last_block = ut_align(recv_sys->last_block_buf_start,
- OS_FILE_LOG_BLOCK_SIZE);
- recv_sys->found_corrupt_log = FALSE;
-
- recv_max_page_lsn = ut_dulint_zero;
-
- mutex_exit(&(recv_sys->mutex));
-}
-
-/************************************************************
-Empties the hash table when it has been fully processed. */
-static
-void
-recv_sys_empty_hash(void)
-/*=====================*/
-{
- ut_ad(mutex_own(&(recv_sys->mutex)));
-
- if (recv_sys->n_addrs != 0) {
- fprintf(stderr,
- "InnoDB: Error: %lu pages with log records"
- " were left unprocessed!\n"
- "InnoDB: Maximum page number with"
- " log records on it %lu\n",
- (ulong) recv_sys->n_addrs,
- (ulong) recv_max_parsed_page_no);
- ut_error;
- }
-
- hash_table_free(recv_sys->addr_hash);
- mem_heap_empty(recv_sys->heap);
-
- recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256);
-}
-
-#ifndef UNIV_LOG_DEBUG
-/************************************************************
-Frees the recovery system. */
-static
-void
-recv_sys_free(void)
-/*===============*/
-{
- mutex_enter(&(recv_sys->mutex));
-
- hash_table_free(recv_sys->addr_hash);
- mem_heap_free(recv_sys->heap);
- ut_free(recv_sys->buf);
- mem_free(recv_sys->last_block_buf_start);
-
- recv_sys->addr_hash = NULL;
- recv_sys->heap = NULL;
-
- mutex_exit(&(recv_sys->mutex));
-}
-#endif /* UNIV_LOG_DEBUG */
-
-/************************************************************
-Truncates possible corrupted or extra records from a log group. */
-static
-void
-recv_truncate_group(
-/*================*/
- log_group_t* group, /* in: log group */
- dulint recovered_lsn, /* in: recovery succeeded up to this
- lsn */
- dulint limit_lsn, /* in: this was the limit for
- recovery */
- dulint checkpoint_lsn, /* in: recovery was started from this
- checkpoint */
- dulint archived_lsn) /* in: the log has been archived up to
- this lsn */
-{
- dulint start_lsn;
- dulint end_lsn;
- dulint finish_lsn1;
- dulint finish_lsn2;
- dulint finish_lsn;
- ulint len;
- ulint i;
-
- if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
- /* Checkpoint was taken in the NOARCHIVELOG mode */
- archived_lsn = checkpoint_lsn;
- }
-
- finish_lsn1 = ut_dulint_add(ut_dulint_align_down(
- archived_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- log_group_get_capacity(group));
-
- finish_lsn2 = ut_dulint_add(ut_dulint_align_up(
- recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- recv_sys->last_log_buf_size);
-
- if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
- /* We do not know how far we should erase log records: erase
- as much as possible */
-
- finish_lsn = finish_lsn1;
- } else {
- /* It is enough to erase the length of the log buffer */
- finish_lsn = ut_dulint_get_min(finish_lsn1, finish_lsn2);
- }
-
- ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- /* Write the log buffer full of zeros */
- for (i = 0; i < RECV_SCAN_SIZE; i++) {
-
- *(log_sys->buf + i) = '\0';
- }
-
- start_lsn = ut_dulint_align_down(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
-
- if (ut_dulint_cmp(start_lsn, recovered_lsn) != 0) {
- /* Copy the last incomplete log block to the log buffer and
- edit its data length: */
-
- ut_memcpy(log_sys->buf, recv_sys->last_block,
- OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_data_len(log_sys->buf, ut_dulint_minus(
- recovered_lsn, start_lsn));
- }
-
- if (ut_dulint_cmp(start_lsn, finish_lsn) >= 0) {
-
- return;
- }
-
- for (;;) {
- end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
-
- if (ut_dulint_cmp(end_lsn, finish_lsn) > 0) {
-
- end_lsn = finish_lsn;
- }
-
- len = ut_dulint_minus(end_lsn, start_lsn);
-
- log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
- if (ut_dulint_cmp(end_lsn, finish_lsn) >= 0) {
-
- return;
- }
-
- /* Write the log buffer full of zeros */
- for (i = 0; i < RECV_SCAN_SIZE; i++) {
-
- *(log_sys->buf + i) = '\0';
- }
-
- start_lsn = end_lsn;
- }
-}
-
-/************************************************************
-Copies the log segment between group->recovered_lsn and recovered_lsn from the
-most up-to-date log group to group, so that it contains the latest log data. */
-static
-void
-recv_copy_group(
-/*============*/
- log_group_t* up_to_date_group, /* in: the most up-to-date log
- group */
- log_group_t* group, /* in: copy to this log
- group */
- dulint recovered_lsn) /* in: recovery succeeded up
- to this lsn */
-{
- dulint start_lsn;
- dulint end_lsn;
- ulint len;
-
- if (ut_dulint_cmp(group->scanned_lsn, recovered_lsn) >= 0) {
-
- return;
- }
-
- ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- start_lsn = ut_dulint_align_down(group->scanned_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- for (;;) {
- end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
-
- if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) {
- end_lsn = ut_dulint_align_up(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- }
-
- log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
- up_to_date_group, start_lsn, end_lsn);
-
- len = ut_dulint_minus(end_lsn, start_lsn);
-
- log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
-
- if (ut_dulint_cmp(end_lsn, recovered_lsn) >= 0) {
-
- return;
- }
-
- start_lsn = end_lsn;
- }
-}
-
-/************************************************************
-Copies a log segment from the most up-to-date log group to the other log
-groups, so that they all contain the latest log data. Also writes the info
-about the latest checkpoint to the groups, and inits the fields in the group
-memory structs to up-to-date values. */
-static
-void
-recv_synchronize_groups(
-/*====================*/
- log_group_t* up_to_date_group) /* in: the most up-to-date
- log group */
-{
- log_group_t* group;
- dulint start_lsn;
- dulint end_lsn;
- dulint recovered_lsn;
- dulint limit_lsn;
-
- recovered_lsn = recv_sys->recovered_lsn;
- limit_lsn = recv_sys->limit_lsn;
-
- /* Read the last recovered log block to the recovery system buffer:
- the block is always incomplete */
-
- start_lsn = ut_dulint_align_down(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
-
- log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
- up_to_date_group, start_lsn, end_lsn);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- if (group != up_to_date_group) {
-
- /* Copy log data if needed */
-
- recv_copy_group(group, up_to_date_group,
- recovered_lsn);
- }
-
- /* Update the fields in the group struct to correspond to
- recovered_lsn */
-
- log_group_set_fields(group, recovered_lsn);
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- /* Copy the checkpoint info to the groups; remember that we have
- incremented checkpoint_no by one, and the info will not be written
- over the max checkpoint info, thus making the preservation of max
- checkpoint info on disk certain */
-
- log_groups_write_checkpoint_info();
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
-
- mutex_enter(&(log_sys->mutex));
-}
-
-/***************************************************************************
-Checks the consistency of the checkpoint info */
-static
-ibool
-recv_check_cp_is_consistent(
-/*========================*/
- /* out: TRUE if ok */
- byte* buf) /* in: buffer containing checkpoint info */
-{
- ulint fold;
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
-
- if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
- buf + LOG_CHECKPOINT_CHECKSUM_1)) {
- return(FALSE);
- }
-
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
-
- if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(
- buf + LOG_CHECKPOINT_CHECKSUM_2)) {
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/************************************************************
-Looks for the maximum consistent checkpoint from the log groups. */
-static
-ulint
-recv_find_max_checkpoint(
-/*=====================*/
- /* out: error code or DB_SUCCESS */
- log_group_t** max_group, /* out: max group */
- ulint* max_field) /* out: LOG_CHECKPOINT_1 or
- LOG_CHECKPOINT_2 */
-{
- log_group_t* group;
- dulint max_no;
- dulint checkpoint_no;
- ulint field;
- byte* buf;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- max_no = ut_dulint_zero;
- *max_group = NULL;
- *max_field = 0;
-
- buf = log_sys->checkpoint_buf;
-
- while (group) {
- group->state = LOG_GROUP_CORRUPTED;
-
- for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
- field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
-
- log_group_read_checkpoint_info(group, field);
-
- if (!recv_check_cp_is_consistent(buf)) {
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Checkpoint in group"
- " %lu at %lu invalid, %lu\n",
- (ulong) group->id,
- (ulong) field,
- (ulong) mach_read_from_4(
- buf
- + LOG_CHECKPOINT_CHECKSUM_1));
-
- }
-#endif /* UNIV_DEBUG */
- goto not_consistent;
- }
-
- group->state = LOG_GROUP_OK;
-
- group->lsn = mach_read_from_8(
- buf + LOG_CHECKPOINT_LSN);
- group->lsn_offset = mach_read_from_4(
- buf + LOG_CHECKPOINT_OFFSET);
- checkpoint_no = mach_read_from_8(
- buf + LOG_CHECKPOINT_NO);
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Checkpoint number %lu"
- " found in group %lu\n",
- (ulong) ut_dulint_get_low(
- checkpoint_no),
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- if (ut_dulint_cmp(checkpoint_no, max_no) >= 0) {
- *max_group = group;
- *max_field = field;
- max_no = checkpoint_no;
- }
-
-not_consistent:
- ;
- }
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- if (*max_group == NULL) {
-
- fprintf(stderr,
- "InnoDB: No valid checkpoint found.\n"
- "InnoDB: If this error appears when you are"
- " creating an InnoDB database,\n"
- "InnoDB: the problem may be that during"
- " an earlier attempt you managed\n"
- "InnoDB: to create the InnoDB data files,"
- " but log file creation failed.\n"
- "InnoDB: If that is the case, please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "error-creating-innodb.html\n");
- return(DB_ERROR);
- }
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************************
-Reads the checkpoint info needed in hot backup. */
-
-ibool
-recv_read_cp_info_for_backup(
-/*=========================*/
- /* out: TRUE if success */
- byte* hdr, /* in: buffer containing the log group header */
- dulint* lsn, /* out: checkpoint lsn */
- ulint* offset, /* out: checkpoint offset in the log group */
- ulint* fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
- database is running with < version 3.23.50 of InnoDB */
- dulint* cp_no, /* out: checkpoint number */
- dulint* first_header_lsn)
- /* out: lsn of of the start of the first log file */
-{
- ulint max_cp = 0;
- dulint max_cp_no = ut_dulint_zero;
- byte* cp_buf;
-
- cp_buf = hdr + LOG_CHECKPOINT_1;
-
- if (recv_check_cp_is_consistent(cp_buf)) {
- max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
- max_cp = LOG_CHECKPOINT_1;
- }
-
- cp_buf = hdr + LOG_CHECKPOINT_2;
-
- if (recv_check_cp_is_consistent(cp_buf)) {
- if (ut_dulint_cmp(mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO),
- max_cp_no) > 0) {
- max_cp = LOG_CHECKPOINT_2;
- }
- }
-
- if (max_cp == 0) {
- return(FALSE);
- }
-
- cp_buf = hdr + max_cp;
-
- *lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
- *offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
-
- /* If the user is running a pre-3.23.50 version of InnoDB, its
- checkpoint data does not contain the fsp limit info */
- if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
- == LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
-
- *fsp_limit = mach_read_from_4(
- cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
-
- if (*fsp_limit == 0) {
- *fsp_limit = 1000000000;
- }
- } else {
- *fsp_limit = 1000000000;
- }
-
- /* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */
-
- *cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
-
- *first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
-
- return(TRUE);
-}
-
-/**********************************************************
-Checks the 4-byte checksum to the trailer checksum field of a log block.
-We also accept a log block in the old format < InnoDB-3.23.52 where the
-checksum field contains the log block number. */
-static
-ibool
-log_block_checksum_is_ok_or_old_format(
-/*===================================*/
- /* out: TRUE if ok, or if the log block may be in the
- format of InnoDB version < 3.23.52 */
- byte* block) /* in: pointer to a log block */
-{
-#ifdef UNIV_LOG_DEBUG
- return(TRUE);
-#endif /* UNIV_LOG_DEBUG */
- if (log_block_calc_checksum(block) == log_block_get_checksum(block)) {
-
- return(TRUE);
- }
-
- if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) {
-
- /* We assume the log block is in the format of
- InnoDB version < 3.23.52 and the block is ok */
-#if 0
- fprintf(stderr,
- "InnoDB: Scanned old format < InnoDB-3.23.52"
- " log block number %lu\n",
- log_block_get_hdr_no(block));
-#endif
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***********************************************************************
-Scans the log segment and n_bytes_scanned is set to the length of valid
-log scanned. */
-
-void
-recv_scan_log_seg_for_backup(
-/*=========================*/
- byte* buf, /* in: buffer containing log data */
- ulint buf_len, /* in: data length in that buffer */
- dulint* scanned_lsn, /* in/out: lsn of buffer start,
- we return scanned lsn */
- ulint* scanned_checkpoint_no,
- /* in/out: 4 lowest bytes of the
- highest scanned checkpoint number so
- far */
- ulint* n_bytes_scanned)/* out: how much we were able to
- scan, smaller than buf_len if log
- data ended here */
-{
- ulint data_len;
- byte* log_block;
- ulint no;
-
- *n_bytes_scanned = 0;
-
- for (log_block = buf; log_block < buf + buf_len;
- log_block += OS_FILE_LOG_BLOCK_SIZE) {
-
- no = log_block_get_hdr_no(log_block);
-
-#if 0
- fprintf(stderr, "Log block header no %lu\n", no);
-#endif
-
- if (no != log_block_convert_lsn_to_no(*scanned_lsn)
- || !log_block_checksum_is_ok_or_old_format(log_block)) {
-#if 0
- fprintf(stderr,
- "Log block n:o %lu, scanned lsn n:o %lu\n",
- no, log_block_convert_lsn_to_no(*scanned_lsn));
-#endif
- /* Garbage or an incompletely written log block */
-
- log_block += OS_FILE_LOG_BLOCK_SIZE;
-#if 0
- fprintf(stderr,
- "Next log block n:o %lu\n",
- log_block_get_hdr_no(log_block));
-#endif
- break;
- }
-
- if (*scanned_checkpoint_no > 0
- && log_block_get_checkpoint_no(log_block)
- < *scanned_checkpoint_no
- && *scanned_checkpoint_no
- - log_block_get_checkpoint_no(log_block)
- > 0x80000000UL) {
-
- /* Garbage from a log buffer flush which was made
- before the most recent database recovery */
-#if 0
- fprintf(stderr,
- "Scanned cp n:o %lu, block cp n:o %lu\n",
- *scanned_checkpoint_no,
- log_block_get_checkpoint_no(log_block));
-#endif
- break;
- }
-
- data_len = log_block_get_data_len(log_block);
-
- *scanned_checkpoint_no
- = log_block_get_checkpoint_no(log_block);
- *scanned_lsn = ut_dulint_add(*scanned_lsn, data_len);
-
- *n_bytes_scanned += data_len;
-
- if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
- /* Log data ends here */
-
-#if 0
- fprintf(stderr, "Log block data len %lu\n",
- data_len);
-#endif
- break;
- }
- }
-}
-
-/***********************************************************************
-Tries to parse a single log record body and also applies it to a page if
-specified. File ops are parsed, but not applied in this function. */
-static
-byte*
-recv_parse_or_apply_log_rec_body(
-/*=============================*/
- /* out: log record end, NULL if not a complete
- record */
- byte type, /* in: type */
- byte* ptr, /* in: pointer to a buffer */
- byte* end_ptr,/* in: pointer to the buffer end */
- page_t* page, /* in: buffer page or NULL; if not NULL, then the log
- record is applied to the page, and the log record
- should be complete then */
- mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if
- page is non-NULL */
-{
- dict_index_t* index = NULL;
-
- switch (type) {
- case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
- ptr = mlog_parse_nbytes(type, ptr, end_ptr, page);
- break;
- case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_INSERT,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
- index, page, mtr);
- }
- break;
- case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_CLUST_DELETE_MARK,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = btr_cur_parse_del_mark_set_clust_rec(
- ptr, end_ptr, index, page);
- }
- break;
- case MLOG_COMP_REC_SEC_DELETE_MARK:
- /* This log record type is obsolete, but we process it for
- backward compatibility with MySQL 5.0.3 and 5.0.4. */
- ut_a(!page || page_is_comp(page));
- ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
- if (!ptr) {
- break;
- }
- /* Fall through */
- case MLOG_REC_SEC_DELETE_MARK:
- ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page);
- break;
- case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_UPDATE_IN_PLACE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = btr_cur_parse_update_in_place(ptr, end_ptr,
- page, index);
- }
- break;
- case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
- case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_LIST_END_DELETE
- || type == MLOG_COMP_LIST_START_DELETE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
- index, page, mtr);
- }
- break;
- case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_LIST_END_COPY_CREATED,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_parse_copy_rec_list_to_created_page(
- ptr, end_ptr, index, page, mtr);
- }
- break;
- case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_PAGE_REORGANIZE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
- page, mtr);
- }
- break;
- case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
- ptr = page_parse_create(ptr, end_ptr,
- type == MLOG_COMP_PAGE_CREATE,
- page, mtr);
- break;
- case MLOG_UNDO_INSERT:
- ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
- break;
- case MLOG_UNDO_ERASE_END:
- ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
- break;
- case MLOG_UNDO_INIT:
- ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
- break;
- case MLOG_UNDO_HDR_DISCARD:
- ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
- break;
- case MLOG_UNDO_HDR_CREATE:
- case MLOG_UNDO_HDR_REUSE:
- ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
- page, mtr);
- break;
- case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
- ptr = btr_parse_set_min_rec_mark(
- ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
- page, mtr);
- break;
- case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_DELETE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_cur_parse_delete_rec(ptr, end_ptr,
- index, page, mtr);
- }
- break;
- case MLOG_IBUF_BITMAP_INIT:
- ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr);
- break;
- case MLOG_INIT_FILE_PAGE:
- ptr = fsp_parse_init_file_page(ptr, end_ptr, page);
- break;
- case MLOG_WRITE_STRING:
- ptr = mlog_parse_string(ptr, end_ptr, page);
- break;
- case MLOG_FILE_CREATE:
- case MLOG_FILE_RENAME:
- case MLOG_FILE_DELETE:
- ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
- ULINT_UNDEFINED);
- break;
- default:
- ptr = NULL;
- recv_sys->found_corrupt_log = TRUE;
- }
-
- if (index) {
- dict_table_t* table = index->table;
-
- dict_mem_index_free(index);
- dict_mem_table_free(table);
- }
-
- return(ptr);
-}
-
-/*************************************************************************
-Folds a (space id, page number) pair into a single ulint. The result is the
-fold under which a page address is inserted into, and looked up from, the
-hash table of log records. */
-UNIV_INLINE
-ulint
-recv_fold(
-/*======*/
-			/* out: fold of the pair, as computed by
-			ut_fold_ulint_pair() */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
-{
-	ulint	fold = ut_fold_ulint_pair(space, page_no);
-
-	return(fold);
-}
-
-/*************************************************************************
-Calculates the hash value of a page file address: the fold given by
-recv_fold() is passed through hash_calc_hash() for recv_sys->addr_hash.
-Used in searching for a log record in the hash table. */
-UNIV_INLINE
-ulint
-recv_hash(
-/*======*/
-			/* out: hash value returned by hash_calc_hash()
-			(not the raw fold) */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
-{
-	ulint	fold = recv_fold(space, page_no);
-
-	return(hash_calc_hash(fold, recv_sys->addr_hash));
-}
-
-/*************************************************************************
-Looks up the hashed file address struct of a page. The chain of the hash
-cell selected by recv_hash() is walked until a node with a matching space
-id and page number is found. */
-static
-recv_addr_t*
-recv_get_fil_addr_struct(
-/*=====================*/
-			/* out: file address struct, NULL if not found from
-			the hash table */
-	ulint	space,	/* in: space id */
-	ulint	page_no)/* in: page number */
-{
-	recv_addr_t*	node;
-
-	for (node = HASH_GET_FIRST(recv_sys->addr_hash,
-				   recv_hash(space, page_no));
-	     node != NULL;
-	     node = HASH_GET_NEXT(addr_hash, node)) {
-
-		if (node->space == space && node->page_no == page_no) {
-
-			return(node);
-		}
-	}
-
-	return(NULL);
-}
-
-/***********************************************************************
-Adds a new log record to the hash table of log records. The record is
-appended to the record list of its page address; the address struct is
-created and hashed first if the page does not have one yet. Nothing is
-stored if fil_tablespace_deleted_or_being_deleted_in_mem() reports the
-tablespace as deleted. */
-static
-void
-recv_add_to_hash_table(
-/*===================*/
-	byte	type,		/* in: log record type */
-	ulint	space,		/* in: space id */
-	ulint	page_no,	/* in: page number */
-	byte*	body,		/* in: log record body */
-	byte*	rec_end,	/* in: log record end */
-	dulint	start_lsn,	/* in: start lsn of the mtr */
-	dulint	end_lsn)	/* in: end lsn of the mtr */
-{
-	recv_t*		recv;
-	recv_addr_t*	recv_addr;
-	recv_data_t*	chunk;
-	recv_data_t**	link;
-	ulint		len;
-
-	if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
-		/* The tablespace does not exist any more: do not store the
-		log record */
-
-		return;
-	}
-
-	/* Total body length; the same variable is reused below for the
-	length of each stored chunk */
-	len = rec_end - body;
-
-	recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
-	recv->type = type;
-	recv->len = len;
-	recv->start_lsn = start_lsn;
-	recv->end_lsn = end_lsn;
-
-	recv_addr = recv_get_fil_addr_struct(space, page_no);
-
-	if (recv_addr == NULL) {
-		/* First record for this page: create the address struct */
-		recv_addr = mem_heap_alloc(recv_sys->heap,
-					   sizeof(recv_addr_t));
-		recv_addr->space = space;
-		recv_addr->page_no = page_no;
-		recv_addr->state = RECV_NOT_PROCESSED;
-
-		UT_LIST_INIT(recv_addr->rec_list);
-
-		HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
-			    recv_fold(space, page_no), recv_addr);
-		recv_sys->n_addrs++;
-#if 0
-		fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
-			space, page_no);
-#endif
-	}
-
-	UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
-
-	/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
-	recv_sys->heap grows into the buffer pool, and bigger chunks could not
-	be allocated. Each chunk is a recv_data_t header followed directly by
-	at most RECV_DATA_BLOCK_SIZE bytes of the body. */
-
-	link = &(recv->data);
-
-	for (; body < rec_end; body += len) {
-
-		len = rec_end - body;
-		len = (len > RECV_DATA_BLOCK_SIZE)
-			? RECV_DATA_BLOCK_SIZE : len;
-
-		chunk = mem_heap_alloc(recv_sys->heap,
-				       sizeof(recv_data_t) + len);
-		*link = chunk;
-
-		ut_memcpy(((byte*) chunk) + sizeof(recv_data_t), body, len);
-
-		link = &(chunk->next);
-	}
-
-	/* Terminate the chunk list */
-	*link = NULL;
-}
-
-/*************************************************************************
-Copies the log record body from recv to buf. The body is stored as a linked
-list of chunks holding at most RECV_DATA_BLOCK_SIZE bytes each; the chunks
-are concatenated into buf in list order. */
-static
-void
-recv_data_copy_to_buf(
-/*==================*/
-	byte*	buf,	/* in: buffer of length at least recv->len */
-	recv_t*	recv)	/* in: log record */
-{
-	recv_data_t*	chunk;
-	ulint		remaining;
-	ulint		n_bytes;
-
-	chunk = recv->data;
-
-	for (remaining = recv->len; remaining > 0; remaining -= n_bytes) {
-
-		/* Every chunk except possibly the last one is full */
-		n_bytes = (remaining > RECV_DATA_BLOCK_SIZE)
-			? RECV_DATA_BLOCK_SIZE : remaining;
-
-		/* The payload starts right after the chunk header */
-		ut_memcpy(buf, ((byte*) chunk) + sizeof(recv_data_t),
-			  n_bytes);
-
-		buf += n_bytes;
-		chunk = chunk->next;
-	}
-}
-
-/****************************************************************************
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool.
-
-The function returns without touching the page when
-recv_sys->apply_log_recs is FALSE, when no records are hashed for
-(space, page_no), or when the address is already in state
-RECV_BEING_PROCESSED or RECV_PROCESSED. Otherwise the address is marked
-RECV_BEING_PROCESSED, each record whose start lsn is not below the current
-page lsn is applied with recv_parse_or_apply_log_rec_body(), and at the end
-the address is marked RECV_PROCESSED and recv_sys->n_addrs is decremented.
-The mini-transaction used here runs in MTR_LOG_NONE mode. */
-
-void
-recv_recover_page(
-/*==============*/
- ibool recover_backup, /* in: TRUE if we are recovering a backup
- page: then we do not acquire any latches
- since the page was read in outside the
- buffer pool */
- ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
- a freshly read page */
- page_t* page, /* in: buffer page */
- ulint space, /* in: space id */
- ulint page_no) /* in: page number */
-{
- buf_block_t* block = NULL; /* stays NULL when recover_backup is TRUE */
- recv_addr_t* recv_addr;
- recv_t* recv;
- byte* buf;
- dulint start_lsn; /* start lsn of the first record applied */
- dulint end_lsn; /* end lsn of the last record visited */
- dulint page_lsn;
- dulint page_newest_lsn;
- ibool modification_to_page; /* TRUE once any record has been applied */
- ibool success;
- mtr_t mtr;
-
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_sys->apply_log_recs == FALSE) {
-
- /* Log records should not be applied now */
-
- mutex_exit(&(recv_sys->mutex));
-
- return;
- }
-
- recv_addr = recv_get_fil_addr_struct(space, page_no);
-
- if ((recv_addr == NULL)
- || (recv_addr->state == RECV_BEING_PROCESSED)
- || (recv_addr->state == RECV_PROCESSED)) {
-
- mutex_exit(&(recv_sys->mutex));
-
- return;
- }
-
-#if 0
- fprintf(stderr, "Recovering space %lu, page %lu\n", space, page_no);
-#endif
-
- recv_addr->state = RECV_BEING_PROCESSED; /* set while still holding
- recv_sys->mutex, so a second caller
- takes the early return above */
-
- mutex_exit(&(recv_sys->mutex));
-
- mtr_start(&mtr);
- mtr_set_log_mode(&mtr, MTR_LOG_NONE);
-
- if (!recover_backup) {
- block = buf_block_align(page);
-
- if (just_read_in) {
- /* Move the ownership of the x-latch on the
- page to this OS thread, so that we can acquire
- a second x-latch on it. This is needed for the
- operations to the page to pass the debug
- checks. */
-
- rw_lock_x_lock_move_ownership(&(block->lock));
- }
-
- success = buf_page_get_known_nowait(RW_X_LATCH, page,
- BUF_KEEP_OLD,
- __FILE__, __LINE__,
- &mtr);
- ut_a(success);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- }
-
- /* Read the newest modification lsn from the page */
- page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
-
- if (!recover_backup) {
- /* It may be that the page has been modified in the buffer
- pool: read the newest modification lsn there */
-
- page_newest_lsn = buf_frame_get_newest_modification(page);
-
- if (!ut_dulint_is_zero(page_newest_lsn)) {
-
- page_lsn = page_newest_lsn;
- }
- } else {
- /* In recovery from a backup we do not really use the buffer
- pool */
-
- page_newest_lsn = ut_dulint_zero;
- }
-
- modification_to_page = FALSE;
- start_lsn = end_lsn = ut_dulint_zero;
-
- recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
-
- while (recv) {
- end_lsn = recv->end_lsn;
-
- if (recv->len > RECV_DATA_BLOCK_SIZE) {
- /* We have to copy the record body to a separate
- buffer */
-
- buf = mem_alloc(recv->len);
-
- recv_data_copy_to_buf(buf, recv);
- } else {
- buf = ((byte*)(recv->data)) + sizeof(recv_data_t); /* body fits in
- one chunk: use it in place */
- }
-
- if (recv->type == MLOG_INIT_FILE_PAGE) {
- page_lsn = page_newest_lsn; /* the lsn read from the page
- frame is discarded for an init record;
- both on-page lsn fields are zeroed
- below */
-
- mach_write_to_8(page + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM,
- ut_dulint_zero);
- mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero);
- }
-
- if (ut_dulint_cmp(recv->start_lsn, page_lsn) >= 0) {
-
- if (!modification_to_page) {
-
- modification_to_page = TRUE;
- start_lsn = recv->start_lsn;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Applying log rec"
- " type %lu len %lu"
- " to space %lu page no %lu\n",
- (ulong) recv->type, (ulong) recv->len,
- (ulong) recv_addr->space,
- (ulong) recv_addr->page_no);
- }
-#endif /* UNIV_DEBUG */
-
- recv_parse_or_apply_log_rec_body(recv->type, buf,
- buf + recv->len,
- page, &mtr);
- mach_write_to_8(page + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM,
- ut_dulint_add(recv->start_lsn,
- recv->len)); /* both lsn fields of the
- page are stamped with start_lsn + len of
- the record just applied */
- mach_write_to_8(page + FIL_PAGE_LSN,
- ut_dulint_add(recv->start_lsn,
- recv->len));
- }
-
- if (recv->len > RECV_DATA_BLOCK_SIZE) {
- mem_free(buf); /* same condition as the mem_alloc() above */
- }
-
- recv = UT_LIST_GET_NEXT(rec_list, recv);
- }
-
- mutex_enter(&(recv_sys->mutex));
-
- if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) {
- recv_max_page_lsn = page_lsn;
- }
-
- recv_addr->state = RECV_PROCESSED;
-
- ut_a(recv_sys->n_addrs);
- recv_sys->n_addrs--; /* recv_apply_hashed_log_recs() waits for this
- counter to reach zero */
-
- mutex_exit(&(recv_sys->mutex));
-
- if (!recover_backup && modification_to_page) {
- ut_a(block);
-
- buf_flush_recv_note_modification(block, start_lsn, end_lsn);
- }
-
- /* Make sure that committing mtr does not change the modification
- lsn values of page */
-
- mtr.modifications = FALSE;
-
- mtr_commit(&mtr);
-}
-
-/***********************************************************************
-Reads in pages which have hashed log records, from an area around a given
-page number. The area is the RECV_READ_AHEAD_AREA-aligned range of pages
-containing page_no. A page is requested only if it has hashed records, is
-not reported by buf_page_peek(), and is still in state RECV_NOT_PROCESSED;
-such a page is moved to state RECV_BEING_READ. */
-static
-ulint
-recv_read_in_area(
-/*==============*/
-			/* out: number of pages found */
-	ulint	space,	/* in: space */
-	ulint	page_no)/* in: page number */
-{
-	recv_addr_t*	recv_addr;
-	ulint		page_nos[RECV_READ_AHEAD_AREA];
-	ulint		first_page;
-	ulint		end_page;
-	ulint		n_found = 0;
-
-	first_page = page_no - (page_no % RECV_READ_AHEAD_AREA);
-	end_page = first_page + RECV_READ_AHEAD_AREA;
-
-	for (page_no = first_page; page_no < end_page; page_no++) {
-
-		recv_addr = recv_get_fil_addr_struct(space, page_no);
-
-		if (recv_addr == NULL || buf_page_peek(space, page_no)) {
-
-			continue;
-		}
-
-		/* The state is checked and changed under recv_sys->mutex */
-		mutex_enter(&(recv_sys->mutex));
-
-		if (recv_addr->state == RECV_NOT_PROCESSED) {
-			recv_addr->state = RECV_BEING_READ;
-
-			page_nos[n_found++] = page_no;
-		}
-
-		mutex_exit(&(recv_sys->mutex));
-	}
-
-	buf_read_recv_pages(FALSE, space, page_nos, n_found);
-
-	return(n_found);
-}
-
-/***********************************************************************
-Empties the hash table of stored log records, applying them to appropriate
-pages.
-
-If another batch is already running (recv_sys->apply_batch_on), the function
-sleeps and retries until that batch has ended. It then walks every cell of
-recv_sys->addr_hash: an address in state RECV_NOT_PROCESSED is recovered
-directly with recv_recover_page() if buf_page_peek() finds the page,
-otherwise a read of the surrounding area is requested with
-recv_read_in_area(). After the walk the function waits until
-recv_sys->n_addrs is zero, and finally empties the hash table with
-recv_sys_empty_hash(). */
-
-void
-recv_apply_hashed_log_recs(
-/*=======================*/
- ibool allow_ibuf) /* in: if TRUE, also ibuf operations are
- allowed during the application; if FALSE,
- no ibuf operations are allowed, and after
- the application all file pages are flushed to
- disk and invalidated in buffer pool: this
- alternative means that no new log records
- can be generated during the application;
- the caller must in this case own the log
- mutex */
-{
- recv_addr_t* recv_addr;
- page_t* page;
- ulint i;
- ulint space;
- ulint page_no;
- ulint n_pages;
- ibool has_printed = FALSE; /* TRUE once the batch start message has
- been printed */
- mtr_t mtr;
-loop:
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_sys->apply_batch_on) {
-
- mutex_exit(&(recv_sys->mutex));
-
- os_thread_sleep(500000);
-
- goto loop;
- }
-
- ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex));
-
- if (!allow_ibuf) {
- recv_no_ibuf_operations = TRUE;
- }
-
- recv_sys->apply_log_recs = TRUE;
- recv_sys->apply_batch_on = TRUE;
-
- for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
-
- recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i);
-
- while (recv_addr) {
- space = recv_addr->space;
- page_no = recv_addr->page_no;
-
- if (recv_addr->state == RECV_NOT_PROCESSED) {
- if (!has_printed) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Starting an"
- " apply batch of log records"
- " to the database...\n"
- "InnoDB: Progress in percents: ",
- stderr);
- has_printed = TRUE;
- }
-
- mutex_exit(&(recv_sys->mutex)); /* released here because
- recv_recover_page() and
- recv_read_in_area() acquire
- recv_sys->mutex themselves */
-
- if (buf_page_peek(space, page_no)) {
-
- mtr_start(&mtr);
-
- page = buf_page_get(space, page_no,
- RW_X_LATCH, &mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(
- page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
- recv_recover_page(FALSE, FALSE, page,
- space, page_no);
- mtr_commit(&mtr);
- } else {
- recv_read_in_area(space, page_no);
- }
-
- mutex_enter(&(recv_sys->mutex));
- }
-
- recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
- }
-
- if (has_printed
- && (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
- != ((i + 1) * 100)
- / hash_get_n_cells(recv_sys->addr_hash)) { /* the integer
- percentage changes between cell i
- and cell i + 1 */
-
- fprintf(stderr, "%lu ", (ulong)
- ((i * 100)
- / hash_get_n_cells(recv_sys->addr_hash)));
- }
- }
-
- /* Wait until all the pages have been processed */
-
- while (recv_sys->n_addrs != 0) {
-
- mutex_exit(&(recv_sys->mutex));
-
- os_thread_sleep(500000);
-
- mutex_enter(&(recv_sys->mutex));
- }
-
- if (has_printed) {
-
- fprintf(stderr, "\n");
- }
-
- if (!allow_ibuf) {
- /* Flush all the file pages to disk and invalidate them in
- the buffer pool */
-
- mutex_exit(&(recv_sys->mutex));
- mutex_exit(&(log_sys->mutex));
-
- n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX,
- ut_dulint_max);
- ut_a(n_pages != ULINT_UNDEFINED);
-
- buf_flush_wait_batch_end(BUF_FLUSH_LIST);
-
- buf_pool_invalidate();
-
- mutex_enter(&(log_sys->mutex)); /* re-acquired in the reverse of
- the release order above */
- mutex_enter(&(recv_sys->mutex));
-
- recv_no_ibuf_operations = FALSE;
- }
-
- recv_sys->apply_log_recs = FALSE;
- recv_sys->apply_batch_on = FALSE;
-
- recv_sys_empty_hash();
-
- if (has_printed) {
- fprintf(stderr, "InnoDB: Apply batch completed\n");
- }
-
- mutex_exit(&(recv_sys->mutex));
-}
-
-/* This page is allocated from the buffer pool and used in the function
-below. It is allocated on the first call and kept for later calls. */
-static page_t* recv_backup_application_page = NULL;
-
-/***********************************************************************
-Applies log records in the hash table to a backup. For every hashed page
-address whose tablespace exists in memory, the tablespace is extended if
-needed, the page is read with fil_io(), the hashed records are applied with
-recv_recover_page(), and the page is written back with fil_io(). Addresses
-of tablespaces that do not exist are marked RECV_PROCESSED and skipped.
-A failure to extend, read or write is fatal: a message is printed and the
-process exits with status 1. The hash table is emptied at the end. */
-
-void
-recv_apply_log_recs_for_backup(void)
-/*================================*/
-{
-	recv_addr_t*	recv_addr;
-	ulint		n_hash_cells;
-	byte*		page;
-	ulint		actual_size;
-	ibool		success;
-	ulint		error;
-	ulint		i;
-
-	recv_sys->apply_log_recs = TRUE;
-	recv_sys->apply_batch_on = TRUE;
-
-	if (recv_backup_application_page == NULL) {
-		recv_backup_application_page = buf_frame_alloc();
-	}
-
-	page = recv_backup_application_page;
-
-	fputs("InnoDB: Starting an apply batch of log records"
-	      " to the database...\n"
-	      "InnoDB: Progress in percents: ", stderr);
-
-	n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
-
-	for (i = 0; i < n_hash_cells; i++) {
-		/* The address hash table is externally chained */
-		recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
-
-		while (recv_addr != NULL) {
-
-			if (!fil_tablespace_exists_in_mem(recv_addr->space)) {
-#if 0
-				fprintf(stderr,
-					"InnoDB: Warning: cannot apply"
-					" log record to"
-					" tablespace %lu page %lu,\n"
-					"InnoDB: because tablespace with"
-					" that id does not exist.\n",
-					recv_addr->space, recv_addr->page_no);
-#endif
-				recv_addr->state = RECV_PROCESSED;
-
-				ut_a(recv_sys->n_addrs);
-				recv_sys->n_addrs--;
-
-				goto skip_this_recv_addr;
-			}
-
-			/* We simulate a page read made by the buffer pool, to
-			make sure the recovery apparatus works ok, for
-			example, the buf_frame_align() function. We must init
-			the block corresponding to buf_pool->frame_zero
-			(== page). */
-
-			buf_page_init_for_backup_restore(
-				recv_addr->space, recv_addr->page_no,
-				buf_block_align(page));
-
-			/* Extend the tablespace's last file if the page_no
-			does not fall inside its bounds; we assume the last
-			file is auto-extending, and ibbackup copied the file
-			when it still was smaller */
-
-			success = fil_extend_space_to_desired_size(
-				&actual_size,
-				recv_addr->space, recv_addr->page_no + 1);
-			if (!success) {
-				/* Report the size that was actually
-				requested (page_no + 1 pages), and cast the
-				ulint arguments to match the %lu conversions */
-				fprintf(stderr,
-					"InnoDB: Fatal error: cannot extend"
-					" tablespace %lu to hold %lu pages\n",
-					(ulong) recv_addr->space,
-					(ulong) (recv_addr->page_no + 1));
-
-				exit(1);
-			}
-
-			/* Read the page from the tablespace file using the
-			fil0fil.c routines */
-
-			error = fil_io(OS_FILE_READ, TRUE, recv_addr->space,
-				       recv_addr->page_no, 0, UNIV_PAGE_SIZE,
-				       page, NULL);
-			if (error != DB_SUCCESS) {
-				fprintf(stderr,
-					"InnoDB: Fatal error: cannot read"
-					" from tablespace"
-					" %lu page number %lu\n",
-					(ulong) recv_addr->space,
-					(ulong) recv_addr->page_no);
-
-				exit(1);
-			}
-
-			/* Apply the log records to this page */
-			recv_recover_page(TRUE, FALSE, page, recv_addr->space,
-					  recv_addr->page_no);
-
-			/* Write the page back to the tablespace file using the
-			fil0fil.c routines */
-
-			buf_flush_init_for_writing(
-				page, mach_read_from_8(page + FIL_PAGE_LSN),
-				recv_addr->space, recv_addr->page_no);
-
-			error = fil_io(OS_FILE_WRITE, TRUE, recv_addr->space,
-				       recv_addr->page_no, 0, UNIV_PAGE_SIZE,
-				       page, NULL);
-			if (error != DB_SUCCESS) {
-				/* A page that could not be written back
-				leaves the backup without the applied
-				records: treat it like the read failure
-				above instead of ignoring it */
-				fprintf(stderr,
-					"InnoDB: Fatal error: cannot write"
-					" to tablespace"
-					" %lu page number %lu\n",
-					(ulong) recv_addr->space,
-					(ulong) recv_addr->page_no);
-
-				exit(1);
-			}
-skip_this_recv_addr:
-			recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
-		}
-
-		/* Print the percentage whenever it changes between cell i
-		and cell i + 1 */
-		if ((100 * i) / n_hash_cells
-		    != (100 * (i + 1)) / n_hash_cells) {
-			fprintf(stderr, "%lu ",
-				(ulong) ((100 * i) / n_hash_cells));
-			fflush(stderr);
-		}
-	}
-
-	recv_sys_empty_hash();
-}
-
-/***********************************************************************
-Tries to parse a single log record and returns its length. The one-byte
-records MLOG_MULTI_REC_END and MLOG_DUMMY_RECORD are recognized directly;
-any other record is parsed with mlog_parse_initial_log_record() followed by
-recv_parse_or_apply_log_rec_body() with a NULL page and NULL mtr. */
-static
-ulint
-recv_parse_log_rec(
-/*===============*/
-			/* out: length of the record, or 0 if the record was
-			not complete */
-	byte*	ptr,	/* in: pointer to a buffer */
-	byte*	end_ptr,/* in: pointer to the buffer end */
-	byte*	type,	/* out: type */
-	ulint*	space,	/* out: space id */
-	ulint*	page_no,/* out: page number */
-	byte**	body)	/* out: log record body start */
-{
-	byte*	rec_ptr;
-	byte	first_byte;
-
-	*body = NULL;
-
-	if (ptr == end_ptr) {
-		/* Empty buffer: nothing to parse */
-
-		return(0);
-	}
-
-	first_byte = *ptr;
-
-	if (first_byte == MLOG_MULTI_REC_END) {
-
-		*type = first_byte;
-
-		return(1);
-
-	} else if (first_byte == MLOG_DUMMY_RECORD) {
-
-		*type = first_byte;
-
-		*space = ULINT_UNDEFINED - 1; /* For debugging */
-
-		return(1);
-	}
-
-	rec_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
-						page_no);
-	*body = rec_ptr;
-
-	if (UNIV_UNLIKELY(!rec_ptr)) {
-
-		return(0);
-	}
-
-	/* Check that page_no is sensible */
-
-	if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
-
-		recv_sys->found_corrupt_log = TRUE;
-
-		return(0);
-	}
-
-	rec_ptr = recv_parse_or_apply_log_rec_body(*type, rec_ptr, end_ptr,
-						   NULL, NULL);
-	if (UNIV_UNLIKELY(rec_ptr == NULL)) {
-
-		return(0);
-	}
-
-	/* Track the largest page number seen in a parsed record */
-	if (recv_max_parsed_page_no < *page_no) {
-		recv_max_parsed_page_no = *page_no;
-	}
-
-	return(rec_ptr - ptr);
-}
-
-/***********************************************************
-Calculates the new value for lsn when more data is added to the log. Each
-time the added data fills the payload area of a log block, the lsn also
-advances over one block header and one block trailer. */
-static
-dulint
-recv_calc_lsn_on_data_add(
-/*======================*/
-	dulint	lsn,	/* in: old lsn */
-	ulint	len)	/* in: this many bytes of data is added, log block
-			headers not included */
-{
-	/* Bytes of log data that fit in one log block */
-	const ulint	payload_size = OS_FILE_LOG_BLOCK_SIZE
-		- LOG_BLOCK_HDR_SIZE - LOG_BLOCK_TRL_SIZE;
-	ulint		frag_len;
-	ulint		n_filled;
-
-	/* Data bytes already present in the block that lsn points into */
-	frag_len = (ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
-		- LOG_BLOCK_HDR_SIZE;
-	ut_ad(frag_len < payload_size);
-
-	/* Number of payload areas filled up by the added data */
-	n_filled = (len + frag_len) / payload_size;
-
-	return(ut_dulint_add(lsn,
-			     len + n_filled
-			     * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE)));
-}
-
-/***********************************************************
-Checks that the parser recognizes incomplete initial segments of a log
-record as incomplete: for every prefix length shorter than len,
-recv_parse_log_rec() must return 0. */
-
-void
-recv_check_incomplete_log_recs(
-/*===========================*/
-	byte*	ptr,	/* in: pointer to a complete log record */
-	ulint	len)	/* in: length of the log record */
-{
-	byte	type;
-	ulint	space;
-	ulint	page_no;
-	byte*	body;
-	ulint	prefix_len = 0;
-
-	while (prefix_len < len) {
-		/* Parse only the first prefix_len bytes of the record */
-		ut_a(0 == recv_parse_log_rec(ptr, ptr + prefix_len, &type,
-					     &space, &page_no, &body));
-		prefix_len++;
-	}
-}
-
-/***********************************************************
-Prints diagnostic info of corrupt log: the record type and address, how far
-parsing had proceeded, information on the previously parsed record, and,
-when the two records are close enough together, a hex dump of the parsing
-buffer around them. The dump is clamped so that it never starts before
-recv_sys->buf and never extends beyond recv_sys->len. */
-static
-void
-recv_report_corrupt_log(
-/*====================*/
-	byte*	ptr,	/* in: pointer to corrupt log record */
-	byte	type,	/* in: type of the record */
-	ulint	space,	/* in: space id, this may also be garbage */
-	ulint	page_no)/* in: page number, this may also be garbage */
-{
-	ulint	rec_offset;	/* offset of the corrupt record in
-				recv_sys->buf */
-	ulint	before;		/* bytes dumped before the previous record */
-	ulint	after;		/* bytes dumped from the corrupt record on */
-
-	rec_offset = (ulint)(ptr - recv_sys->buf);
-
-	fprintf(stderr,
-		"InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
-		"InnoDB: Log record type %lu, space id %lu, page number %lu\n"
-		"InnoDB: Log parsing proceeded successfully up to %lu %lu\n"
-		"InnoDB: Previous log record type %lu, is multi %lu\n"
-		"InnoDB: Recv offset %lu, prev %lu\n",
-		(ulong) type, (ulong) space, (ulong) page_no,
-		(ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
-		(ulong) ut_dulint_get_low(recv_sys->recovered_lsn),
-		(ulong) recv_previous_parsed_rec_type,
-		(ulong) recv_previous_parsed_rec_is_multi,
-		(ulong) rec_offset,
-		(ulong) recv_previous_parsed_rec_offset);
-
-	if (rec_offset + 100 > recv_previous_parsed_rec_offset
-	    && rec_offset + 100 - recv_previous_parsed_rec_offset
-	    < 200000) {
-
-		/* Up to 100 bytes of context before the previous record,
-		but not reaching in front of the buffer start */
-		before = recv_previous_parsed_rec_offset;
-
-		if (before > 100) {
-			before = 100;
-		}
-
-		/* Up to 100 bytes from the start of the corrupt record,
-		but not beyond the data held in the buffer */
-		after = 0;
-
-		if (recv_sys->len > rec_offset) {
-			after = recv_sys->len - rec_offset;
-		}
-
-		if (after > 100) {
-			after = 100;
-		}
-
-		fputs("InnoDB: Hex dump of corrupt log starting"
-		      " 100 bytes before the start\n"
-		      "InnoDB: of the previous log rec,\n"
-		      "InnoDB: and ending 100 bytes after the start"
-		      " of the corrupt rec:\n",
-		      stderr);
-
-		/* The guard above ensures that the length below cannot
-		wrap around: either before equals the previous offset, or
-		rec_offset + 100 exceeds the previous offset */
-		ut_print_buf(stderr,
-			     recv_sys->buf
-			     + recv_previous_parsed_rec_offset - before,
-			     rec_offset + before + after
-			     - recv_previous_parsed_rec_offset);
-		putc('\n', stderr);
-	}
-
-	fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
-	      "InnoDB: is possible that the log scan did not proceed\n"
-	      "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
-	      "InnoDB: on your InnoDB tables to check that they are ok!\n"
-	      "InnoDB: If mysqld crashes after this recovery, look at\n"
-	      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-	      "forcing-recovery.html\n"
-	      "InnoDB: about forcing recovery.\n", stderr);
-
-	fflush(stderr);
-}
-
-/***********************************************************
-Parses log records from a buffer and stores them to a hash table to wait
-merging to file pages.
-
-Parsing starts at recv_sys->buf + recv_sys->recovered_offset and repeats
-until the buffer is exhausted or a record cannot be handled. A record whose
-first byte has MLOG_SINGLE_REC_FLAG set, or which is an MLOG_DUMMY_RECORD,
-is handled on its own. Otherwise the records up to and including the next
-MLOG_MULTI_REC_END are first parsed once to check that they are all in the
-buffer, and then parsed a second time to store them.
-recv_sys->recovered_offset and recv_sys->recovered_lsn are advanced only
-past records that have been accepted. */
-static
-ibool
-recv_parse_log_recs(
-/*================*/
- /* out: currently always returns FALSE */
- ibool store_to_hash) /* in: TRUE if the records should be stored
- to the hash table; this is set to FALSE if just
- debug checking is needed */
-{
- byte* ptr;
- byte* end_ptr;
- ulint single_rec;
- ulint len;
- ulint total_len; /* summed length of the records of a multi-record
- group */
- dulint new_recovered_lsn;
- dulint old_lsn;
- byte type;
- ulint space;
- ulint page_no;
- byte* body;
- ulint n_recs;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(!ut_dulint_is_zero(recv_sys->parse_start_lsn));
-loop:
- ptr = recv_sys->buf + recv_sys->recovered_offset;
-
- end_ptr = recv_sys->buf + recv_sys->len;
-
- if (ptr == end_ptr) {
-
- return(FALSE); /* everything in the buffer has been parsed */
- }
-
- single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
-
- if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
- /* The mtr only modified a single page, or this is a file op */
-
- old_lsn = recv_sys->recovered_lsn;
-
- /* Try to parse a log record, fetching its type, space id,
- page no, and a pointer to the body of the log record */
-
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
-
- if (len == 0 || recv_sys->found_corrupt_log) {
- if (recv_sys->found_corrupt_log) {
-
- recv_report_corrupt_log(ptr,
- type, space, page_no);
- }
-
- return(FALSE);
- }
-
- new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
-
- if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
- > 0) {
- /* The log record filled a log block, and we require
- that also the next log block should have been scanned
- in */
-
- return(FALSE);
- }
-
- recv_previous_parsed_rec_type = (ulint)type;
- recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
- recv_previous_parsed_rec_is_multi = 0;
-
- recv_sys->recovered_offset += len;
- recv_sys->recovered_lsn = new_recovered_lsn;
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Parsed a single log rec"
- " type %lu len %lu space %lu page no %lu\n",
- (ulong) type, (ulong) len, (ulong) space,
- (ulong) page_no);
- }
-#endif /* UNIV_DEBUG */
-
- if (type == MLOG_DUMMY_RECORD) {
- /* Do nothing */
-
- } else if (store_to_hash && (type == MLOG_FILE_CREATE
- || type == MLOG_FILE_RENAME
- || type == MLOG_FILE_DELETE)) {
-#ifdef UNIV_HOTBACKUP
- if (recv_replay_file_ops) {
-
- /* In ibbackup --apply-log, replay an .ibd file
- operation, if possible; note that
- fil_path_to_mysql_datadir is set in ibbackup to
- point to the datadir we should use there */
-
- if (NULL == fil_op_log_parse_or_replay(
- body, end_ptr, type, TRUE,
- space)) {
- fprintf(stderr,
- "InnoDB: Error: file op"
- " log record of type %lu"
- " space %lu not complete in\n"
- "InnoDB: the replay phase."
- " Path %s\n",
- (ulint)type, space,
- (char*)(body + 2)); /* NOTE(review): unlike the
- other fprintf calls here, the
- %lu arguments are ulint and
- not cast to ulong - confirm
- the two types always match */
-
- ut_a(0);
- }
- }
-#endif
- /* In normal mysqld crash recovery we do not try to
- replay file operations */
- } else if (store_to_hash) {
- recv_add_to_hash_table(type, space, page_no, body,
- ptr + len, old_lsn,
- recv_sys->recovered_lsn);
- } else {
-#ifdef UNIV_LOG_DEBUG
- recv_check_incomplete_log_recs(ptr, len);
-#endif/* UNIV_LOG_DEBUG */
- }
- } else {
- /* Check that all the records associated with the single mtr
- are included within the buffer */
-
- total_len = 0;
- n_recs = 0;
-
- for (;;) {
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
- if (len == 0 || recv_sys->found_corrupt_log) {
-
- if (recv_sys->found_corrupt_log) {
-
- recv_report_corrupt_log(
- ptr, type, space, page_no);
- }
-
- return(FALSE); /* recovered_offset and recovered_lsn have
- not been advanced for this group */
- }
-
- recv_previous_parsed_rec_type = (ulint)type;
- recv_previous_parsed_rec_offset
- = recv_sys->recovered_offset + total_len;
- recv_previous_parsed_rec_is_multi = 1;
-
- if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
-#ifdef UNIV_LOG_DEBUG
- recv_check_incomplete_log_recs(ptr, len);
-#endif /* UNIV_LOG_DEBUG */
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Parsed a multi log rec"
- " type %lu len %lu"
- " space %lu page no %lu\n",
- (ulong) type, (ulong) len,
- (ulong) space, (ulong) page_no);
- }
-#endif /* UNIV_DEBUG */
-
- total_len += len;
- n_recs++;
-
- ptr += len;
-
- if (type == MLOG_MULTI_REC_END) {
-
- /* Found the end mark for the records */
-
- break;
- }
- }
-
- new_recovered_lsn = recv_calc_lsn_on_data_add(
- recv_sys->recovered_lsn, total_len);
-
- if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn)
- > 0) {
- /* The log record filled a log block, and we require
- that also the next log block should have been scanned
- in */
-
- return(FALSE);
- }
-
- /* Add all the records to the hash table */
-
- ptr = recv_sys->buf + recv_sys->recovered_offset; /* rewind to the
- first record of the group for the second
- pass */
-
- for (;;) {
- old_lsn = recv_sys->recovered_lsn;
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
- if (recv_sys->found_corrupt_log) {
-
- recv_report_corrupt_log(ptr,
- type, space, page_no);
- }
-
- ut_a(len != 0);
- ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
-
- recv_sys->recovered_offset += len;
- recv_sys->recovered_lsn
- = recv_calc_lsn_on_data_add(old_lsn, len);
- if (type == MLOG_MULTI_REC_END) {
-
- /* Found the end mark for the records */
-
- break;
- }
-
- if (store_to_hash) {
- recv_add_to_hash_table(type, space, page_no,
- body, ptr + len,
- old_lsn,
- new_recovered_lsn); /* every record of the
- group is stored with the end lsn
- of the whole group */
- }
-
- ptr += len;
- }
- }
-
- goto loop;
-}
-
-/***********************************************************
-Adds data from a new log block to the parsing buffer of recv_sys if
-recv_sys->parse_start_lsn is non-zero. Only the part of the block that lies
-beyond both recv_sys->parse_start_lsn and recv_sys->scanned_lsn is copied,
-and the block header and trailer are never copied. The function asserts,
-before copying, that the data fits within RECV_PARSING_BUF_SIZE. */
-static
-ibool
-recv_sys_add_to_parsing_buf(
-/*========================*/
-				/* out: TRUE if more data added */
-	byte*	log_block,	/* in: log block */
-	dulint	scanned_lsn)	/* in: lsn of how far we were able to find
-				data in this log block */
-{
-	ulint	more_len;
-	ulint	data_len;
-	ulint	start_offset;
-	ulint	end_offset;
-	ulint	copy_len;
-
-	ut_ad(ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) >= 0);
-
-	if (ut_dulint_is_zero(recv_sys->parse_start_lsn)) {
-		/* Cannot start parsing yet because no start point for
-		it found */
-
-		return(FALSE);
-	}
-
-	data_len = log_block_get_data_len(log_block);
-
-	if (ut_dulint_cmp(recv_sys->parse_start_lsn, scanned_lsn) >= 0) {
-
-		/* The block ends at or before the parsing start point */
-
-		return(FALSE);
-
-	} else if (ut_dulint_cmp(recv_sys->scanned_lsn, scanned_lsn) >= 0) {
-
-		/* Nothing new compared with what was scanned earlier */
-
-		return(FALSE);
-
-	} else if (ut_dulint_cmp(recv_sys->parse_start_lsn,
-				 recv_sys->scanned_lsn) > 0) {
-		more_len = ut_dulint_minus(scanned_lsn,
-					   recv_sys->parse_start_lsn);
-	} else {
-		more_len = ut_dulint_minus(scanned_lsn, recv_sys->scanned_lsn);
-	}
-
-	if (more_len == 0) {
-
-		return(FALSE);
-	}
-
-	ut_ad(data_len >= more_len);
-
-	/* The new data is the last more_len bytes of the block's data,
-	clipped to the area between the block header and trailer */
-
-	start_offset = data_len - more_len;
-
-	if (start_offset < LOG_BLOCK_HDR_SIZE) {
-		start_offset = LOG_BLOCK_HDR_SIZE;
-	}
-
-	end_offset = data_len;
-
-	if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
-		end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
-	}
-
-	ut_ad(start_offset <= end_offset);
-
-	if (start_offset < end_offset) {
-		copy_len = end_offset - start_offset;
-
-		/* Check the bound before writing, so that an overflow of
-		the parsing buffer is caught before memory is overwritten */
-		ut_a(recv_sys->len + copy_len <= RECV_PARSING_BUF_SIZE);
-
-		ut_memcpy(recv_sys->buf + recv_sys->len,
-			  log_block + start_offset, copy_len);
-
-		recv_sys->len += copy_len;
-	}
-
-	return(TRUE);
-}
-
-/***********************************************************
-Moves the parsing buffer data left to the buffer start: the bytes from
-recv_sys->recovered_offset up to recv_sys->len are moved to offset 0, and
-both fields are adjusted accordingly. */
-static
-void
-recv_sys_justify_left_parsing_buf(void)
-/*===================================*/
-{
-	ulint	consumed = recv_sys->recovered_offset;
-	ulint	remaining = recv_sys->len - consumed;
-
-	/* Source and destination may overlap, hence a memmove */
-	ut_memmove(recv_sys->buf, recv_sys->buf + consumed, remaining);
-
-	recv_sys->len = remaining;
-
-	recv_sys->recovered_offset = 0;
-}
-
-/***********************************************************
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
-
-ibool
-recv_scan_log_recs(
-/*===============*/
- /* out: TRUE if limit_lsn has been reached, or
- not able to scan any more in this log group */
- ibool apply_automatically,/* in: TRUE if we want this function to
- apply log records automatically when the
- hash table becomes full; in the hot backup tool
- the tool does the applying, not this
- function */
- ulint available_memory,/* in: we let the hash table of recs to grow
- to this size, at the maximum */
- ibool store_to_hash, /* in: TRUE if the records should be stored
- to the hash table; this is set to FALSE if just
- debug checking is needed */
- byte* buf, /* in: buffer containing a log segment or
- garbage */
- ulint len, /* in: buffer length */
- dulint start_lsn, /* in: buffer start lsn */
- dulint* contiguous_lsn, /* in/out: it is known that all log groups
- contain contiguous log data up to this lsn */
- dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
-{
- byte* log_block;
- ulint no;
- dulint scanned_lsn;
- ibool finished;
- ulint data_len;
- ibool more_data;
-
- ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(len > 0);
- ut_a(apply_automatically <= TRUE);
- ut_a(store_to_hash <= TRUE);
-
- finished = FALSE;
-
- log_block = buf;
- scanned_lsn = start_lsn;
- more_data = FALSE;
-
- while (log_block < buf + len && !finished) {
-
- no = log_block_get_hdr_no(log_block);
- /*
- fprintf(stderr, "Log block header no %lu\n", no);
-
- fprintf(stderr, "Scanned lsn no %lu\n",
- log_block_convert_lsn_to_no(scanned_lsn));
- */
- if (no != log_block_convert_lsn_to_no(scanned_lsn)
- || !log_block_checksum_is_ok_or_old_format(log_block)) {
-
- if (no == log_block_convert_lsn_to_no(scanned_lsn)
- && !log_block_checksum_is_ok_or_old_format(
- log_block)) {
- fprintf(stderr,
- "InnoDB: Log block no %lu at"
- " lsn %lu %lu has\n"
- "InnoDB: ok header, but checksum field"
- " contains %lu, should be %lu\n",
- (ulong) no,
- (ulong) ut_dulint_get_high(
- scanned_lsn),
- (ulong) ut_dulint_get_low(scanned_lsn),
- (ulong) log_block_get_checksum(
- log_block),
- (ulong) log_block_calc_checksum(
- log_block));
- }
-
- /* Garbage or an incompletely written log block */
-
- finished = TRUE;
-
- break;
- }
-
- if (log_block_get_flush_bit(log_block)) {
- /* This block was a start of a log flush operation:
- we know that the previous flush operation must have
- been completed for all log groups before this block
- can have been flushed to any of the groups. Therefore,
- we know that log data is contiguous up to scanned_lsn
- in all non-corrupt log groups. */
-
- if (ut_dulint_cmp(scanned_lsn, *contiguous_lsn) > 0) {
- *contiguous_lsn = scanned_lsn;
- }
- }
-
- data_len = log_block_get_data_len(log_block);
-
- if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
- && (ut_dulint_cmp(ut_dulint_add(scanned_lsn, data_len),
- recv_sys->scanned_lsn) > 0)
- && (recv_sys->scanned_checkpoint_no > 0)
- && (log_block_get_checkpoint_no(log_block)
- < recv_sys->scanned_checkpoint_no)
- && (recv_sys->scanned_checkpoint_no
- - log_block_get_checkpoint_no(log_block)
- > 0x80000000UL)) {
-
- /* Garbage from a log buffer flush which was made
- before the most recent database recovery */
-
- finished = TRUE;
-#ifdef UNIV_LOG_DEBUG
- /* This is not really an error, but currently
- we stop here in the debug version: */
-
- ut_error;
-#endif
- break;
- }
-
- if (ut_dulint_is_zero(recv_sys->parse_start_lsn)
- && (log_block_get_first_rec_group(log_block) > 0)) {
-
- /* We found a point from which to start the parsing
- of log records */
-
- recv_sys->parse_start_lsn
- = ut_dulint_add(scanned_lsn,
- log_block_get_first_rec_group(
- log_block));
- recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
- recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
- }
-
- scanned_lsn = ut_dulint_add(scanned_lsn, data_len);
-
- if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
-
- /* We have found more entries. If this scan is
- of startup type, we must initiate crash recovery
- environment before parsing these log records. */
-
- if (recv_log_scan_is_startup_type
- && !recv_needed_recovery) {
-
- fprintf(stderr,
- "InnoDB: Log scan progressed"
- " past the checkpoint lsn %lu %lu\n",
- (ulong) ut_dulint_get_high(
- recv_sys->scanned_lsn),
- (ulong) ut_dulint_get_low(
- recv_sys->scanned_lsn));
- recv_init_crash_recovery();
- }
-
- /* We were able to find more log data: add it to the
- parsing buffer if parse_start_lsn is already
- non-zero */
-
- if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
- >= RECV_PARSING_BUF_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: log parsing"
- " buffer overflow."
- " Recovery may have failed!\n");
-
- recv_sys->found_corrupt_log = TRUE;
-
- } else if (!recv_sys->found_corrupt_log) {
- more_data = recv_sys_add_to_parsing_buf(
- log_block, scanned_lsn);
- }
-
- recv_sys->scanned_lsn = scanned_lsn;
- recv_sys->scanned_checkpoint_no
- = log_block_get_checkpoint_no(log_block);
- }
-
- if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
- /* Log data for this group ends here */
-
- finished = TRUE;
- } else {
- log_block += OS_FILE_LOG_BLOCK_SIZE;
- }
- }
-
- *group_scanned_lsn = scanned_lsn;
-
- if (recv_needed_recovery
- || (recv_is_from_backup && !recv_is_making_a_backup)) {
- recv_scan_print_counter++;
-
- if (finished || (recv_scan_print_counter % 80 == 0)) {
-
- fprintf(stderr,
- "InnoDB: Doing recovery: scanned up to"
- " log sequence number %lu %lu\n",
- (ulong) ut_dulint_get_high(*group_scanned_lsn),
- (ulong) ut_dulint_get_low(*group_scanned_lsn));
- }
- }
-
- if (more_data && !recv_sys->found_corrupt_log) {
- /* Try to parse more log records */
-
- recv_parse_log_recs(store_to_hash);
-
- if (store_to_hash && mem_heap_get_size(recv_sys->heap)
- > available_memory
- && apply_automatically) {
-
- /* Hash table of log records has grown too big:
- empty it; FALSE means no ibuf operations
- allowed, as we cannot add new records to the
- log yet: they would be produced by ibuf
- operations */
-
- recv_apply_hashed_log_recs(FALSE);
- }
-
- if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
- /* Move parsing buffer data to the buffer start */
-
- recv_sys_justify_left_parsing_buf();
- }
- }
-
- return(finished);
-}
-
-/***********************************************************
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
-static
-void
-recv_group_scan_log_recs(
-/*=====================*/
- log_group_t* group, /* in: log group */
- dulint* contiguous_lsn, /* in/out: it is known that all log groups
- contain contiguous log data up to this lsn */
- dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */
-{
- ibool finished;
- dulint start_lsn;
- dulint end_lsn;
-
- finished = FALSE;
-
- start_lsn = *contiguous_lsn;
-
- while (!finished) {
- end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE);
-
- log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
- group, start_lsn, end_lsn);
-
- finished = recv_scan_log_recs(
- TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
- * UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE,
- start_lsn, contiguous_lsn, group_scanned_lsn);
- start_lsn = end_lsn;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Scanned group %lu up to"
- " log sequence number %lu %lu\n",
- (ulong) group->id,
- (ulong) ut_dulint_get_high(*group_scanned_lsn),
- (ulong) ut_dulint_get_low(*group_scanned_lsn));
- }
-#endif /* UNIV_DEBUG */
-}
-
-/***********************************************************
-Initialize crash recovery environment. Can be called iff
-recv_needed_recovery == FALSE. */
-static
-void
-recv_init_crash_recovery(void)
-/*==========================*/
-{
- ut_a(!recv_needed_recovery);
-
- recv_needed_recovery = TRUE;
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Database was not"
- " shut down normally!\n"
- "InnoDB: Starting crash recovery.\n");
-
- fprintf(stderr,
- "InnoDB: Reading tablespace information"
- " from the .ibd files...\n");
-
- fil_load_single_table_tablespaces();
-
- /* If we are using the doublewrite method, we will
- check if there are half-written pages in data files,
- and restore them from the doublewrite buffer if
- possible */
-
- if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
-
- fprintf(stderr,
- "InnoDB: Restoring possible"
- " half-written data pages from"
- " the doublewrite\n"
- "InnoDB: buffer...\n");
- trx_sys_doublewrite_init_or_restore_pages(TRUE);
- }
-}
-
-/************************************************************
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it. */
-
-ulint
-recv_recovery_from_checkpoint_start(
-/*================================*/
- /* out: error code or DB_SUCCESS */
- ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
- dulint limit_lsn, /* in: recover up to this lsn if possible */
- dulint min_flushed_lsn,/* in: min flushed lsn from data files */
- dulint max_flushed_lsn)/* in: max flushed lsn from data files */
-{
- log_group_t* group;
- log_group_t* max_cp_group;
- log_group_t* up_to_date_group;
- ulint max_cp_field;
- dulint checkpoint_lsn;
- dulint checkpoint_no;
- dulint old_scanned_lsn;
- dulint group_scanned_lsn;
- dulint contiguous_lsn;
- dulint archived_lsn;
- ulint capacity;
- byte* buf;
- byte log_hdr_buf[LOG_FILE_HDR_SIZE];
- ulint err;
-
- ut_ad((type != LOG_CHECKPOINT)
- || (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0));
-
- if (type == LOG_CHECKPOINT) {
- recv_sys_create();
- recv_sys_init(FALSE, buf_pool_get_curr_size());
- }
-
- if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
- fprintf(stderr,
- "InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
- fprintf(stderr,
- "InnoDB: Skipping log redo\n");
-
- return(DB_SUCCESS);
- }
-
- recv_recovery_on = TRUE;
-
- recv_sys->limit_lsn = limit_lsn;
-
- mutex_enter(&(log_sys->mutex));
-
- /* Look for the latest checkpoint from any of the log groups */
-
- err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
-
- if (err != DB_SUCCESS) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(err);
- }
-
- log_group_read_checkpoint_info(max_cp_group, max_cp_field);
-
- buf = log_sys->checkpoint_buf;
-
- checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
- checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
- archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
-
- /* Read the first log file header to print a note if this is
- a recovery from a restored InnoDB Hot Backup */
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id,
- 0, 0, LOG_FILE_HDR_SIZE,
- log_hdr_buf, max_cp_group);
-
- if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- (byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
- /* This log file was created by ibbackup --restore: print
- a note to the user about it */
-
- fprintf(stderr,
- "InnoDB: The log file was created by"
- " ibbackup --apply-log at\n"
- "InnoDB: %s\n",
- log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
- fprintf(stderr,
- "InnoDB: NOTE: the following crash recovery"
- " is part of a normal restore.\n");
-
- /* Wipe over the label now */
-
- memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- ' ', 4);
- /* Write to the log file to wipe over the label */
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE,
- max_cp_group->space_id,
- 0, 0, OS_FILE_LOG_BLOCK_SIZE,
- log_hdr_buf, max_cp_group);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- log_checkpoint_get_nth_group_info(buf, group->id,
- &(group->archived_file_no),
- &(group->archived_offset));
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (type == LOG_CHECKPOINT) {
- /* Start reading the log groups from the checkpoint lsn up. The
- variable contiguous_lsn contains an lsn up to which the log is
- known to be contiguously written to all log groups. */
-
- recv_sys->parse_start_lsn = checkpoint_lsn;
- recv_sys->scanned_lsn = checkpoint_lsn;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = checkpoint_lsn;
-
- srv_start_lsn = checkpoint_lsn;
- }
-
- contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- if (type == LOG_ARCHIVE) {
- /* Try to recover the remaining part from logs: first from
- the logs of the archived group */
-
- group = recv_sys->archive_group;
- capacity = log_group_get_capacity(group);
-
- if ((ut_dulint_cmp(recv_sys->scanned_lsn, ut_dulint_add(
- checkpoint_lsn, capacity)) > 0)
- || (ut_dulint_cmp(checkpoint_lsn, ut_dulint_add(
- recv_sys->scanned_lsn, capacity))
- > 0)) {
-
- mutex_exit(&(log_sys->mutex));
-
- /* The group does not contain enough log: probably
- an archived log file was missing or corrupt */
-
- return(DB_ERROR);
- }
-
- recv_group_scan_log_recs(group, &contiguous_lsn,
- &group_scanned_lsn);
- if (ut_dulint_cmp(recv_sys->scanned_lsn, checkpoint_lsn) < 0) {
-
- mutex_exit(&(log_sys->mutex));
-
- /* The group did not contain enough log: an archived
- log file was missing or invalid, or the log group
- was corrupt */
-
- return(DB_ERROR);
- }
-
- group->scanned_lsn = group_scanned_lsn;
- up_to_date_group = group;
- } else {
- up_to_date_group = max_cp_group;
- }
-
- ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- /* Set the flag to publish that we are doing startup scan. */
- recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
- while (group) {
- old_scanned_lsn = recv_sys->scanned_lsn;
-
- recv_group_scan_log_recs(group, &contiguous_lsn,
- &group_scanned_lsn);
- group->scanned_lsn = group_scanned_lsn;
-
- if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
- /* We found a more up-to-date group */
-
- up_to_date_group = group;
- }
-
- if ((type == LOG_ARCHIVE)
- && (group == recv_sys->archive_group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- /* Done with startup scan. Clear the flag. */
- recv_log_scan_is_startup_type = FALSE;
- if (type == LOG_CHECKPOINT) {
- /* NOTE: we always do a 'recovery' at startup, but only if
- there is something wrong we will print a message to the
- user about recovery: */
-
- if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
- || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
-
- if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
- < 0) {
- fprintf(stderr,
- "InnoDB: #########################"
- "#################################\n"
- "InnoDB: "
- "WARNING!\n"
- "InnoDB: The log sequence number"
- " in ibdata files is higher\n"
- "InnoDB: than the log sequence number"
- " in the ib_logfiles! Are you sure\n"
- "InnoDB: you are using the right"
- " ib_logfiles to start up"
- " the database?\n"
- "InnoDB: Log sequence number in"
- " ib_logfiles is %lu %lu, log\n"
- "InnoDB: sequence numbers stamped"
- " to ibdata file headers are between\n"
- "InnoDB: %lu %lu and %lu %lu.\n"
- "InnoDB: #########################"
- "#################################\n",
- (ulong) ut_dulint_get_high(
- checkpoint_lsn),
- (ulong) ut_dulint_get_low(
- checkpoint_lsn),
- (ulong) ut_dulint_get_high(
- min_flushed_lsn),
- (ulong) ut_dulint_get_low(
- min_flushed_lsn),
- (ulong) ut_dulint_get_high(
- max_flushed_lsn),
- (ulong) ut_dulint_get_low(
- max_flushed_lsn));
-
-
- }
-
- if (!recv_needed_recovery) {
- fprintf(stderr,
- "InnoDB: The log sequence number"
- " in ibdata files does not match\n"
- "InnoDB: the log sequence number"
- " in the ib_logfiles!\n");
- recv_init_crash_recovery();
- }
-
- }
- if (!recv_needed_recovery) {
- /* Init the doublewrite buffer memory structure */
- trx_sys_doublewrite_init_or_restore_pages(FALSE);
- }
- }
-
- /* We currently have only one log group */
- if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: We were only able to scan the log"
- " up to\n"
- "InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
- "InnoDB: It is possible that"
- " the database is now corrupt!\n",
- (ulong) ut_dulint_get_high(group_scanned_lsn),
- (ulong) ut_dulint_get_low(group_scanned_lsn),
- (ulong) ut_dulint_get_high(checkpoint_lsn),
- (ulong) ut_dulint_get_low(checkpoint_lsn));
- }
-
- if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: We were only able to scan the log"
- " up to %lu %lu\n"
- "InnoDB: but a database page a had an lsn %lu %lu."
- " It is possible that the\n"
- "InnoDB: database is now corrupt!\n",
- (ulong) ut_dulint_get_high(group_scanned_lsn),
- (ulong) ut_dulint_get_low(group_scanned_lsn),
- (ulong) ut_dulint_get_high(recv_max_page_lsn),
- (ulong) ut_dulint_get_low(recv_max_page_lsn));
- }
-
- if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
-
- mutex_exit(&(log_sys->mutex));
-
- if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) >= 0) {
-
- return(DB_SUCCESS);
- }
-
- ut_error;
-
- return(DB_ERROR);
- }
-
- /* Synchronize the uncorrupted log groups to the most up-to-date log
- group; we also copy checkpoint info to groups */
-
- log_sys->next_checkpoint_lsn = checkpoint_lsn;
- log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
-
-#ifdef UNIV_LOG_ARCHIVE
- log_sys->archived_lsn = archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
- recv_synchronize_groups(up_to_date_group);
-
- if (!recv_needed_recovery) {
- ut_a(ut_dulint_cmp(checkpoint_lsn,
- recv_sys->recovered_lsn) == 0);
-
- } else {
- srv_start_lsn = recv_sys->recovered_lsn;
- }
-
- log_sys->lsn = recv_sys->recovered_lsn;
-
- ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
-
- log_sys->buf_free = ut_dulint_get_low(log_sys->lsn)
- % OS_FILE_LOG_BLOCK_SIZE;
- log_sys->buf_next_to_write = log_sys->buf_free;
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->last_checkpoint_lsn = checkpoint_lsn;
-
- log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1);
-
-#ifdef UNIV_LOG_ARCHIVE
- if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) {
-
- log_sys->archiving_state = LOG_ARCH_OFF;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_enter(&(recv_sys->mutex));
-
- recv_sys->apply_log_recs = TRUE;
-
- mutex_exit(&(recv_sys->mutex));
-
- mutex_exit(&(log_sys->mutex));
-
- recv_lsn_checks_on = TRUE;
-
- /* The database is now ready to start almost normal processing of user
- transactions: transaction rollbacks and the application of the log
- records in the hash table can be run in background. */
-
- return(DB_SUCCESS);
-}
-
-/************************************************************
-Completes recovery from a checkpoint. */
-
-void
-recv_recovery_from_checkpoint_finish(void)
-/*======================================*/
-{
- int i;
-
- /* Apply the hashed log records to the respective file pages */
-
- if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
-
- recv_apply_hashed_log_recs(TRUE);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Log records applied to the database\n");
- }
-#endif /* UNIV_DEBUG */
-
- if (recv_needed_recovery) {
- trx_sys_print_mysql_master_log_pos();
- trx_sys_print_mysql_binlog_offset();
- }
-
- if (recv_sys->found_corrupt_log) {
-
- fprintf(stderr,
- "InnoDB: WARNING: the log file may have been"
- " corrupt and it\n"
- "InnoDB: is possible that the log scan or parsing"
- " did not proceed\n"
- "InnoDB: far enough in recovery. Please run"
- " CHECK TABLE\n"
- "InnoDB: on your InnoDB tables to check that"
- " they are ok!\n"
- "InnoDB: It may be safest to recover your"
- " InnoDB database from\n"
- "InnoDB: a backup!\n");
- }
-
- /* Free the resources of the recovery system */
-
- recv_recovery_on = FALSE;
-
-#ifndef UNIV_LOG_DEBUG
- recv_sys_free();
-#endif
-
-#ifdef UNIV_SYNC_DEBUG
- /* Wait for a while so that created threads have time to suspend
- themselves before we switch the latching order checks on */
- os_thread_sleep(1000000);
-
- /* Switch latching order checks on in sync0sync.c */
- sync_order_checks_on = TRUE;
-#endif
- if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
- /* Rollback the uncommitted transactions which have no user
- session */
-
- os_thread_create(trx_rollback_or_clean_all_without_sess,
- (void *)&i, NULL);
- }
-}
-
-/**********************************************************
-Resets the logs. The contents of log files will be lost! */
-
-void
-recv_reset_logs(
-/*============*/
- dulint lsn, /* in: reset to this lsn rounded up to
- be divisible by OS_FILE_LOG_BLOCK_SIZE,
- after which we add LOG_BLOCK_HDR_SIZE */
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /* in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created)/* in: TRUE if resetting logs is done
- at the log creation; FALSE if it is done
- after archive recovery */
-{
- log_group_t* group;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- log_sys->lsn = ut_dulint_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- group->lsn = log_sys->lsn;
- group->lsn_offset = LOG_FILE_HDR_SIZE;
-#ifdef UNIV_LOG_ARCHIVE
- group->archived_file_no = arch_log_no;
- group->archived_offset = 0;
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (!new_logs_created) {
- recv_truncate_group(group, group->lsn, group->lsn,
- group->lsn, group->lsn);
- }
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- log_sys->buf_next_to_write = 0;
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->next_checkpoint_no = ut_dulint_zero;
- log_sys->last_checkpoint_lsn = ut_dulint_zero;
-
-#ifdef UNIV_LOG_ARCHIVE
- log_sys->archived_lsn = log_sys->lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
-
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn = ut_dulint_add(log_sys->lsn, LOG_BLOCK_HDR_SIZE);
-
- mutex_exit(&(log_sys->mutex));
-
- /* Reset the checkpoint fields in logs */
-
- log_make_checkpoint_at(ut_dulint_max, TRUE);
- log_make_checkpoint_at(ut_dulint_max, TRUE);
-
- mutex_enter(&(log_sys->mutex));
-}
-
-#ifdef UNIV_HOTBACKUP
-/**********************************************************
-Creates new log files after a backup has been restored. */
-
-void
-recv_reset_log_files_for_backup(
-/*============================*/
- const char* log_dir, /* in: log file directory path */
- ulint n_log_files, /* in: number of log files */
- ulint log_file_size, /* in: log file size */
- dulint lsn) /* in: new start lsn, must be
- divisible by OS_FILE_LOG_BLOCK_SIZE */
-{
- os_file_t log_file;
- ibool success;
- byte* buf;
- ulint i;
- ulint log_dir_len;
- char name[5000];
- static const char ib_logfile_basename[] = "ib_logfile";
-
- log_dir_len = strlen(log_dir);
- /* full path name of ib_logfile consists of log dir path + basename
- + number. This must fit in the name buffer.
- */
- ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
-
- buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
-
- for (i = 0; i < n_log_files; i++) {
-
- sprintf(name, "%s%s%lu", log_dir,
- ib_logfile_basename, (ulong)i);
-
- log_file = os_file_create_simple(name, OS_FILE_CREATE,
- OS_FILE_READ_WRITE, &success);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Cannot create %s. Check that"
- " the file does not exist yet.\n", name);
-
- exit(1);
- }
-
- fprintf(stderr,
- "Setting log file size to %lu %lu\n",
- (ulong) ut_get_high32(log_file_size),
- (ulong) log_file_size & 0xFFFFFFFFUL);
-
- success = os_file_set_size(name, log_file,
- log_file_size & 0xFFFFFFFFUL,
- ut_get_high32(log_file_size));
-
- if (!success) {
- fprintf(stderr,
- "InnoDB: Cannot set %s size to %lu %lu\n",
- name, (ulong) ut_get_high32(log_file_size),
- (ulong) (log_file_size & 0xFFFFFFFFUL));
- exit(1);
- }
-
- os_file_flush(log_file);
- os_file_close(log_file);
- }
-
- /* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
-
- log_reset_first_header_and_checkpoint(buf, lsn);
-
- log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
- log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
- LOG_BLOCK_HDR_SIZE);
- sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
-
- log_file = os_file_create_simple(name, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &success);
- if (!success) {
- fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
-
- exit(1);
- }
-
- os_file_write(name, log_file, buf, 0, 0,
- LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- os_file_flush(log_file);
- os_file_close(log_file);
-
- ut_free(buf);
-}
-#endif /* UNIV_HOTBACKUP */
-
-#ifdef UNIV_LOG_ARCHIVE
-/**********************************************************
-Reads from the archive of a log group and performs recovery. */
-static
-ibool
-log_group_recover_from_archive_file(
-/*================================*/
- /* out: TRUE if no more complete
- consistent archive files */
- log_group_t* group) /* in: log group */
-{
- os_file_t file_handle;
- dulint start_lsn;
- dulint file_end_lsn;
- dulint dummy_lsn;
- dulint scanned_lsn;
- ulint len;
- ibool ret;
- byte* buf;
- ulint read_offset;
- ulint file_size;
- ulint file_size_high;
- int input_char;
- char name[10000];
-
- ut_a(0);
-
-try_open_again:
- buf = log_sys->buf;
-
- /* Add the file to the archive file space; open the file */
-
- log_archived_file_name_gen(name, group->id, group->archived_file_no);
-
- file_handle = os_file_create(name, OS_FILE_OPEN,
- OS_FILE_LOG, OS_FILE_AIO, &ret);
-
- if (ret == FALSE) {
-ask_again:
- fprintf(stderr,
- "InnoDB: Do you want to copy additional"
- " archived log files\n"
- "InnoDB: to the directory\n");
- fprintf(stderr,
- "InnoDB: or were these all the files needed"
- " in recovery?\n");
- fprintf(stderr,
- "InnoDB: (Y == copy more files; N == this is all)?");
-
- input_char = getchar();
-
- if (input_char == (int) 'N') {
-
- return(TRUE);
- } else if (input_char == (int) 'Y') {
-
- goto try_open_again;
- } else {
- goto ask_again;
- }
- }
-
- ret = os_file_get_size(file_handle, &file_size, &file_size_high);
- ut_a(ret);
-
- ut_a(file_size_high == 0);
-
- fprintf(stderr, "InnoDB: Opened archived log file %s\n", name);
-
- ret = os_file_close(file_handle);
-
- if (file_size < LOG_FILE_HDR_SIZE) {
- fprintf(stderr,
- "InnoDB: Archive file header incomplete %s\n", name);
-
- return(TRUE);
- }
-
- ut_a(ret);
-
- /* Add the archive file as a node to the space */
-
- fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
- group->archive_space_id, FALSE);
-#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE
-# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE"
-#endif
-
- /* Read the archive file header */
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0,
- LOG_FILE_HDR_SIZE, buf, NULL);
-
- /* Check if the archive file header is consistent */
-
- if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id
- || mach_read_from_4(buf + LOG_FILE_NO)
- != group->archived_file_no) {
- fprintf(stderr,
- "InnoDB: Archive file header inconsistent %s\n", name);
-
- return(TRUE);
- }
-
- if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) {
- fprintf(stderr,
- "InnoDB: Archive file not completely written %s\n",
- name);
-
- return(TRUE);
- }
-
- start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN);
- file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN);
-
- if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
-
- if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) {
- fprintf(stderr,
- "InnoDB: Archive log file %s"
- " starts from too big a lsn\n",
- name);
- return(TRUE);
- }
-
- recv_sys->scanned_lsn = start_lsn;
- }
-
- if (ut_dulint_cmp(recv_sys->scanned_lsn, start_lsn) != 0) {
-
- fprintf(stderr,
- "InnoDB: Archive log file %s starts from"
- " a wrong lsn\n",
- name);
- return(TRUE);
- }
-
- read_offset = LOG_FILE_HDR_SIZE;
-
- for (;;) {
- len = RECV_SCAN_SIZE;
-
- if (read_offset + len > file_size) {
- len = ut_calc_align_down(file_size - read_offset,
- OS_FILE_LOG_BLOCK_SIZE);
- }
-
- if (len == 0) {
-
- break;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Archive read starting at"
- " lsn %lu %lu, len %lu from file %s\n",
- (ulong) ut_dulint_get_high(start_lsn),
- (ulong) ut_dulint_get_low(start_lsn),
- (ulong) len, name);
- }
-#endif /* UNIV_DEBUG */
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
- group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
- read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
-
- ret = recv_scan_log_recs(
- TRUE, (buf_pool->n_frames - recv_n_pool_free_frames)
- * UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn,
- &dummy_lsn, &scanned_lsn);
-
- if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
-
- return(FALSE);
- }
-
- if (ret) {
- fprintf(stderr,
- "InnoDB: Archive log file %s"
- " does not scan right\n",
- name);
- return(TRUE);
- }
-
- read_offset += len;
- start_lsn = ut_dulint_add(start_lsn, len);
-
- ut_ad(ut_dulint_cmp(start_lsn, scanned_lsn) == 0);
- }
-
- return(FALSE);
-}
-
-/************************************************************
-Recovers from archived log files, and also from log files, if they exist. */
-
-ulint
-recv_recovery_from_archive_start(
-/*=============================*/
- /* out: error code or DB_SUCCESS */
- dulint min_flushed_lsn,/* in: min flushed lsn field from the
- data files */
- dulint limit_lsn, /* in: recover up to this lsn if possible */
- ulint first_log_no) /* in: number of the first archived log file
- to use in the recovery; the file will be
- searched from INNOBASE_LOG_ARCH_DIR specified
- in server config file */
-{
- log_group_t* group;
- ulint group_id;
- ulint trunc_len;
- ibool ret;
- ulint err;
-
- ut_a(0);
-
- recv_sys_create();
- recv_sys_init(FALSE, buf_pool_get_curr_size());
-
- recv_recovery_on = TRUE;
- recv_recovery_from_backup_on = TRUE;
-
- recv_sys->limit_lsn = limit_lsn;
-
- group_id = 0;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- if (group->id == group_id) {
-
- break;
- }
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- if (!group) {
- fprintf(stderr,
- "InnoDB: There is no log group defined with id %lu!\n",
- (ulong) group_id);
- return(DB_ERROR);
- }
-
- group->archived_file_no = first_log_no;
-
- recv_sys->parse_start_lsn = min_flushed_lsn;
-
- recv_sys->scanned_lsn = ut_dulint_zero;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
-
- recv_sys->archive_group = group;
-
- ret = FALSE;
-
- mutex_enter(&(log_sys->mutex));
-
- while (!ret) {
- ret = log_group_recover_from_archive_file(group);
-
- /* Close and truncate a possible processed archive file
- from the file space */
-
- trunc_len = UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id);
- if (trunc_len > 0) {
- fil_space_truncate_start(group->archive_space_id,
- trunc_len);
- }
-
- group->archived_file_no++;
- }
-
- if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) < 0) {
-
- if (ut_dulint_is_zero(recv_sys->scanned_lsn)) {
-
- recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE,
- limit_lsn,
- ut_dulint_max,
- ut_dulint_max);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- mutex_enter(&(log_sys->mutex));
- }
-
- if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) {
-
- recv_apply_hashed_log_recs(FALSE);
-
- recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
-}
-
-/************************************************************
-Completes recovery from archive. */
-
-void
-recv_recovery_from_archive_finish(void)
-/*===================================*/
-{
- recv_recovery_from_checkpoint_finish();
-
- recv_recovery_from_backup_on = FALSE;
-}
-#endif /* UNIV_LOG_ARCHIVE */
diff --git a/storage/innobase/mach/mach0data.c b/storage/innobase/mach/mach0data.c
deleted file mode 100644
index b92293fd037..00000000000
--- a/storage/innobase/mach/mach0data.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/**********************************************************************
-Utilities for converting data from the database file
-to the machine format.
-
-(c) 1995 Innobase Oy
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "mach0data.h"
-
-#ifdef UNIV_NONINL
-#include "mach0data.ic"
-#endif
-
-/*************************************************************
-Reads a ulint in a compressed form if the log record fully contains it. */
-
-byte*
-mach_parse_compressed(
-/*==================*/
- /* out: pointer to end of the stored field, NULL if
- not complete */
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- ulint* val) /* out: read value (< 2^32) */
-{
- ulint flag;
-
- ut_ad(ptr && end_ptr && val);
-
- if (ptr >= end_ptr) {
-
- return(NULL);
- }
-
- flag = mach_read_from_1(ptr);
-
- if (flag < 0x80UL) {
- *val = flag;
- return(ptr + 1);
-
- } else if (flag < 0xC0UL) {
- if (end_ptr < ptr + 2) {
- return(NULL);
- }
-
- *val = mach_read_from_2(ptr) & 0x7FFFUL;
-
- return(ptr + 2);
-
- } else if (flag < 0xE0UL) {
- if (end_ptr < ptr + 3) {
- return(NULL);
- }
-
- *val = mach_read_from_3(ptr) & 0x3FFFFFUL;
-
- return(ptr + 3);
- } else if (flag < 0xF0UL) {
- if (end_ptr < ptr + 4) {
- return(NULL);
- }
-
- *val = mach_read_from_4(ptr) & 0x1FFFFFFFUL;
-
- return(ptr + 4);
- } else {
- ut_ad(flag == 0xF0UL);
-
- if (end_ptr < ptr + 5) {
- return(NULL);
- }
-
- *val = mach_read_from_4(ptr + 1);
- return(ptr + 5);
- }
-}
-
-/*************************************************************
-Reads a dulint in a compressed form if the log record fully contains it. */
-
-byte*
-mach_dulint_parse_compressed(
-/*=========================*/
- /* out: pointer to end of the stored field, NULL if
- not complete */
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- dulint* val) /* out: read value */
-{
- ulint high;
- ulint low;
- ulint size;
-
- ut_ad(ptr && end_ptr && val);
-
- if (end_ptr < ptr + 5) {
-
- return(NULL);
- }
-
- high = mach_read_compressed(ptr);
-
- size = mach_get_compressed_size(high);
-
- ptr += size;
-
- if (end_ptr < ptr + 4) {
-
- return(NULL);
- }
-
- low = mach_read_from_4(ptr);
-
- *val = ut_dulint_create(high, low);
-
- return(ptr + 4);
-}
diff --git a/storage/innobase/mem/mem0dbg.c b/storage/innobase/mem/mem0dbg.c
deleted file mode 100644
index 72452907c3f..00000000000
--- a/storage/innobase/mem/mem0dbg.c
+++ /dev/null
@@ -1,984 +0,0 @@
-/************************************************************************
-The memory management: the debug code. This is not a compilation module,
-but is included in mem0mem.* !
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/9/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifdef UNIV_MEM_DEBUG
-mutex_t mem_hash_mutex; /* The mutex which protects in the
- debug version the hash table containing
- the list of live memory heaps, and
- also the global variables below. */
-
-/* The following variables contain information about the
-extent of memory allocations. Only used in the debug version.
-Protected by mem_hash_mutex above. */
-
-static ulint mem_n_created_heaps = 0;
-static ulint mem_n_allocations = 0;
-static ulint mem_total_allocated_memory = 0;
-ulint mem_current_allocated_memory = 0;
-static ulint mem_max_allocated_memory = 0;
-static ulint mem_last_print_info = 0;
-
-/* Size of the hash table for memory management tracking */
-#define MEM_HASH_SIZE 997
-
-/* The node of the list containing currently allocated memory heaps */
-
-typedef struct mem_hash_node_struct mem_hash_node_t;
-struct mem_hash_node_struct {
- UT_LIST_NODE_T(mem_hash_node_t)
- list; /* hash list node */
- mem_heap_t* heap; /* memory heap */
- const char* file_name;/* file where heap was created*/
- ulint line; /* file line of creation */
- ulint nth_heap;/* this is the nth heap created */
- UT_LIST_NODE_T(mem_hash_node_t)
- all_list;/* list of all created heaps */
-};
-
-typedef UT_LIST_BASE_NODE_T(mem_hash_node_t) mem_hash_cell_t;
-
-/* The hash table of allocated heaps */
-static mem_hash_cell_t mem_hash_table[MEM_HASH_SIZE];
-
-/* The base node of the list of all allocated heaps */
-static mem_hash_cell_t mem_all_list_base;
-
-static ibool mem_hash_initialized = FALSE;
-
-
-UNIV_INLINE
-mem_hash_cell_t*
-mem_hash_get_nth_cell(ulint i);
-
-/* Accessor function for the hash table. Returns a pointer to the
-table cell. */
-UNIV_INLINE
-mem_hash_cell_t*
-mem_hash_get_nth_cell(ulint i)
-{
- ut_a(i < MEM_HASH_SIZE);
-
- return(&(mem_hash_table[i]));
-}
-
-/* Accessor functions for a memory field in the debug version */
-
-void
-mem_field_header_set_len(byte* field, ulint len)
-{
- mach_write_to_4(field - 2 * sizeof(ulint), len);
-}
-
-ulint
-mem_field_header_get_len(byte* field)
-{
- return(mach_read_from_4(field - 2 * sizeof(ulint)));
-}
-
-void
-mem_field_header_set_check(byte* field, ulint check)
-{
- mach_write_to_4(field - sizeof(ulint), check);
-}
-
-ulint
-mem_field_header_get_check(byte* field)
-{
- return(mach_read_from_4(field - sizeof(ulint)));
-}
-
-void
-mem_field_trailer_set_check(byte* field, ulint check)
-{
- mach_write_to_4(field + mem_field_header_get_len(field), check);
-}
-
-ulint
-mem_field_trailer_get_check(byte* field)
-{
- return(mach_read_from_4(field
- + mem_field_header_get_len(field)));
-}
-#endif /* UNIV_MEM_DEBUG */
-
-/**********************************************************************
-Initializes the memory system. */
-
-void
-mem_init(
-/*=====*/
- ulint size) /* in: common pool size in bytes */
-{
-#ifdef UNIV_MEM_DEBUG
-
- ulint i;
-
- /* Initialize the hash table */
- ut_a(FALSE == mem_hash_initialized);
-
- mutex_create(&mem_hash_mutex, SYNC_MEM_HASH);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
- UT_LIST_INIT(*mem_hash_get_nth_cell(i));
- }
-
- UT_LIST_INIT(mem_all_list_base);
-
- mem_hash_initialized = TRUE;
-#endif
-
- mem_comm_pool = mem_pool_create(size);
-}
-
-#ifdef UNIV_MEM_DEBUG
-/**********************************************************************
-Initializes an allocated memory field in the debug version. */
-
-void
-mem_field_init(
-/*===========*/
- byte* buf, /* in: memory field */
- ulint n) /* in: how many bytes the user requested */
-{
- ulint rnd;
- byte* usr_buf;
-
- usr_buf = buf + MEM_FIELD_HEADER_SIZE;
-
- /* In the debug version write the length field and the
- check fields to the start and the end of the allocated storage.
- The field header consists of a length field and
- a random number field, in this order. The field trailer contains
- the same random number as a check field. */
-
- mem_field_header_set_len(usr_buf, n);
-
- rnd = ut_rnd_gen_ulint();
-
- mem_field_header_set_check(usr_buf, rnd);
- mem_field_trailer_set_check(usr_buf, rnd);
-
- /* Update the memory allocation information */
-
- mutex_enter(&mem_hash_mutex);
-
- mem_total_allocated_memory += n;
- mem_current_allocated_memory += n;
- mem_n_allocations++;
-
- if (mem_current_allocated_memory > mem_max_allocated_memory) {
- mem_max_allocated_memory = mem_current_allocated_memory;
- }
-
- mutex_exit(&mem_hash_mutex);
-
- /* In the debug version set the buffer to a random
- combination of 0xBA and 0xBE */
-
- mem_init_buf(usr_buf, n);
-}
-
-/**********************************************************************
-Erases an allocated memory field in the debug version. */
-
-void
-mem_field_erase(
-/*============*/
- byte* buf, /* in: memory field */
- ulint n __attribute__((unused)))
- /* in: how many bytes the user requested */
-{
- byte* usr_buf;
-
- usr_buf = buf + MEM_FIELD_HEADER_SIZE;
-
- mutex_enter(&mem_hash_mutex);
- mem_current_allocated_memory -= n;
- mutex_exit(&mem_hash_mutex);
-
- /* Check that the field lengths agree */
- ut_ad(n == (ulint)mem_field_header_get_len(usr_buf));
-
- /* In the debug version, set the freed space to a random
- combination of 0xDE and 0xAD */
-
- mem_erase_buf(buf, MEM_SPACE_NEEDED(n));
-}
-
-/*******************************************************************
-Initializes a buffer to a random combination of hex BA and BE.
-Used to initialize allocated memory. */
-
-void
-mem_init_buf(
-/*=========*/
- byte* buf, /* in: pointer to buffer */
- ulint n) /* in: length of buffer */
-{
- byte* ptr;
-
- UNIV_MEM_ASSERT_W(buf, n);
-
- for (ptr = buf; ptr < buf + n; ptr++) {
-
- if (ut_rnd_gen_ibool()) {
- *ptr = 0xBA;
- } else {
- *ptr = 0xBE;
- }
- }
-
- UNIV_MEM_INVALID(buf, n);
-}
-
-/*******************************************************************
-Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory.*/
-
-void
-mem_erase_buf(
-/*==========*/
- byte* buf, /* in: pointer to buffer */
- ulint n) /* in: length of buffer */
-{
- byte* ptr;
-
- UNIV_MEM_ASSERT_W(buf, n);
-
- for (ptr = buf; ptr < buf + n; ptr++) {
- if (ut_rnd_gen_ibool()) {
- *ptr = 0xDE;
- } else {
- *ptr = 0xAD;
- }
- }
-
- UNIV_MEM_FREE(buf, n);
-}
-
-/*******************************************************************
-Inserts a created memory heap to the hash table of current allocated
-memory heaps. */
-
-void
-mem_hash_insert(
-/*============*/
- mem_heap_t* heap, /* in: the created heap */
- const char* file_name, /* in: file name of creation */
- ulint line) /* in: line where created */
-{
- mem_hash_node_t* new_node;
- ulint cell_no ;
-
- ut_ad(mem_heap_check(heap));
-
- mutex_enter(&mem_hash_mutex);
-
- cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE);
-
- /* Allocate a new node to the list */
- new_node = ut_malloc(sizeof(mem_hash_node_t));
-
- new_node->heap = heap;
- new_node->file_name = file_name;
- new_node->line = line;
- new_node->nth_heap = mem_n_created_heaps;
-
- /* Insert into lists */
- UT_LIST_ADD_FIRST(list, *mem_hash_get_nth_cell(cell_no), new_node);
-
- UT_LIST_ADD_LAST(all_list, mem_all_list_base, new_node);
-
- mem_n_created_heaps++;
-
- mutex_exit(&mem_hash_mutex);
-}
-
-/*******************************************************************
-Removes a memory heap (which is going to be freed by the caller)
-from the list of live memory heaps. Returns the size of the heap
-in terms of how much memory in bytes was allocated for the user of
-the heap (not the total space occupied by the heap).
-Also validates the heap.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-
-void
-mem_hash_remove(
-/*============*/
- mem_heap_t* heap, /* in: the heap to be freed */
- const char* file_name, /* in: file name of freeing */
- ulint line) /* in: line where freed */
-{
- mem_hash_node_t* node;
- ulint cell_no;
- ibool error;
- ulint size;
-
- ut_ad(mem_heap_check(heap));
-
- mutex_enter(&mem_hash_mutex);
-
- cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE);
-
- /* Look for the heap in the hash table list */
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(cell_no));
-
- while (node != NULL) {
- if (node->heap == heap) {
-
- break;
- }
-
- node = UT_LIST_GET_NEXT(list, node);
- }
-
- if (node == NULL) {
- fprintf(stderr,
- "Memory heap or buffer freed in %s line %lu"
- " did not exist.\n",
- file_name, (ulong) line);
- ut_error;
- }
-
- /* Remove from lists */
- UT_LIST_REMOVE(list, *mem_hash_get_nth_cell(cell_no), node);
-
- UT_LIST_REMOVE(all_list, mem_all_list_base, node);
-
- /* Validate the heap which will be freed */
- mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size,
- NULL, NULL);
- if (error) {
- fprintf(stderr,
- "Inconsistency in memory heap or"
- " buffer n:o %lu created\n"
- "in %s line %lu and tried to free in %s line %lu.\n"
- "Hex dump of 400 bytes around memory heap"
- " first block start:\n",
- node->nth_heap, node->file_name, (ulong) node->line,
- file_name, (ulong) line);
- ut_print_buf(stderr, (byte*)node->heap - 200, 400);
- fputs("\nDump of the mem heap:\n", stderr);
- mem_heap_validate_or_print(node->heap, NULL, TRUE, &error,
- &size, NULL, NULL);
- ut_error;
- }
-
- /* Free the memory occupied by the node struct */
- ut_free(node);
-
- mem_current_allocated_memory -= size;
-
- mutex_exit(&mem_hash_mutex);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/*******************************************************************
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks. */
-
-void
-mem_heap_validate_or_print(
-/*=======================*/
- mem_heap_t* heap, /* in: memory heap */
- byte* top __attribute__((unused)),
- /* in: calculate and validate only until
- this top pointer in the heap is reached,
- if this pointer is NULL, ignored */
- ibool print, /* in: if TRUE, prints the contents
- of the heap; works only in
- the debug version */
- ibool* error, /* out: TRUE if error */
- ulint* us_size,/* out: allocated memory
- (for the user) in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored; in the
- non-debug version this is always -1 */
- ulint* ph_size,/* out: physical size of the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
- ulint* n_blocks) /* out: number of blocks in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
-{
- mem_block_t* block;
- ulint total_len = 0;
- ulint block_count = 0;
- ulint phys_len = 0;
-#ifdef UNIV_MEM_DEBUG
- ulint len;
- byte* field;
- byte* user_field;
- ulint check_field;
-#endif
-
- /* Pessimistically, we set the parameters to error values */
- if (us_size != NULL) {
- *us_size = 0;
- }
- if (ph_size != NULL) {
- *ph_size = 0;
- }
- if (n_blocks != NULL) {
- *n_blocks = 0;
- }
- *error = TRUE;
-
- block = heap;
-
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- return;
- }
-
- if (print) {
- fputs("Memory heap:", stderr);
- }
-
- while (block != NULL) {
- phys_len += mem_block_get_len(block);
-
- if ((block->type == MEM_HEAP_BUFFER)
- && (mem_block_get_len(block) > UNIV_PAGE_SIZE)) {
-
- fprintf(stderr,
- "InnoDB: Error: mem block %p"
- " length %lu > UNIV_PAGE_SIZE\n",
- (void*) block,
- (ulong) mem_block_get_len(block));
- /* error */
-
- return;
- }
-
-#ifdef UNIV_MEM_DEBUG
- /* We can trace the fields of the block only in the debug
- version */
- if (print) {
- fprintf(stderr, " Block %ld:", block_count);
- }
-
- field = (byte*)block + mem_block_get_start(block);
-
- if (top && (field == top)) {
-
- goto completed;
- }
-
- while (field < (byte*)block + mem_block_get_free(block)) {
-
- /* Calculate the pointer to the storage
- which was given to the user */
-
- user_field = field + MEM_FIELD_HEADER_SIZE;
-
- len = mem_field_header_get_len(user_field);
-
- if (print) {
- ut_print_buf(stderr, user_field, len);
- }
-
- total_len += len;
- check_field = mem_field_header_get_check(user_field);
-
- if (check_field
- != mem_field_trailer_get_check(user_field)) {
- /* error */
-
- fprintf(stderr,
- "InnoDB: Error: block %lx mem"
- " field %lx len %lu\n"
- "InnoDB: header check field is"
- " %lx but trailer %lx\n",
- (ulint)block,
- (ulint)field, len, check_field,
- mem_field_trailer_get_check(
- user_field));
-
- return;
- }
-
- /* Move to next field */
- field = field + MEM_SPACE_NEEDED(len);
-
- if (top && (field == top)) {
-
- goto completed;
- }
-
- }
-
- /* At the end check that we have arrived to the first free
- position */
-
- if (field != (byte*)block + mem_block_get_free(block)) {
- /* error */
-
- fprintf(stderr,
- "InnoDB: Error: block %lx end of"
- " mem fields %lx\n"
- "InnoDB: but block free at %lx\n",
- (ulint)block, (ulint)field,
- (ulint)((byte*)block
- + mem_block_get_free(block)));
-
- return;
- }
-
-#endif
-
- block = UT_LIST_GET_NEXT(list, block);
- block_count++;
- }
-#ifdef UNIV_MEM_DEBUG
-completed:
-#endif
- if (us_size != NULL) {
- *us_size = total_len;
- }
- if (ph_size != NULL) {
- *ph_size = phys_len;
- }
- if (n_blocks != NULL) {
- *n_blocks = block_count;
- }
- *error = FALSE;
-}
-
-/******************************************************************
-Prints the contents of a memory heap. */
-static
-void
-mem_heap_print(
-/*===========*/
- mem_heap_t* heap) /* in: memory heap */
-{
- ibool error;
- ulint us_size;
- ulint phys_size;
- ulint n_blocks;
-
- ut_ad(mem_heap_check(heap));
-
- mem_heap_validate_or_print(heap, NULL, TRUE, &error,
- &us_size, &phys_size, &n_blocks);
- fprintf(stderr,
- "\nheap type: %lu; size: user size %lu;"
- " physical size %lu; blocks %lu.\n",
- (ulong) heap->type, (ulong) us_size,
- (ulong) phys_size, (ulong) n_blocks);
- ut_a(!error);
-}
-
-/******************************************************************
-Validates the contents of a memory heap. */
-
-ibool
-mem_heap_validate(
-/*==============*/
- /* out: TRUE if ok */
- mem_heap_t* heap) /* in: memory heap */
-{
- ibool error;
- ulint us_size;
- ulint phys_size;
- ulint n_blocks;
-
- ut_ad(mem_heap_check(heap));
-
- mem_heap_validate_or_print(heap, NULL, FALSE, &error, &us_size,
- &phys_size, &n_blocks);
- if (error) {
- mem_heap_print(heap);
- }
-
- ut_a(!error);
-
- return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-/******************************************************************
-Checks that an object is a memory heap (or a block of it). */
-
-ibool
-mem_heap_check(
-/*===========*/
- /* out: TRUE if ok */
- mem_heap_t* heap) /* in: memory heap */
-{
- ut_a(heap->magic_n == MEM_BLOCK_MAGIC_N);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_MEM_DEBUG
-/*********************************************************************
-TRUE if no memory is currently allocated. */
-
-ibool
-mem_all_freed(void)
-/*===============*/
- /* out: TRUE if no heaps exist */
-{
- mem_hash_node_t* node;
- ulint heap_count = 0;
- ulint i;
-
- mem_validate();
-
- mutex_enter(&mem_hash_mutex);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
-
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
- while (node != NULL) {
- heap_count++;
- node = UT_LIST_GET_NEXT(list, node);
- }
- }
-
- mutex_exit(&mem_hash_mutex);
-
- if (heap_count == 0) {
-
- ut_a(mem_pool_get_reserved(mem_comm_pool) == 0);
-
- return(TRUE);
- } else {
- return(FALSE);
- }
-}
-
-/*********************************************************************
-Validates the dynamic memory allocation system. */
-
-ibool
-mem_validate_no_assert(void)
-/*========================*/
- /* out: TRUE if error */
-{
- mem_hash_node_t* node;
- ulint n_heaps = 0;
- ulint allocated_mem;
- ulint ph_size;
- ulint total_allocated_mem = 0;
- ibool error = FALSE;
- ulint n_blocks;
- ulint i;
-
- mem_pool_validate(mem_comm_pool);
-
- mutex_enter(&mem_hash_mutex);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
-
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
-
- while (node != NULL) {
- n_heaps++;
-
- mem_heap_validate_or_print(node->heap, NULL,
- FALSE, &error,
- &allocated_mem,
- &ph_size, &n_blocks);
-
- if (error) {
- fprintf(stderr,
- "\nERROR!!!!!!!!!!!!!!!!!!!"
- "!!!!!!!!!!!!!!!!!!!!!!!\n\n"
- "Inconsistency in memory heap"
- " or buffer created\n"
- "in %s line %lu.\n",
- node->file_name, node->line);
-
- mutex_exit(&mem_hash_mutex);
-
- return(TRUE);
- }
-
- total_allocated_mem += allocated_mem;
- node = UT_LIST_GET_NEXT(list, node);
- }
- }
-
- if ((n_heaps == 0) && (mem_current_allocated_memory != 0)) {
- error = TRUE;
- }
-
- if (mem_total_allocated_memory < mem_current_allocated_memory) {
- error = TRUE;
- }
-
- if (mem_max_allocated_memory > mem_total_allocated_memory) {
- error = TRUE;
- }
-
- if (mem_n_created_heaps < n_heaps) {
- error = TRUE;
- }
-
- mutex_exit(&mem_hash_mutex);
-
- return(error);
-}
-
-/****************************************************************
-Validates the dynamic memory */
-
-ibool
-mem_validate(void)
-/*==============*/
- /* out: TRUE if ok */
-{
- ut_a(!mem_validate_no_assert());
-
- return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-/****************************************************************
-Tries to find neigboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer. */
-
-void
-mem_analyze_corruption(
-/*===================*/
- void* ptr) /* in: pointer to place of possible corruption */
-{
- byte* p;
- ulint i;
- ulint dist;
-
- fputs("InnoDB: Apparent memory corruption: mem dump ", stderr);
- ut_print_buf(stderr, (byte*)ptr - 250, 500);
-
- fputs("\nInnoDB: Scanning backward trying to find"
- " previous allocated mem blocks\n", stderr);
-
- p = (byte*)ptr;
- dist = 0;
-
- for (i = 0; i < 10; i++) {
- for (;;) {
- if (((ulint)p) % 4 == 0) {
-
- if (*((ulint*)p) == MEM_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Mem block at - %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
-
- if (*((ulint*)p) == MEM_FREED_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Freed mem block at - %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
- }
-
- p--;
- dist++;
- }
-
- p--;
- dist++;
- }
-
- fprintf(stderr,
- "InnoDB: Scanning forward trying to find next"
- " allocated mem blocks\n");
-
- p = (byte*)ptr;
- dist = 0;
-
- for (i = 0; i < 10; i++) {
- for (;;) {
- if (((ulint)p) % 4 == 0) {
-
- if (*((ulint*)p) == MEM_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Mem block at + %lu, file %s,"
- " line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
-
- if (*((ulint*)p) == MEM_FREED_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Freed mem block at + %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
- }
-
- p++;
- dist++;
- }
-
- p++;
- dist++;
- }
-}
-
-/*********************************************************************
-Prints information of dynamic memory usage and currently allocated
-memory heaps or buffers. Can only be used in the debug version. */
-static
-void
-mem_print_info_low(
-/*===============*/
- ibool print_all) /* in: if TRUE, all heaps are printed,
- else only the heaps allocated after the
- previous call of this function */
-{
-#ifdef UNIV_MEM_DEBUG
- mem_hash_node_t* node;
- ulint n_heaps = 0;
- ulint allocated_mem;
- ulint ph_size;
- ulint total_allocated_mem = 0;
- ibool error;
- ulint n_blocks;
-#endif
- FILE* outfile;
-
- /* outfile = fopen("ibdebug", "a"); */
-
- outfile = stdout;
-
- fprintf(outfile, "\n");
- fprintf(outfile,
- "________________________________________________________\n");
- fprintf(outfile, "MEMORY ALLOCATION INFORMATION\n\n");
-
-#ifndef UNIV_MEM_DEBUG
-
- UT_NOT_USED(print_all);
-
- mem_pool_print_info(outfile, mem_comm_pool);
-
- fprintf(outfile,
- "Sorry, non-debug version cannot give more memory info\n");
-
- /* fclose(outfile); */
-
- return;
-#else
- mutex_enter(&mem_hash_mutex);
-
- fprintf(outfile, "LIST OF CREATED HEAPS AND ALLOCATED BUFFERS: \n\n");
-
- if (!print_all) {
- fprintf(outfile, "AFTER THE LAST PRINT INFO\n");
- }
-
- node = UT_LIST_GET_FIRST(mem_all_list_base);
-
- while (node != NULL) {
- n_heaps++;
-
- if (!print_all && node->nth_heap < mem_last_print_info) {
-
- goto next_heap;
- }
-
- mem_heap_validate_or_print(node->heap, NULL,
- FALSE, &error, &allocated_mem,
- &ph_size, &n_blocks);
- total_allocated_mem += allocated_mem;
-
- fprintf(outfile,
- "%lu: file %s line %lu of size %lu phys.size %lu"
- " with %lu blocks, type %lu\n",
- node->nth_heap, node->file_name, node->line,
- allocated_mem, ph_size, n_blocks,
- (node->heap)->type);
-next_heap:
- node = UT_LIST_GET_NEXT(all_list, node);
- }
-
- fprintf(outfile, "\n");
-
- fprintf(outfile, "Current allocated memory : %lu\n",
- mem_current_allocated_memory);
- fprintf(outfile, "Current allocated heaps and buffers : %lu\n",
- n_heaps);
- fprintf(outfile, "Cumulative allocated memory : %lu\n",
- mem_total_allocated_memory);
- fprintf(outfile, "Maximum allocated memory : %lu\n",
- mem_max_allocated_memory);
- fprintf(outfile, "Cumulative created heaps and buffers : %lu\n",
- mem_n_created_heaps);
- fprintf(outfile, "Cumulative number of allocations : %lu\n",
- mem_n_allocations);
-
- mem_last_print_info = mem_n_created_heaps;
-
- mutex_exit(&mem_hash_mutex);
-
- mem_pool_print_info(outfile, mem_comm_pool);
-
- /* mem_validate(); */
-
- /* fclose(outfile); */
-#endif
-}
-
-/*********************************************************************
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers. Can only be used in the debug version. */
-
-void
-mem_print_info(void)
-/*================*/
-{
- mem_print_info_low(TRUE);
-}
-
-/*********************************************************************
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers since the last ..._print_info or..._print_new_info. */
-
-void
-mem_print_new_info(void)
-/*====================*/
-{
- mem_print_info_low(FALSE);
-}
diff --git a/storage/innobase/mem/mem0mem.c b/storage/innobase/mem/mem0mem.c
deleted file mode 100644
index f4fd178a39c..00000000000
--- a/storage/innobase/mem/mem0mem.c
+++ /dev/null
@@ -1,577 +0,0 @@
-/************************************************************************
-The memory management
-
-(c) 1994, 1995 Innobase Oy
-
-Created 6/9/1994 Heikki Tuuri
-*************************************************************************/
-
-
-#include "mem0mem.h"
-#ifdef UNIV_NONINL
-#include "mem0mem.ic"
-#endif
-
-#include "mach0data.h"
-#include "buf0buf.h"
-#include "btr0sea.h"
-#include "srv0srv.h"
-#include "mem0dbg.c"
-#include <stdarg.h>
-
-/*
- THE MEMORY MANAGEMENT
- =====================
-
-The basic element of the memory management is called a memory
-heap. A memory heap is conceptually a
-stack from which memory can be allocated. The stack may grow infinitely.
-The top element of the stack may be freed, or
-the whole stack can be freed at one time. The advantage of the
-memory heap concept is that we can avoid using the malloc and free
-functions of C which are quite expensive, for example, on the Solaris + GCC
-system (50 MHz Sparc, 1993) the pair takes 3 microseconds,
-on Win NT + 100MHz Pentium, 2.5 microseconds.
-When we use a memory heap,
-we can allocate larger blocks of memory at a time and thus
-reduce overhead. Slightly more efficient the method is when we
-allocate the memory from the index page buffer pool, as we can
-claim a new page fast. This is called buffer allocation.
-When we allocate the memory from the dynamic memory of the
-C environment, that is called dynamic allocation.
-
-The default way of operation of the memory heap is the following.
-First, when the heap is created, an initial block of memory is
-allocated. In dynamic allocation this may be about 50 bytes.
-If more space is needed, additional blocks are allocated
-and they are put into a linked list.
-After the initial block, each allocated block is twice the size of the
-previous, until a threshold is attained, after which the sizes
-of the blocks stay the same. An exception is, of course, the case
-where the caller requests a memory buffer whose size is
-bigger than the threshold. In that case a block big enough must
-be allocated.
-
-The heap is physically arranged so that if the current block
-becomes full, a new block is allocated and always inserted in the
-chain of blocks as the last block.
-
-In the debug version of the memory management, all the allocated
-heaps are kept in a list (which is implemented as a hash table).
-Thus we can notice if the caller tries to free an already freed
-heap. In addition, each buffer given to the caller contains
-start field at the start and a trailer field at the end of the buffer.
-
-The start field has the following content:
-A. sizeof(ulint) bytes of field length (in the standard byte order)
-B. sizeof(ulint) bytes of check field (a random number)
-
-The trailer field contains:
-A. sizeof(ulint) bytes of check field (the same random number as at the start)
-
-Thus we can notice if something has been copied over the
-borders of the buffer, which is illegal.
-The memory in the buffers is initialized to a random byte sequence.
-After freeing, all the blocks in the heap are set to random bytes
-to help us discover errors which result from the use of
-buffers in an already freed heap. */
-
-#ifdef MEM_PERIODIC_CHECK
-
-ibool mem_block_list_inited;
-/* List of all mem blocks allocated; protected by the mem_comm_pool mutex */
-UT_LIST_BASE_NODE_T(mem_block_t) mem_block_list;
-
-#endif
-
-/*******************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free. */
-
-void*
-mem_alloc_func_noninline(
-/*=====================*/
- /* out, own: free storage */
- ulint n, /* in: desired number of bytes */
- const char* file_name, /* in: file name where created */
- ulint line) /* in: line where created */
-{
- return(mem_alloc_func(n, file_name, line));
-}
-
-/**************************************************************************
-Duplicates a NUL-terminated string, allocated from a memory heap. */
-
-char*
-mem_heap_strdup(
-/*============*/
- /* out, own: a copy of the string */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* str) /* in: string to be copied */
-{
- return(mem_heap_dup(heap, str, strlen(str) + 1));
-}
-
-/**************************************************************************
-Duplicate a block of data, allocated from a memory heap. */
-
-void*
-mem_heap_dup(
-/*=========*/
- /* out, own: a copy of the data */
- mem_heap_t* heap, /* in: memory heap where copy is allocated */
- const void* data, /* in: data to be copied */
- ulint len) /* in: length of data, in bytes */
-{
- return(memcpy(mem_heap_alloc(heap, len), data, len));
-}
-
-/**************************************************************************
-Concatenate two memory blocks and return the result, using a memory heap. */
-
-void*
-mem_heap_cat(
-/*=========*/
- /* out, own: the result */
- mem_heap_t* heap, /* in: memory heap where result is allocated */
- const void* b1, /* in: block 1 */
- ulint len1, /* in: length of b1, in bytes */
- const void* b2, /* in: block 2 */
- ulint len2) /* in: length of b2, in bytes */
-{
- void* res = mem_heap_alloc(heap, len1 + len2);
-
- memcpy(res, b1, len1);
- memcpy((char*)res + len1, b2, len2);
-
- return(res);
-}
-
-/**************************************************************************
-Concatenate two strings and return the result, using a memory heap. */
-
-char*
-mem_heap_strcat(
-/*============*/
- /* out, own: the result */
- mem_heap_t* heap, /* in: memory heap where string is allocated */
- const char* s1, /* in: string 1 */
- const char* s2) /* in: string 2 */
-{
- char* s;
- ulint s1_len = strlen(s1);
- ulint s2_len = strlen(s2);
-
- s = mem_heap_alloc(heap, s1_len + s2_len + 1);
-
- memcpy(s, s1, s1_len);
- memcpy(s + s1_len, s2, s2_len);
-
- s[s1_len + s2_len] = '\0';
-
- return(s);
-}
-
-
-/********************************************************************
-Helper function for mem_heap_printf. */
-static
-ulint
-mem_heap_printf_low(
-/*================*/
- /* out: length of formatted string,
- including terminating NUL */
- char* buf, /* in/out: buffer to store formatted string
- in, or NULL to just calculate length */
- const char* format, /* in: format string */
- va_list ap) /* in: arguments */
-{
- ulint len = 0;
-
- while (*format) {
-
- /* Does this format specifier have the 'l' length modifier. */
- ibool is_long = FALSE;
-
- /* Length of one parameter. */
- size_t plen;
-
- if (*format++ != '%') {
- /* Non-format character. */
-
- len++;
-
- if (buf) {
- *buf++ = *(format - 1);
- }
-
- continue;
- }
-
- if (*format == 'l') {
- is_long = TRUE;
- format++;
- }
-
- switch (*format++) {
- case 's':
- /* string */
- {
- char* s = va_arg(ap, char*);
-
- /* "%ls" is a non-sensical format specifier. */
- ut_a(!is_long);
-
- plen = strlen(s);
- len += plen;
-
- if (buf) {
- memcpy(buf, s, plen);
- buf += plen;
- }
- }
-
- break;
-
- case 'u':
- /* unsigned int */
- {
- char tmp[32];
- unsigned long val;
-
- /* We only support 'long' values for now. */
- ut_a(is_long);
-
- val = va_arg(ap, unsigned long);
-
- plen = sprintf(tmp, "%lu", val);
- len += plen;
-
- if (buf) {
- memcpy(buf, tmp, plen);
- buf += plen;
- }
- }
-
- break;
-
- case '%':
-
- /* "%l%" is a non-sensical format specifier. */
- ut_a(!is_long);
-
- len++;
-
- if (buf) {
- *buf++ = '%';
- }
-
- break;
-
- default:
- ut_error;
- }
- }
-
- /* For the NUL character. */
- len++;
-
- if (buf) {
- *buf = '\0';
- }
-
- return(len);
-}
-
-/********************************************************************
-A simple (s)printf replacement that dynamically allocates the space for the
-formatted string from the given heap. This supports a very limited set of
-the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type). */
-
-char*
-mem_heap_printf(
-/*============*/
- /* out: heap-allocated formatted string */
- mem_heap_t* heap, /* in: memory heap */
- const char* format, /* in: format string */
- ...)
-{
- va_list ap;
- char* str;
- ulint len;
-
- /* Calculate length of string */
- len = 0;
- va_start(ap, format);
- len = mem_heap_printf_low(NULL, format, ap);
- va_end(ap);
-
- /* Now create it for real. */
- str = mem_heap_alloc(heap, len);
- va_start(ap, format);
- mem_heap_printf_low(str, format, ap);
- va_end(ap);
-
- return(str);
-}
-
-/*******************************************************************
-Creates a memory heap block where data can be allocated. */
-
-mem_block_t*
-mem_heap_create_block(
-/*==================*/
- /* out, own: memory heap block, NULL if
- did not succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps) */
- mem_heap_t* heap, /* in: memory heap or NULL if first block
- should be created */
- ulint n, /* in: number of bytes needed for user data, or
- if init_block is not NULL, its size in bytes */
- void* init_block, /* in: init block in fast create,
- type must be MEM_HEAP_DYNAMIC */
- ulint type, /* in: type of heap: MEM_HEAP_DYNAMIC or
- MEM_HEAP_BUFFER */
- const char* file_name,/* in: file name where created */
- ulint line) /* in: line where created */
-{
- mem_block_t* block;
- ulint len;
-
- ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
- || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
-
- if (heap && heap->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(heap);
- }
-
- /* In dynamic allocation, calculate the size: block header + data. */
-
- if (init_block != NULL) {
- ut_ad(type == MEM_HEAP_DYNAMIC);
- ut_ad(n > MEM_BLOCK_START_SIZE + MEM_BLOCK_HEADER_SIZE);
- len = n;
- block = init_block;
-
- } else if (type == MEM_HEAP_DYNAMIC) {
-
- len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
- block = mem_area_alloc(len, mem_comm_pool);
- } else {
- ut_ad(n <= MEM_MAX_ALLOC_IN_BUF);
-
- len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
-
- if (len < UNIV_PAGE_SIZE / 2) {
-
- block = mem_area_alloc(len, mem_comm_pool);
- } else {
- len = UNIV_PAGE_SIZE;
-
- if ((type & MEM_HEAP_BTR_SEARCH) && heap) {
- /* We cannot allocate the block from the
- buffer pool, but must get the free block from
- the heap header free block field */
-
- block = (mem_block_t*)heap->free_block;
- heap->free_block = NULL;
- } else {
- block = (mem_block_t*)buf_frame_alloc();
- }
- }
- }
-
- if (block == NULL) {
- /* Only MEM_HEAP_BTR_SEARCH allocation should ever fail. */
- ut_a(type & MEM_HEAP_BTR_SEARCH);
-
- return(NULL);
- }
-
- block->magic_n = MEM_BLOCK_MAGIC_N;
- ut_strlcpy_rev(block->file_name, file_name, sizeof(block->file_name));
- block->line = line;
-
-#ifdef MEM_PERIODIC_CHECK
- mem_pool_mutex_enter();
-
- if (!mem_block_list_inited) {
- mem_block_list_inited = TRUE;
- UT_LIST_INIT(mem_block_list);
- }
-
- UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block);
-
- mem_pool_mutex_exit();
-#endif
- mem_block_set_len(block, len);
- mem_block_set_type(block, type);
- mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
- mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE);
-
- block->free_block = NULL;
- block->init_block = (init_block != NULL);
-
- ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
-
- return(block);
-}
-
-/*******************************************************************
-Adds a new block to a memory heap. */
-
-mem_block_t*
-mem_heap_add_block(
-/*===============*/
- /* out: created block, NULL if did not
- succeed (only possible for
- MEM_HEAP_BTR_SEARCH type heaps)*/
- mem_heap_t* heap, /* in: memory heap */
- ulint n) /* in: number of bytes user needs */
-{
- mem_block_t* block;
- mem_block_t* new_block;
- ulint new_size;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- /* We have to allocate a new block. The size is always at least
- doubled until the standard size is reached. After that the size
- stays the same, except in cases where the caller needs more space. */
-
- new_size = 2 * mem_block_get_len(block);
-
- if (heap->type != MEM_HEAP_DYNAMIC) {
- /* From the buffer pool we allocate buffer frames */
- ut_a(n <= MEM_MAX_ALLOC_IN_BUF);
-
- if (new_size > MEM_MAX_ALLOC_IN_BUF) {
- new_size = MEM_MAX_ALLOC_IN_BUF;
- }
- } else if (new_size > MEM_BLOCK_STANDARD_SIZE) {
-
- new_size = MEM_BLOCK_STANDARD_SIZE;
- }
-
- if (new_size < n) {
- new_size = n;
- }
-
- new_block = mem_heap_create_block(heap, new_size, NULL, heap->type,
- heap->file_name, heap->line);
- if (new_block == NULL) {
-
- return(NULL);
- }
-
- /* Add the new block as the last block */
-
- UT_LIST_INSERT_AFTER(list, heap->base, block, new_block);
-
- return(new_block);
-}
-
-/**********************************************************************
-Frees a block from a memory heap. */
-
-void
-mem_heap_block_free(
-/*================*/
- mem_heap_t* heap, /* in: heap */
- mem_block_t* block) /* in: block to free */
-{
- ulint type;
- ulint len;
- ibool init_block;
-
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(block);
- }
-
- UT_LIST_REMOVE(list, heap->base, block);
-
-#ifdef MEM_PERIODIC_CHECK
- mem_pool_mutex_enter();
-
- UT_LIST_REMOVE(mem_block_list, mem_block_list, block);
-
- mem_pool_mutex_exit();
-#endif
- type = heap->type;
- len = block->len;
- init_block = block->init_block;
- block->magic_n = MEM_FREED_BLOCK_MAGIC_N;
-
-#ifdef UNIV_MEM_DEBUG
- /* In the debug version we set the memory to a random combination
- of hex 0xDE and 0xAD. */
-
- mem_erase_buf((byte*)block, len);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_AND_FREE(block, len);
-#endif /* UNIV_MEM_DEBUG */
-
- if (init_block) {
- /* Do not have to free: do nothing */
-
- } else if (type == MEM_HEAP_DYNAMIC) {
-
- mem_area_free(block, mem_comm_pool);
- } else {
- ut_ad(type & MEM_HEAP_BUFFER);
-
- if (len >= UNIV_PAGE_SIZE / 2) {
- buf_frame_free((byte*)block);
- } else {
- mem_area_free(block, mem_comm_pool);
- }
- }
-}
-
-/**********************************************************************
-Frees the free_block field from a memory heap. */
-
-void
-mem_heap_free_block_free(
-/*=====================*/
- mem_heap_t* heap) /* in: heap */
-{
- if (heap->free_block) {
-
- buf_frame_free(heap->free_block);
-
- heap->free_block = NULL;
- }
-}
-
-#ifdef MEM_PERIODIC_CHECK
-/**********************************************************************
-Goes through the list of all allocated mem blocks, checks their magic
-numbers, and reports possible corruption. */
-
-void
-mem_validate_all_blocks(void)
-/*=========================*/
-{
- mem_block_t* block;
-
- mem_pool_mutex_enter();
-
- block = UT_LIST_GET_FIRST(mem_block_list);
-
- while (block) {
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(block);
- }
-
- block = UT_LIST_GET_NEXT(mem_block_list, block);
- }
-
- mem_pool_mutex_exit();
-}
-#endif
diff --git a/storage/innobase/mem/mem0pool.c b/storage/innobase/mem/mem0pool.c
deleted file mode 100644
index 315f719ca09..00000000000
--- a/storage/innobase/mem/mem0pool.c
+++ /dev/null
@@ -1,682 +0,0 @@
-/************************************************************************
-The lowest-level memory management
-
-(c) 1997 Innobase Oy
-
-Created 5/12/1997 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0pool.h"
-#ifdef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#include "sync0sync.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-
-/* We would like to use also the buffer frames to allocate memory. This
-would be desirable, because then the memory consumption of the database
-would be fixed, and we might even lock the buffer pool to the main memory.
-The problem here is that the buffer management routines can themselves call
-memory allocation, while the buffer pool mutex is reserved.
-
-The main components of the memory consumption are:
-
-1. buffer pool,
-2. parsed and optimized SQL statements,
-3. data dictionary cache,
-4. log buffer,
-5. locks for each transaction,
-6. hash table for the adaptive index,
-7. state and buffers for each SQL query currently being executed,
-8. session for each user, and
-9. stack for each OS thread.
-
-Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
-consume very much memory. Items 7 and 8 should consume quite little memory,
-and the OS should take care of item 9, which too should consume little memory.
-
-A solution to the memory management:
-
-1. the buffer pool size is set separately;
-2. log buffer size is set separately;
-3. the common pool size for all the other entries, except 8, is set separately.
-
-Problems: we may waste memory if the common pool is set too big. Another
-problem is the locks, which may take very much space in big transactions.
-Then the shared pool size should be set very big. We can allow locks to take
-space from the buffer pool, but the SQL optimizer is then unaware of the
-usable size of the buffer pool. We could also combine the objects in the
-common pool and the buffers in the buffer pool into a single LRU list and
-manage it uniformly, but this approach does not take into account the parsing
-and other costs unique to SQL statements.
-
-The locks for a transaction can be seen as a part of the state of the
-transaction. Hence, they should be stored in the common pool. We still
-have the problem of a very big update transaction, for example, which
-will set very many x-locks on rows, and the locks will consume a lot
-of memory, say, half of the buffer pool size.
-
-Another problem is what to do if we are not able to malloc a requested
-block of memory from the common pool. Then we can request memory from
-the operating system. If it does not help, a system error results.
-
-Because 5 and 6 may potentially consume very much memory, we let them grow
-into the buffer pool. We may let the locks of a transaction take frames
-from the buffer pool, when the corresponding memory heap block has grown to
-the size of a buffer frame. Similarly for the hash node cells of the locks,
-and for the adaptive index. Thus, for each individual transaction, its locks
-can occupy at most about the size of the buffer frame of memory in the common
-pool, and after that its locks will grow into the buffer pool. */
-
-/* Mask used to extract the free bit from area->size */
-#define MEM_AREA_FREE 1
-
-/* The smallest memory area total size */
-#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
-
-
-/* Data structure for a memory pool. The space is allocated using the buddy
-algorithm, where free list i contains areas of size 2 to power i. */
-struct mem_pool_struct{
- byte* buf; /* memory pool */
- ulint size; /* memory common pool size */
- ulint reserved; /* amount of currently allocated
- memory */
- mutex_t mutex; /* mutex protecting this struct */
- UT_LIST_BASE_NODE_T(mem_area_t)
- free_list[64]; /* lists of free memory areas: an
- area is put to the list whose number
- is the 2-logarithm of the area size */
-};
-
-/* The common memory pool */
-mem_pool_t* mem_comm_pool = NULL;
-
-/* We use this counter to check that the mem pool mutex does not leak;
-this is to track a strange assertion failure reported at
-mysql@lists.mysql.com */
-
-ulint mem_n_threads_inside = 0;
-
-/************************************************************************
-Reserves the mem pool mutex. */
-
-void
-mem_pool_mutex_enter(void)
-/*======================*/
-{
- mutex_enter(&(mem_comm_pool->mutex));
-}
-
-/************************************************************************
-Releases the mem pool mutex. */
-
-void
-mem_pool_mutex_exit(void)
-/*=====================*/
-{
- mutex_exit(&(mem_comm_pool->mutex));
-}
-
-/************************************************************************
-Returns memory area size. */
-UNIV_INLINE
-ulint
-mem_area_get_size(
-/*==============*/
- /* out: size */
- mem_area_t* area) /* in: area */
-{
- return(area->size_and_free & ~MEM_AREA_FREE);
-}
-
-/************************************************************************
-Sets memory area size. */
-UNIV_INLINE
-void
-mem_area_set_size(
-/*==============*/
- mem_area_t* area, /* in: area */
- ulint size) /* in: size */
-{
- area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
- | size;
-}
-
-/************************************************************************
-Returns memory area free bit. */
-UNIV_INLINE
-ibool
-mem_area_get_free(
-/*==============*/
- /* out: TRUE if free */
- mem_area_t* area) /* in: area */
-{
-#if TRUE != MEM_AREA_FREE
-# error "TRUE != MEM_AREA_FREE"
-#endif
- return(area->size_and_free & MEM_AREA_FREE);
-}
-
-/************************************************************************
-Sets memory area free bit. */
-UNIV_INLINE
-void
-mem_area_set_free(
-/*==============*/
- mem_area_t* area, /* in: area */
- ibool free) /* in: free bit value */
-{
-#if TRUE != MEM_AREA_FREE
-# error "TRUE != MEM_AREA_FREE"
-#endif
- area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
- | free;
-}
-
-/************************************************************************
-Creates a memory pool. */
-
-mem_pool_t*
-mem_pool_create(
-/*============*/
- /* out: memory pool */
- ulint size) /* in: pool size in bytes */
-{
- mem_pool_t* pool;
- mem_area_t* area;
- ulint i;
- ulint used;
-
- ut_a(size > 10000);
-
- pool = ut_malloc(sizeof(mem_pool_t));
-
- /* We do not set the memory to zero (FALSE) in the pool,
- but only when allocated at a higher level in mem0mem.c.
- This is to avoid masking useful Purify warnings. */
-
- pool->buf = ut_malloc_low(size, FALSE, TRUE);
- pool->size = size;
-
- mutex_create(&pool->mutex, SYNC_MEM_POOL);
-
- /* Initialize the free lists */
-
- for (i = 0; i < 64; i++) {
-
- UT_LIST_INIT(pool->free_list[i]);
- }
-
- used = 0;
-
- while (size - used >= MEM_AREA_MIN_SIZE) {
-
- i = ut_2_log(size - used);
-
- if (ut_2_exp(i) > size - used) {
-
- /* ut_2_log rounds upward */
-
- i--;
- }
-
- area = (mem_area_t*)(pool->buf + used);
-
- mem_area_set_size(area, ut_2_exp(i));
- mem_area_set_free(area, TRUE);
- UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
- ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
-
- used = used + ut_2_exp(i);
- }
-
- ut_ad(size >= used);
-
- pool->reserved = 0;
-
- return(pool);
-}
-
-/************************************************************************
-Fills the specified free list. */
-static
-ibool
-mem_pool_fill_free_list(
-/*====================*/
- /* out: TRUE if we were able to insert a
- block to the free list */
- ulint i, /* in: free list index */
- mem_pool_t* pool) /* in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* area2;
- ibool ret;
-
- ut_ad(mutex_own(&(pool->mutex)));
-
- if (i >= 63) {
- /* We come here when we have run out of space in the
- memory pool: */
-
- return(FALSE);
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
-
- if (area == NULL) {
- if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: mem pool free list %lu"
- " length is %lu\n"
- "InnoDB: though the list is empty!\n",
- (ulong) i + 1,
- (ulong)
- UT_LIST_GET_LEN(pool->free_list[i + 1]));
- }
-
- ret = mem_pool_fill_free_list(i + 1, pool);
-
- if (ret == FALSE) {
-
- return(FALSE);
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
- }
-
- if (UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0) {
- mem_analyze_corruption(area);
-
- ut_error;
- }
-
- UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
-
- area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
- UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
-
- mem_area_set_size(area2, ut_2_exp(i));
- mem_area_set_free(area2, TRUE);
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
-
- mem_area_set_size(area, ut_2_exp(i));
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
-
- return(TRUE);
-}
-
-/************************************************************************
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*! */
-
-void*
-mem_area_alloc(
-/*===========*/
- /* out, own: allocated memory buffer */
- ulint size, /* in: allocated size in bytes; for optimum
- space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE */
- mem_pool_t* pool) /* in: memory pool */
-{
-#ifdef UNIV_DISABLE_MEM_POOL
- (void)pool; /* Remove compiler warning */
- return malloc(size);
-#else /* UNIV_DISABLE_MEM_POOL */
- mem_area_t* area;
- ulint n;
- ibool ret;
-
- n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
-
- mutex_enter(&(pool->mutex));
- mem_n_threads_inside++;
-
- ut_a(mem_n_threads_inside == 1);
-
- area = UT_LIST_GET_FIRST(pool->free_list[n]);
-
- if (area == NULL) {
- ret = mem_pool_fill_free_list(n, pool);
-
- if (ret == FALSE) {
- /* Out of memory in memory pool: we try to allocate
- from the operating system with the regular malloc: */
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- return(ut_malloc(size));
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[n]);
- }
-
- if (!mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Error: Removing element from mem pool"
- " free list %lu though the\n"
- "InnoDB: element is not marked free!\n",
- (ulong) n);
-
- mem_analyze_corruption(area);
-
- /* Try to analyze a strange assertion failure reported at
- mysql@lists.mysql.com where the free bit IS 1 in the
- hex dump above */
-
- if (mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Probably a race condition"
- " because now the area is marked free!\n");
- }
-
- ut_error;
- }
-
- if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
- fprintf(stderr,
- "InnoDB: Error: Removing element from mem pool"
- " free list %lu\n"
- "InnoDB: though the list length is 0!\n",
- (ulong) n);
- mem_analyze_corruption(area);
-
- ut_error;
- }
-
- ut_ad(mem_area_get_size(area) == ut_2_exp(n));
-
- mem_area_set_free(area, FALSE);
-
- UT_LIST_REMOVE(free_list, pool->free_list[n], area);
-
- pool->reserved += mem_area_get_size(area);
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- ut_ad(mem_pool_validate(pool));
- UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area,
- ut_2_exp(n) - MEM_AREA_EXTRA_SIZE);
-
- return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
-#endif /* UNIV_DISABLE_MEM_POOL */
-}
-
-/************************************************************************
-Gets the buddy of an area, if it exists in pool. */
-UNIV_INLINE
-mem_area_t*
-mem_area_get_buddy(
-/*===============*/
- /* out: the buddy, NULL if no buddy in pool */
- mem_area_t* area, /* in: memory area */
- ulint size, /* in: memory area size */
- mem_pool_t* pool) /* in: memory pool */
-{
- mem_area_t* buddy;
-
- ut_ad(size != 0);
-
- if (((((byte*)area) - pool->buf) % (2 * size)) == 0) {
-
- /* The buddy is in a higher address */
-
- buddy = (mem_area_t*)(((byte*)area) + size);
-
- if ((((byte*)buddy) - pool->buf) + size > pool->size) {
-
- /* The buddy is not wholly contained in the pool:
- there is no buddy */
-
- buddy = NULL;
- }
- } else {
- /* The buddy is in a lower address; NOTE that area cannot
- be at the pool lower end, because then we would end up to
- the upper branch in this if-clause: the remainder would be
- 0 */
-
- buddy = (mem_area_t*)(((byte*)area) - size);
- }
-
- return(buddy);
-}
-
-/************************************************************************
-Frees memory to a pool. */
-
-void
-mem_area_free(
-/*==========*/
- void* ptr, /* in, own: pointer to allocated memory
- buffer */
- mem_pool_t* pool) /* in: memory pool */
-{
-#ifdef UNIV_DISABLE_MEM_POOL
- (void)pool; /* Remove compiler warning */
- free(ptr);
-#else /* UNIV_DISABLE_MEM_POOL */
- mem_area_t* area;
- mem_area_t* buddy;
- void* new_ptr;
- ulint size;
- ulint n;
-
- /* It may be that the area was really allocated from the OS with
- regular malloc: check if ptr points within our memory pool */
-
- if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) {
- ut_free(ptr);
-
- return;
- }
-
- area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);
-
- if (mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Error: Freeing element to mem pool"
- " free list though the\n"
- "InnoDB: element is marked free!\n");
-
- mem_analyze_corruption(area);
- ut_error;
- }
-
- size = mem_area_get_size(area);
- UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
-
- if (size == 0) {
- fprintf(stderr,
- "InnoDB: Error: Mem area size is 0. Possibly a"
- " memory overrun of the\n"
- "InnoDB: previous allocated area!\n");
-
- mem_analyze_corruption(area);
- ut_error;
- }
-
-#ifdef UNIV_LIGHT_MEM_DEBUG
- if (((byte*)area) + size < pool->buf + pool->size) {
-
- ulint next_size;
-
- next_size = mem_area_get_size(
- (mem_area_t*)(((byte*)area) + size));
- if (ut_2_power_up(next_size) != next_size) {
- fprintf(stderr,
- "InnoDB: Error: Memory area size %lu,"
- " next area size %lu not a power of 2!\n"
- "InnoDB: Possibly a memory overrun of"
- " the buffer being freed here.\n",
- (ulong) size, (ulong) next_size);
- mem_analyze_corruption(area);
-
- ut_error;
- }
- }
-#endif
- buddy = mem_area_get_buddy(area, size, pool);
-
- n = ut_2_log(size);
-
- mutex_enter(&(pool->mutex));
- mem_n_threads_inside++;
-
- ut_a(mem_n_threads_inside == 1);
-
- if (buddy && mem_area_get_free(buddy)
- && (size == mem_area_get_size(buddy))) {
-
- /* The buddy is in a free list */
-
- if ((byte*)buddy < (byte*)area) {
- new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;
-
- mem_area_set_size(buddy, 2 * size);
- mem_area_set_free(buddy, FALSE);
- } else {
- new_ptr = ptr;
-
- mem_area_set_size(area, 2 * size);
- }
-
- /* Remove the buddy from its free list and merge it to area */
-
- UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
-
- pool->reserved += ut_2_exp(n);
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- mem_area_free(new_ptr, pool);
-
- return;
- } else {
- UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
-
- mem_area_set_free(area, TRUE);
-
- ut_ad(pool->reserved >= size);
-
- pool->reserved -= size;
- }
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- ut_ad(mem_pool_validate(pool));
-#endif /* UNIV_DISABLE_MEM_POOL */
-}
-
-/************************************************************************
-Validates a memory pool. */
-
-ibool
-mem_pool_validate(
-/*==============*/
- /* out: TRUE if ok */
- mem_pool_t* pool) /* in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* buddy;
- ulint free;
- ulint i;
-
- mutex_enter(&(pool->mutex));
-
- free = 0;
-
- for (i = 0; i < 64; i++) {
-
- UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i]);
-
- area = UT_LIST_GET_FIRST(pool->free_list[i]);
-
- while (area != NULL) {
- ut_a(mem_area_get_free(area));
- ut_a(mem_area_get_size(area) == ut_2_exp(i));
-
- buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
-
- ut_a(!buddy || !mem_area_get_free(buddy)
- || (ut_2_exp(i) != mem_area_get_size(buddy)));
-
- area = UT_LIST_GET_NEXT(free_list, area);
-
- free += ut_2_exp(i);
- }
- }
-
- ut_a(free + pool->reserved == pool->size);
-
- mutex_exit(&(pool->mutex));
-
- return(TRUE);
-}
-
-/************************************************************************
-Prints info of a memory pool. */
-
-void
-mem_pool_print_info(
-/*================*/
- FILE* outfile,/* in: output file to write to */
- mem_pool_t* pool) /* in: memory pool */
-{
- ulint i;
-
- mem_pool_validate(pool);
-
- fprintf(outfile, "INFO OF A MEMORY POOL\n");
-
- mutex_enter(&(pool->mutex));
-
- for (i = 0; i < 64; i++) {
- if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
-
- fprintf(outfile,
- "Free list length %lu for"
- " blocks of size %lu\n",
- (ulong) UT_LIST_GET_LEN(pool->free_list[i]),
- (ulong) ut_2_exp(i));
- }
- }
-
- fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
- (ulong) pool->reserved);
- mutex_exit(&(pool->mutex));
-}
-
-/************************************************************************
-Returns the amount of reserved memory. */
-
-ulint
-mem_pool_get_reserved(
-/*==================*/
- /* out: reserved memory in bytes */
- mem_pool_t* pool) /* in: memory pool */
-{
- ulint reserved;
-
- mutex_enter(&(pool->mutex));
-
- reserved = pool->reserved;
-
- mutex_exit(&(pool->mutex));
-
- return(reserved);
-}
diff --git a/storage/innobase/mtr/mtr0log.c b/storage/innobase/mtr/mtr0log.c
deleted file mode 100644
index e5d572bbfa7..00000000000
--- a/storage/innobase/mtr/mtr0log.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/******************************************************
-Mini-transaction log routines
-
-(c) 1995 Innobase Oy
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0log.h"
-
-#ifdef UNIV_NONINL
-#include "mtr0log.ic"
-#endif
-
-#include "buf0buf.h"
-#include "dict0boot.h"
-#include "log0recv.h"
-#include "page0page.h"
-
-/************************************************************
-Catenates n bytes to the mtr log; does nothing in MTR_LOG_NONE mode. */
-
-void
-mlog_catenate_string(
-/*=================*/
- mtr_t* mtr, /* in: mtr */
- const byte* str, /* in: string to write */
- ulint len) /* in: string length, i.e. number of bytes pushed */
-{
- dyn_array_t* mlog;
-
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
- return; /* logging disabled: nothing is appended */
- }
-
- mlog = &(mtr->log);
-
- dyn_push_string(mlog, str, len);
-}
-
-/************************************************************
-Writes the initial part of a log record consisting of one-byte item
-type and the space and page numbers. Also pushes info
-to the mtr memo that a buffer page has been modified. */
-
-void
-mlog_write_initial_log_record(
-/*==========================*/
- byte* ptr, /* in: pointer to (inside) a buffer frame holding the
- file page where modification is made */
- byte type, /* in: log item type; debug builds assert MLOG_8BYTES < type <= MLOG_BIGGEST_TYPE */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- byte* log_ptr;
-
- ut_ad(type <= MLOG_BIGGEST_TYPE);
- ut_ad(type > MLOG_8BYTES);
-
- if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { /* ptr outside the buffer pool frames */
- fprintf(stderr,
- "InnoDB: Error: trying to write to"
- " a stray memory location %p\n", (void*) ptr);
- ut_error;
- }
-
- log_ptr = mlog_open(mtr, 11); /* NOTE(review): 11 is presumably the maximum size of the initial record - confirm */
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
-
- mlog_close(mtr, log_ptr);
-}
-
-/************************************************************
-Parses an initial log record written by mlog_write_initial_log_record:
-a type byte followed by the space id and page number in compressed form. */
-
-byte*
-mlog_parse_initial_log_record(
-/*==========================*/
- /* out: parsed record end, NULL if not a complete
- record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* type, /* out: log record type: MLOG_1BYTE, ... */
- ulint* space, /* out: space id */
- ulint* page_no)/* out: page number */
-{
- if (end_ptr < ptr + 1) { /* not even the type byte is available */
-
- return(NULL);
- }
-
- *type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG); /* strip the single-record flag bit */
- ut_ad(*type <= MLOG_BIGGEST_TYPE);
-
- ptr++;
-
- if (end_ptr < ptr + 2) { /* at least 2 more bytes are required before parsing on */
-
- return(NULL);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, space);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, page_no); /* result, possibly NULL, is returned as is */
-
- return(ptr);
-}
-
-/************************************************************
-Parses a log record written by mlog_write_ulint or mlog_write_dulint
-and, if a page is given, applies the parsed value at the logged offset. */
-
-byte*
-mlog_parse_nbytes(
-/*==============*/
- /* out: parsed record end, NULL if not a complete
- record or a corrupt record */
- ulint type, /* in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* page) /* in: page where to apply the log record, or NULL to only parse */
-{
- ulint offset;
- ulint val;
- dulint dval;
-
- ut_a(type <= MLOG_8BYTES);
-
- if (end_ptr < ptr + 2) { /* the 2-byte page offset is not complete */
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- if (offset >= UNIV_PAGE_SIZE) { /* offset outside the page: flag the log as corrupt */
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- if (type == MLOG_8BYTES) { /* 8-byte values use the dulint compressed format */
- ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (page) {
- mach_write_to_8(page + offset, dval);
- }
-
- return(ptr);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, &val);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (type == MLOG_1BYTE) { /* the value must fit in the logged width */
- if (val > 0xFFUL) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
- } else if (type == MLOG_2BYTES) {
- if (val > 0xFFFFUL) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
- } else {
- if (type != MLOG_4BYTES) { /* any other type is treated as corruption */
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
- }
-
- if (page) { /* apply the value only when a page was supplied */
- if (type == MLOG_1BYTE) {
- mach_write_to_1(page + offset, val);
- } else if (type == MLOG_2BYTES) {
- mach_write_to_2(page + offset, val);
- } else {
- ut_a(type == MLOG_4BYTES);
- mach_write_to_4(page + offset, val);
- }
- }
-
- return(ptr);
-}
-
-/************************************************************
-Writes 1, 2 or 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-
-void
-mlog_write_ulint(
-/*=============*/
- byte* ptr, /* in: pointer where to write */
- ulint val, /* in: value to write */
- byte type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- byte* log_ptr;
-
- if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { /* ptr outside the buffer pool frames */
- fprintf(stderr,
- "InnoDB: Error: trying to write to"
- " a stray memory location %p\n", (void*) ptr);
- ut_error;
- }
-
- if (type == MLOG_1BYTE) { /* the page itself is modified first, before any logging */
- mach_write_to_1(ptr, val);
- } else if (type == MLOG_2BYTES) {
- mach_write_to_2(ptr, val);
- } else {
- ut_ad(type == MLOG_4BYTES);
- mach_write_to_4(ptr, val);
- }
-
- log_ptr = mlog_open(mtr, 11 + 2 + 5); /* initial record + 2-byte page offset + compressed val */
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
-
- mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr)); /* offset of ptr within its frame */
- log_ptr += 2;
-
- log_ptr += mach_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-/************************************************************
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding MLOG_8BYTES record to the mini-transaction log. */
-
-void
-mlog_write_dulint(
-/*==============*/
- byte* ptr, /* in: pointer where to write */
- dulint val, /* in: value to write */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- byte* log_ptr;
-
- if (UNIV_UNLIKELY(ptr < buf_pool->frame_zero)
- || UNIV_UNLIKELY(ptr >= buf_pool->high_end)) {
- fprintf(stderr,
- "InnoDB: Error: trying to write to"
- " a stray memory location %p\n", (void*) ptr);
- ut_error;
- }
-
- ut_ad(ptr && mtr);
-
- mach_write_to_8(ptr, val); /* the page itself is modified first, before any logging */
-
- log_ptr = mlog_open(mtr, 11 + 2 + 9); /* initial record + 2-byte page offset + compressed dulint */
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_8BYTES,
- log_ptr, mtr);
-
- mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr)); /* offset of ptr within its frame */
- log_ptr += 2;
-
- log_ptr += mach_dulint_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-/************************************************************
-Writes a string to a file page buffered in the buffer pool. Writes the
-corresponding MLOG_WRITE_STRING record to the mini-transaction log. */
-
-void
-mlog_write_string(
-/*==============*/
- byte* ptr, /* in: pointer where to write */
- const byte* str, /* in: string to write */
- ulint len, /* in: string length; must be < UNIV_PAGE_SIZE */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- byte* log_ptr;
-
- if (UNIV_UNLIKELY(ptr < buf_pool->frame_zero)
- || UNIV_UNLIKELY(ptr >= buf_pool->high_end)) {
- fprintf(stderr,
- "InnoDB: Error: trying to write to"
- " a stray memory location %p\n", (void*) ptr);
- ut_error;
- }
- ut_ad(ptr && mtr);
- ut_a(len < UNIV_PAGE_SIZE);
-
- ut_memcpy(ptr, str, len); /* the page itself is modified first, before any logging */
-
- log_ptr = mlog_open(mtr, 30); /* room for the record header only; the string is appended below */
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING,
- log_ptr, mtr);
- mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr)); /* offset of ptr within its frame */
- log_ptr += 2;
-
- mach_write_to_2(log_ptr, len); /* 2-byte string length */
- log_ptr += 2;
-
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, str, len); /* string bytes follow the header in the log */
-}
-
-/************************************************************
-Parses a log record written by mlog_write_string: a 2-byte page offset,
-a 2-byte length and the string bytes; copies them to the page if one is given. */
-
-byte*
-mlog_parse_string(
-/*==============*/
- /* out: parsed record end, NULL if not a complete
- record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- byte* page) /* in: page where to apply the log record, or NULL to only parse */
-{
- ulint offset;
- ulint len;
-
- if (end_ptr < ptr + 4) { /* offset and length fields are not complete */
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- if (offset >= UNIV_PAGE_SIZE) { /* offset outside the page: flag the log as corrupt */
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- len = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(len + offset < UNIV_PAGE_SIZE); /* NOTE(review): aborts on a bad length instead of setting found_corrupt_log as above - confirm intent */
-
- if (end_ptr < ptr + len) { /* string bytes are not complete */
-
- return(NULL);
- }
-
- if (page) {
- ut_memcpy(page + offset, ptr, len);
- }
-
- return(ptr + len);
-}
-
-/************************************************************
-Opens a buffer for mlog, writes the initial log record and,
-if the record is in compact format, the field lengths of an index. */
-
-byte*
-mlog_open_and_write_index(
-/*======================*/
- /* out: buffer, NULL if log mode
- MTR_LOG_NONE */
- mtr_t* mtr, /* in: mtr */
- byte* rec, /* in: index record or page */
- dict_index_t* index, /* in: record descriptor */
- byte type, /* in: log item type */
- ulint size) /* in: requested buffer size in bytes
- (if 0, calls mlog_close() and returns NULL) */
-{
- byte* log_ptr;
- const byte* log_start;
- const byte* log_end;
-
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
- if (!page_rec_is_comp(rec)) { /* non-compact format: no field lengths are logged */
- log_start = log_ptr = mlog_open(mtr, 11 + size);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
- log_ptr = mlog_write_initial_log_record_fast(rec, type,
- log_ptr, mtr);
- log_end = log_ptr + 11 + size; /* NOTE(review): computed from the advanced log_ptr, not from log_start - confirm */
- } else {
- ulint i;
- ulint n = dict_index_get_n_fields(index);
- /* total size needed: initial record, caller's size, and 2 bytes each for n, n_uniq and every field */
- ulint total = 11 + size + (n + 2) * 2;
- ulint alloc = total;
- /* allocate at most DYN_ARRAY_DATA_SIZE at a time */
- if (alloc > DYN_ARRAY_DATA_SIZE) {
- alloc = DYN_ARRAY_DATA_SIZE;
- }
- log_start = log_ptr = mlog_open(mtr, alloc);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
- log_end = log_ptr + alloc;
- log_ptr = mlog_write_initial_log_record_fast(rec, type,
- log_ptr, mtr);
- mach_write_to_2(log_ptr, n); /* number of fields */
- log_ptr += 2;
- mach_write_to_2(log_ptr,
- dict_index_get_n_unique_in_tree(index));
- log_ptr += 2;
- for (i = 0; i < n; i++) { /* one 2-byte length descriptor per field */
- dict_field_t* field;
- const dict_col_t* col;
- ulint len;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
- len = field->fixed_len;
- ut_ad(len < 0x7fff);
- if (len == 0
- && (col->len > 255 || col->mtype == DATA_BLOB)) {
- /* variable-length field
- with maximum length > 255 */
- len = 0x7fff;
- }
- if (col->prtype & DATA_NOT_NULL) {
- len |= 0x8000; /* high bit carries the NOT NULL flag */
- }
- if (log_ptr + 2 > log_end) { /* current chunk is full: close it and open the next one */
- mlog_close(mtr, log_ptr);
- ut_a(total > (ulint) (log_ptr - log_start));
- total -= log_ptr - log_start;
- alloc = total;
- if (alloc > DYN_ARRAY_DATA_SIZE) {
- alloc = DYN_ARRAY_DATA_SIZE;
- }
- log_start = log_ptr = mlog_open(mtr, alloc);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
- log_end = log_ptr + alloc;
- }
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
- }
- }
- if (size == 0) { /* caller needs no further space: close and return NULL */
- mlog_close(mtr, log_ptr);
- log_ptr = NULL;
- } else if (log_ptr + size > log_end) { /* not enough room left: reopen with the requested size */
- mlog_close(mtr, log_ptr);
- log_ptr = mlog_open(mtr, size);
- }
- return(log_ptr);
-}
-
-/************************************************************
-Parses a log record written by mlog_open_and_write_index and builds a dummy table and index from it. */
-
-byte*
-mlog_parse_index(
-/*=============*/
- /* out: parsed record end,
- NULL if not a complete record */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- /* (the return value is the new position in the buffer) */
- ibool comp, /* in: TRUE=compact record format */
- dict_index_t** index) /* out, own: dummy index */
-{
- ulint i, n, n_uniq;
- dict_table_t* table;
- dict_index_t* ind;
-
- ut_ad(comp == FALSE || comp == TRUE);
-
- if (comp) {
- if (end_ptr < ptr + 4) { /* n and n_uniq (2 bytes each) are not complete */
- return(NULL);
- }
- n = mach_read_from_2(ptr);
- ptr += 2;
- n_uniq = mach_read_from_2(ptr);
- ptr += 2;
- ut_ad(n_uniq <= n);
- if (end_ptr < ptr + n * 2) { /* the n 2-byte field descriptors are not complete */
- return(NULL);
- }
- } else {
- n = n_uniq = 1; /* non-compact format: nothing is read from the buffer */
- }
- table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n,
- comp ? DICT_TF_COMPACT : 0);
- ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY",
- DICT_HDR_SPACE, 0, n);
- ind->table = table;
- ind->n_uniq = (unsigned int) n_uniq;
- if (n_uniq != n) { /* fewer unique fields than fields: marked as a clustered index */
- ut_a(n_uniq + DATA_ROLL_PTR <= n);
- ind->type = DICT_CLUSTERED;
- }
- if (comp) {
- for (i = 0; i < n; i++) {
- ulint len = mach_read_from_2(ptr);
- ptr += 2;
- /* The high-order bit of len is the NOT NULL flag;
- the rest is 0 or 0x7fff for variable-length fields,
- and 1..0x7ffe for fixed-length fields. */
- dict_mem_table_add_col(
- table, NULL, NULL,
- ((len + 1) & 0x7fff) <= 1
- ? DATA_BINARY : DATA_FIXBINARY,
- len & 0x8000 ? DATA_NOT_NULL : 0,
- len & 0x7fff);
-
- dict_index_add_col(ind, table, (dict_col_t*)
- dict_table_get_nth_col(table, i),
- 0);
- }
- dict_table_add_system_columns(table, table->heap);
- if (n_uniq != n) {
- /* Identify DB_TRX_ID and DB_ROLL_PTR in the index; they are repointed to table->cols[n + ...]. */
- ut_a(DATA_TRX_ID_LEN
- == dict_index_get_nth_col(ind, DATA_TRX_ID - 1
- + n_uniq)->len);
- ut_a(DATA_ROLL_PTR_LEN
- == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1
- + n_uniq)->len);
- ind->fields[DATA_TRX_ID - 1 + n_uniq].col
- = &table->cols[n + DATA_TRX_ID];
- ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col
- = &table->cols[n + DATA_ROLL_PTR];
- }
- }
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- ind->cached = TRUE;
- *index = ind; /* ownership passes to the caller ("out, own") */
- return(ptr);
-}
diff --git a/storage/innobase/mtr/mtr0mtr.c b/storage/innobase/mtr/mtr0mtr.c
deleted file mode 100644
index 365fa15878a..00000000000
--- a/storage/innobase/mtr/mtr0mtr.c
+++ /dev/null
@@ -1,336 +0,0 @@
-/******************************************************
-Mini-transaction buffer
-
-(c) 1995 Innobase Oy
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0mtr.h"
-
-#ifdef UNIV_NONINL
-#include "mtr0mtr.ic"
-#endif
-
-#include "buf0buf.h"
-#include "page0types.h"
-#include "mtr0log.h"
-#include "log0log.h"
-
-/*******************************************************************
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller. Plain wrapper around mtr_start(). */
-
-mtr_t*
-mtr_start_noninline(
-/*================*/
- /* out: mtr buffer which also acts as
- the mtr handle */
- mtr_t* mtr) /* in: memory buffer for the mtr buffer */
-{
- return(mtr_start(mtr));
-}
-
-/*********************************************************************
-Releases the item in the slot given and clears the slot's object pointer. */
-UNIV_INLINE
-void
-mtr_memo_slot_release(
-/*==================*/
- mtr_t* mtr, /* in: mtr */
- mtr_memo_slot_t* slot) /* in: memo slot */
-{
- void* object;
- ulint type;
-
- ut_ad(mtr && slot);
-
- object = slot->object;
- type = slot->type;
-
- if (UNIV_LIKELY(object != NULL)) { /* a NULL object means the slot was already released */
- if (type <= MTR_MEMO_BUF_FIX) { /* types up to MTR_MEMO_BUF_FIX are buffer block releases */
- buf_page_release((buf_block_t*)object, type, mtr);
- } else if (type == MTR_MEMO_S_LOCK) {
- rw_lock_s_unlock((rw_lock_t*)object);
-#ifdef UNIV_DEBUG
- } else if (type == MTR_MEMO_X_LOCK) {
- rw_lock_x_unlock((rw_lock_t*)object);
- } else {
- ut_ad(type == MTR_MEMO_MODIFY);
- ut_ad(mtr_memo_contains(mtr, object,
- MTR_MEMO_PAGE_X_FIX));
-#else
- } else { /* NOTE(review): every remaining type is x-unlocked here; presumably MTR_MEMO_MODIFY slots exist only under UNIV_DEBUG - confirm */
- rw_lock_x_unlock((rw_lock_t*)object);
-#endif
- }
- }
-
- slot->object = NULL;
-}
-
-/**************************************************************
-Releases the latches and other objects stored in an mtr memo. They are released
-in the order opposite to which they were pushed to the memo. NOTE! It is
-essential that the x-rw-lock on a modified buffer page is not released before
-buf_page_note_modification is called for that page! Otherwise, some thread
-might race to modify it, and the flush list sort order on lsn would be
-destroyed. */
-UNIV_INLINE
-void
-mtr_memo_pop_all(
-/*=============*/
- mtr_t* mtr) /* in: mtr */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
- commit */
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
-
- while (offset > 0) { /* walk the slots from the last pushed to the first */
- offset -= sizeof(mtr_memo_slot_t);
- slot = dyn_array_get_element(memo, offset);
-
- mtr_memo_slot_release(mtr, slot);
- }
-}
-
-/****************************************************************
-Writes the contents of a mini-transaction log, if any, to the database log and sets mtr->start_lsn and mtr->end_lsn. */
-static
-void
-mtr_log_reserve_and_write(
-/*======================*/
- mtr_t* mtr) /* in: mtr */
-{
- dyn_array_t* mlog;
- dyn_block_t* block;
- ulint data_size;
- ibool success;
- byte* first_data;
-
- ut_ad(mtr);
-
- mlog = &(mtr->log);
-
- first_data = dyn_block_get_data(mlog);
-
- if (mtr->n_log_recs > 1) { /* several records: append an end marker */
- mlog_catenate_ulint(mtr, MLOG_MULTI_REC_END, MLOG_1BYTE);
- } else { /* otherwise set the single-record flag in the first byte */
- *first_data = (byte)((ulint)*first_data
- | MLOG_SINGLE_REC_FLAG);
- }
-
- if (mlog->heap == NULL) { /* NOTE(review): presumably means the log fits in the first dyn block - confirm */
- mtr->end_lsn = log_reserve_and_write_fast(
- first_data, dyn_block_get_used(mlog),
- &(mtr->start_lsn), &success);
- if (success) {
-
- return;
- }
- }
-
- data_size = dyn_array_get_data_size(mlog);
-
- /* Open the database log for log_write_low */
- mtr->start_lsn = log_reserve_and_open(data_size);
-
- if (mtr->log_mode == MTR_LOG_ALL) {
-
- block = mlog;
-
- while (block != NULL) { /* write every dyn block in order */
- log_write_low(dyn_block_get_data(block),
- dyn_block_get_used(block));
- block = dyn_array_get_next_block(mlog, block);
- }
- } else {
- ut_ad(mtr->log_mode == MTR_LOG_NONE);
- /* Do nothing */
- }
-
- mtr->end_lsn = log_close();
-}
-
-/*******************************************************************
-Commits a mini-transaction: writes its log, releases the memo, frees its arrays. */
-
-void
-mtr_commit(
-/*=======*/
- mtr_t* mtr) /* in: mini-transaction */
-{
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-#ifdef UNIV_DEBUG
- mtr->state = MTR_COMMITTING;
-#endif
- if (mtr->modifications) { /* the log is written only if the mtr made modifications */
- mtr_log_reserve_and_write(mtr);
- }
-
- /* We first update the modification info to buffer pages, and only
- after that release the log mutex: this guarantees that when the log
- mutex is free, all buffer pages contain up-to-date info of their
- modifications. This fact is used in making a checkpoint when we look
- at the oldest modification of any page in the buffer pool. It is also
- required when we insert modified buffer pages into the flush list
- which must be sorted on oldest_modification. */
-
- mtr_memo_pop_all(mtr);
-
- if (mtr->modifications) {
- log_release();
- }
-
-#ifdef UNIV_DEBUG
- mtr->state = MTR_COMMITTED;
-#endif
- dyn_array_free(&(mtr->memo));
- dyn_array_free(&(mtr->log));
-}
-
-/**************************************************************
-Releases the latches stored in an mtr memo down to a savepoint.
-NOTE! The mtr must not have made changes to buffer pages after the
-savepoint, as these can be handled only by mtr_commit. */
-
-void
-mtr_rollback_to_savepoint(
-/*======================*/
- mtr_t* mtr, /* in: mtr */
- ulint savepoint) /* in: savepoint, a byte offset into the memo */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
- ut_ad(offset >= savepoint);
-
- while (offset > savepoint) { /* release slots from the newest down to the savepoint */
- offset -= sizeof(mtr_memo_slot_t);
-
- slot = dyn_array_get_element(memo, offset);
-
- ut_ad(slot->type != MTR_MEMO_MODIFY); /* debug check for the NOTE above */
- mtr_memo_slot_release(mtr, slot);
- }
-}
-
-/*******************************************************
-Releases an object in the memo stack: the most recently pushed slot matching both object and type. */
-
-void
-mtr_memo_release(
-/*=============*/
- mtr_t* mtr, /* in: mtr */
- void* object, /* in: object */
- ulint type) /* in: object type: MTR_MEMO_S_LOCK, ... */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
- ulint offset;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- offset = dyn_array_get_data_size(memo);
-
- while (offset > 0) { /* search from the newest slot backwards */
- offset -= sizeof(mtr_memo_slot_t);
-
- slot = dyn_array_get_element(memo, offset);
-
- if ((object == slot->object) && (type == slot->type)) {
-
- mtr_memo_slot_release(mtr, slot);
-
- break; /* only the first match is released; no match means no action */
- }
- }
-}
-
-/************************************************************
-Reads 1, 2 or 4 bytes from a file page buffered in the buffer pool. */
-
-ulint
-mtr_read_ulint(
-/*===========*/
- /* out: value read */
- byte* ptr, /* in: pointer from where to read */
- ulint type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr __attribute__((unused)))
- /* in: mini-transaction handle; used only in the debug assertions */
-{
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_X_FIX));
- if (type == MLOG_1BYTE) {
- return(mach_read_from_1(ptr));
- } else if (type == MLOG_2BYTES) {
- return(mach_read_from_2(ptr));
- } else { /* any other type is read as 4 bytes; only debug builds assert it */
- ut_ad(type == MLOG_4BYTES);
- return(mach_read_from_4(ptr));
- }
-}
-
-/************************************************************
-Reads 8 bytes from a file page buffered in the buffer pool. */
-
-dulint
-mtr_read_dulint(
-/*============*/
- /* out: value read */
- byte* ptr, /* in: pointer from where to read */
- mtr_t* mtr __attribute__((unused)))
- /* in: mini-transaction handle; used only in the debug assertions */
-{
- ut_ad(mtr->state == MTR_ACTIVE); /* NOTE(review): dereferences mtr before the ptr && mtr check on the next line */
- ut_ad(ptr && mtr);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, buf_block_align(ptr),
- MTR_MEMO_PAGE_X_FIX));
- return(mach_read_from_8(ptr));
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************
-Prints info of an mtr handle to stderr: its memo and log sizes in bytes. Compiled only under UNIV_DEBUG. */
-
-void
-mtr_print(
-/*======*/
- mtr_t* mtr) /* in: mtr */
-{
- fprintf(stderr,
- "Mini-transaction handle: memo size %lu bytes"
- " log size %lu bytes\n",
- (ulong) dyn_array_get_data_size(&(mtr->memo)),
- (ulong) dyn_array_get_data_size(&(mtr->log)));
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
deleted file mode 100644
index 4a9d3334e7d..00000000000
--- a/storage/innobase/os/os0file.c
+++ /dev/null
@@ -1,4550 +0,0 @@
-/******************************************************
-The interface to the operating system file i/o primitives
-
-(c) 1995 Innobase Oy
-
-Created 10/21/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0file.h"
-#include "os0sync.h"
-#include "os0thread.h"
-#include "ut0mem.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-
-#if defined(UNIV_HOTBACKUP) && defined(__WIN__)
-/* Add includes for the _stat() call to compile on Windows */
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <errno.h>
-#endif /* UNIV_HOTBACKUP */
-
-#ifdef POSIX_ASYNC_IO
-/* We assume in this case that the OS has standard Posix aio (at least SunOS
-2.6, HP-UX 11i and AIX 4.3 have) */
-
-#endif
-
-/* This specifies the file permissions InnoDB uses when it creates files in
-Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
-my_umask. The value below is only the compiled-in default. */
-
-#ifndef __WIN__
-ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
-#else
-ulint os_innodb_umask = 0;
-#endif
-
-#ifdef UNIV_DO_FLUSH
-/* If the following is set to TRUE, we do not call os_file_flush in every
-os_file_write. We can set this TRUE when the doublewrite buffer is used. */
-ibool os_do_not_call_flush_at_each_write = FALSE;
-#else
-/* We do not call os_file_flush in every os_file_write. */
-#endif /* UNIV_DO_FLUSH */
-
-/* We use these mutexes to protect an lseek + file i/o operation pair, if the
-OS does not provide an atomic pread or pwrite, or similar */
-#define OS_FILE_N_SEEK_MUTEXES 16
-os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
-
-/* In simulated aio, merge at most this many consecutive i/os */
-#define OS_AIO_MERGE_N_CONSECUTIVE 64
-
-/* If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use the simulated aio we build below with threads */
-
-ibool os_aio_use_native_aio = FALSE;
-
-ibool os_aio_print_debug = FALSE; /* NOTE(review): presumably enables debug printing in the aio code - confirm */
-
-/* Status of an IO request in simulated AIO.
- Protocol for simulated aio:
- client requests IO: find slot with reserved = FALSE. Add entry with
- status = OS_AIO_NOT_ISSUED.
- IO thread wakes: find adjacent slots with reserved = TRUE and status =
- OS_AIO_NOT_ISSUED. Change status for slots to
- OS_AIO_ISSUED.
- IO operation completes: set status for slots to OS_AIO_DONE. set status
- for the first slot to OS_AIO_CLAIMED and return
- result for that slot.
- When there are multiple read and write threads, they all compete to execute
- the requests in the array (os_aio_array_t). This avoids the need to load
- balance requests at the time the request is made, at the cost of waking all
- threads when a request is available.
-*/
-typedef enum {
- OS_AIO_NOT_ISSUED, /* Available to be processed by an IO thread. */
- OS_AIO_ISSUED, /* Being processed by an IO thread. */
- OS_AIO_DONE, /* Request processed. */
- OS_AIO_CLAIMED /* Result being returned to client. */
-} os_aio_status;
-
-/* The aio array slot structure */
-typedef struct os_aio_slot_struct os_aio_slot_t;
-
-struct os_aio_slot_struct{
- ibool is_read; /* TRUE if a read operation */
- ulint pos; /* index of the slot in the aio
- array */
- ibool reserved; /* TRUE if this slot is reserved */
- os_aio_status status; /* Status for current request. Valid when reserved
- is TRUE. Used only in simulated aio. */
- time_t reservation_time;/* time when reserved */
- ulint len; /* length of the block to read or
- write */
- byte* buf; /* buffer used in i/o */
- ulint type; /* OS_FILE_READ or OS_FILE_WRITE */
- ulint offset; /* 32 low bits of file offset in
- bytes */
- ulint offset_high; /* 32 high bits of file offset */
- os_file_t file; /* file where to read or write */
- const char* name; /* file name or path */
- fil_node_t* message1; /* first message given by the requester (see message2) */
- void* message2; /* second message given by the requester of an aio operation;
- the messages can be used to identify
- which pending aio operation was
- completed */
-#ifdef WIN_ASYNC_IO
- os_event_t event; /* event object we need in the
- OVERLAPPED struct */
- OVERLAPPED control; /* Windows control block for the
- aio request */
-#elif defined(POSIX_ASYNC_IO)
- struct aiocb control; /* Posix control block for aio
- request */
-#endif
-};
-
-/* The aio array structure */
-typedef struct os_aio_array_struct os_aio_array_t;
-
-struct os_aio_array_struct{
- os_mutex_t mutex; /* the mutex protecting the aio array */
- os_event_t not_full; /* The event which is set to the signaled
- state when there is space in the aio array
- outside the ibuf segment */
- os_event_t is_empty; /* The event which is set to the signaled
- state when there are no pending i/os
- in this array */
- ulint n_slots; /* Total number of slots in the aio array.
- This must be divisible by n_threads. */
- ulint n_reserved;/* Number of reserved slots in the
- aio array outside the ibuf segment */
- os_aio_slot_t* slots; /* Pointer to the slots in the array */
-#ifdef __WIN__
- os_native_event_t* native_events;
- /* Pointer to an array of OS native event
- handles where we copied the handles from
- slots, in the same order. This can be used
- in WaitForMultipleObjects; used only on
- Windows */
-#endif
-};
-
-/* Array of events used in simulated aio */
-os_event_t* os_aio_segment_wait_events = NULL;
-
-/* Number of threads for reading and writing. */
-ulint os_aio_read_threads = 0;
-ulint os_aio_write_threads = 0;
-
-/* Number for the first global segment for reading. */
-const ulint os_aio_first_read_segment = 2;
-
-/* Number for the first global segment for writing. Set to
-2 + os_aio_read_write_threads (NOTE(review): presumably os_aio_read_threads is meant - confirm). */
-ulint os_aio_first_write_segment = 0;
-
-/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
-are NULL when the module has not yet been initialized. */
-static os_aio_array_t* os_aio_read_array = NULL;
-static os_aio_array_t* os_aio_write_array = NULL;
-static os_aio_array_t* os_aio_ibuf_array = NULL;
-static os_aio_array_t* os_aio_log_array = NULL;
-static os_aio_array_t* os_aio_sync_array = NULL;
-
-/* Per thread buffer used for merged IO requests. Used by
-os_aio_simulated_handle so that a buffer doesn't have to be allocated
-for each request. */
-static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS];
-static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS];
-
-/* Count pages read and written per thread */
-static ulint os_aio_thread_io_reads[SRV_MAX_N_IO_THREADS];
-static ulint os_aio_thread_io_writes[SRV_MAX_N_IO_THREADS];
-
-/* Number of IO operations done. One request can be for N pages. */
-static ulint os_aio_thread_io_requests[SRV_MAX_N_IO_THREADS];
-
-/* usecs spent blocked on an IO request */
-static double os_aio_thread_io_wait[SRV_MAX_N_IO_THREADS];
-/* max usecs spent blocked on an IO request */
-static double os_aio_thread_max_io_wait[SRV_MAX_N_IO_THREADS];
-
-/* Number of IO global segments. An IO handler thread is created for each
-global segment, except for the segment associated with os_aio_sync_array.
-Several segments can be associated with os_aio_{read,write}_array. One
-segment is created for each of the other arrays. This is also the number
-of valid entries in srv_io_thread_reads, srv_io_thread_writes,
-srv_io_thread_op_info, srv_io_thread_function and os_aio_segment_wait_events. */
-static ulint os_aio_n_segments = ULINT_UNDEFINED;
-
-/* Set to TRUE to temporarily block reads from being scheduled while a batch
-of read requests is added, to allow them to be merged by the IO handler thread
-if they are adjacent. Declared volatile because we don't want this to be
-read from a register in a loop when another thread may change the value in
-memory.
-*/
-static volatile ibool os_aio_recommend_sleep_for_read_threads = FALSE;
-
-ulint os_n_file_reads = 0;
-ulint os_bytes_read_since_printout = 0;
-ulint os_n_file_writes = 0;
-ulint os_n_fsyncs = 0;
-ulint os_n_file_reads_old = 0;
-ulint os_n_file_writes_old = 0;
-ulint os_n_fsyncs_old = 0;
-time_t os_last_printout;
-
-ibool os_has_said_disk_full = FALSE;
-
-/* The mutex protecting the following counts of pending I/O operations */
-static os_mutex_t os_file_count_mutex;
-ulint os_file_n_pending_preads = 0;
-ulint os_file_n_pending_pwrites = 0;
-ulint os_n_pending_writes = 0;
-ulint os_n_pending_reads = 0;
-
-static double time_usecs() { /* returns sec * 1000000.0 + ms as filled in by ut_usectime() */
- ulint sec, ms; /* NOTE(review): "ms" presumably holds microseconds, given the usecs naming - confirm */
- if (ut_usectime(&sec, &ms))
- return 0; /* a nonzero result from ut_usectime() is reported as time 0 */
- else
- return sec * 1000000.0 + ms;
-}
-
-/***************************************************************************
-Gets the operating system version. Works only on Windows; elsewhere it calls ut_error. */
-
-ulint
-os_get_os_version(void)
-/*===================*/
- /* out: OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
-{
-#ifdef __WIN__
- OSVERSIONINFO os_info;
-
- os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
-
- ut_a(GetVersionEx(&os_info));
-
- if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
- return(OS_WIN31);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
- return(OS_WIN95);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
- if (os_info.dwMajorVersion <= 4) { /* NT major version 4 or lower maps to OS_WINNT */
- return(OS_WINNT);
- } else {
- return(OS_WIN2000);
- }
- } else { /* unknown platform id */
- ut_error;
- return(0);
- }
-#else
- ut_error;
-
- return(0);
-#endif
-}
-
-/***************************************************************************
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned. */
-
-ulint
-os_file_get_last_error(
-/*===================*/
- /* out: error number, or OS error
- number + 100 */
- ibool report_all_errors) /* in: TRUE if we want an error message
- printed of all errors */
-{
- ulint err;
-
-#ifdef __WIN__
-
- err = (ulint) GetLastError();
-
- if (report_all_errors
- || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
-
- if (err == ERROR_PATH_NOT_FOUND) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
-
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == ERROR_ACCESS_DENIED) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory. It may also be"
- " you have created a subdirectory\n"
- "InnoDB: of the same name as a data file.\n");
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- fprintf(stderr,
- "InnoDB: The error means that another program"
- " is using InnoDB's files.\n"
- "InnoDB: This might be a backup or antivirus"
- " software or another instance\n"
- "InnoDB: of MySQL."
- " Please close it to get rid of this error.\n");
- } else {
- fprintf(stderr,
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n");
- }
- }
-
- fflush(stderr);
-
- if (err == ERROR_FILE_NOT_FOUND) {
- return(OS_FILE_NOT_FOUND);
- } else if (err == ERROR_DISK_FULL) {
- return(OS_FILE_DISK_FULL);
- } else if (err == ERROR_FILE_EXISTS) {
- return(OS_FILE_ALREADY_EXISTS);
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- return(OS_FILE_SHARING_VIOLATION);
- } else {
- return(100 + err);
- }
-#else
- err = (ulint) errno;
-
- if (report_all_errors
- || (err != ENOSPC && err != EEXIST)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
-
- if (err == ENOENT) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
-
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == EACCES) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory.\n");
- } else {
- if (strerror((int)err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %lu"
- " means '%s'.\n",
- err, strerror((int)err));
- }
-
- fprintf(stderr,
- "InnoDB: Some operating system"
- " error numbers are described at\n"
- "InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n");
- }
- }
-
- fflush(stderr);
-
- if (err == ENOSPC) {
- return(OS_FILE_DISK_FULL);
-#ifdef POSIX_ASYNC_IO
- } else if (err == EAGAIN) {
- return(OS_FILE_AIO_RESOURCES_RESERVED);
-#endif
- } else if (err == ENOENT) {
- return(OS_FILE_NOT_FOUND);
- } else if (err == EEXIST) {
- return(OS_FILE_ALREADY_EXISTS);
- } else if (err == EXDEV || err == ENOTDIR || err == EISDIR) {
- return(OS_FILE_PATH_ERROR);
- } else {
- return(100 + err);
- }
-#endif
-}
-
-/********************************************************************
-Does error handling when a file operation fails.
-Conditionally exits (calling exit(3)) based on should_exit value and the
-error type */
-
-static
-ibool
-os_file_handle_error_cond_exit(
-/*===========================*/
- /* out: TRUE if we should retry the
- operation */
- const char* name, /* in: name of a file or NULL */
- const char* operation, /* in: operation */
- ibool should_exit) /* in: call exit(3) if unknown error
- and this parameter is TRUE */
-{
- ulint err;
-
- err = os_file_get_last_error(FALSE);
-
- if (err == OS_FILE_DISK_FULL) {
- /* We only print a warning about disk full once */
-
- if (os_has_said_disk_full) {
-
- return(FALSE);
- }
-
- if (name) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Encountered a problem with"
- " file %s\n", name);
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Disk is full. Try to clean the disk"
- " to free space.\n");
-
- os_has_said_disk_full = TRUE;
-
- fflush(stderr);
-
- return(FALSE);
- } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
-
- return(TRUE);
- } else if (err == OS_FILE_ALREADY_EXISTS
- || err == OS_FILE_PATH_ERROR) {
-
- return(FALSE);
- } else if (err == OS_FILE_SHARING_VIOLATION) {
-
- os_thread_sleep(10000000); /* 10 sec */
- return(TRUE);
- } else {
- if (name) {
- fprintf(stderr, "InnoDB: File name %s\n", name);
- }
-
- fprintf(stderr, "InnoDB: File operation call: '%s'.\n",
- operation);
-
- if (should_exit) {
- fprintf(stderr, "InnoDB: Cannot continue operation.\n");
-
- fflush(stderr);
-
- exit(1);
- }
- }
-
- return(FALSE);
-}
-
-/********************************************************************
-Does error handling when a file operation fails. */
-static
-ibool
-os_file_handle_error(
-/*=================*/
- /* out: TRUE if we should retry the
- operation */
- const char* name, /* in: name of a file or NULL */
- const char* operation)/* in: operation */
-{
- /* exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, TRUE));
-}
-
-/********************************************************************
-Does error handling when a file operation fails. */
-static
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- /* out: TRUE if we should retry the
- operation */
- const char* name, /* in: name of a file or NULL */
- const char* operation)/* in: operation */
-{
- /* don't exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, FALSE));
-}
-
-#undef USE_FILE_LOCK
-#define USE_FILE_LOCK
-#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__)
-/* InnoDB Hot Backup does not lock the data files.
- * On Windows, mandatory locking is used.
- */
-# undef USE_FILE_LOCK
-#endif
-#ifdef USE_FILE_LOCK
-/********************************************************************
-Obtain an exclusive lock on a file. */
-static
-int
-os_file_lock(
-/*=========*/
- /* out: 0 on success */
- int fd, /* in: file descriptor */
- const char* name) /* in: file name */
-{
- struct flock lk;
- lk.l_type = F_WRLCK;
- lk.l_whence = SEEK_SET;
- lk.l_start = lk.l_len = 0;
- if (fcntl(fd, F_SETLK, &lk) == -1) {
- fprintf(stderr,
- "InnoDB: Unable to lock %s, error: %d\n", name, errno);
-
- if (errno == EAGAIN || errno == EACCES) {
- fprintf(stderr,
- "InnoDB: Check that you do not already have"
- " another mysqld process\n"
- "InnoDB: using the same InnoDB data"
- " or log files.\n");
- }
-
- return(-1);
- }
-
- return(0);
-}
-#endif /* USE_FILE_LOCK */
-
-/********************************************************************
-Creates the seek mutexes used in positioned reads and writes. */
-
-void
-os_io_init_simple(void)
-/*===================*/
-{
- ulint i;
-
- os_file_count_mutex = os_mutex_create(NULL);
-
- for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
- os_file_seek_mutexes[i] = os_mutex_create(NULL);
- }
-}
-
-#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__)
-/*************************************************************************
-Creates a temporary file that will be deleted on close.
-This function is defined in ha_innodb.cc. */
-
-int
-innobase_mysql_tmpfile(void);
-/*========================*/
- /* out: temporary file descriptor, or < 0 on error */
-#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
-
-/***************************************************************************
-Creates a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the MySQL temporary directory.
-On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag. */
-
-FILE*
-os_file_create_tmpfile(void)
-/*========================*/
- /* out: temporary file handle, or NULL on error */
-{
-#ifdef UNIV_HOTBACKUP
- ut_error;
-
- return(NULL);
-#else
-# ifdef __NETWARE__
- FILE* file = tmpfile();
-# else /* __NETWARE__ */
- FILE* file = NULL;
- int fd = innobase_mysql_tmpfile();
-
- if (fd >= 0) {
- file = fdopen(fd, "w+b");
- }
-# endif /* __NETWARE__ */
-
- if (!file) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: unable to create temporary file;"
- " errno: %d\n", errno);
-# ifndef __NETWARE__
- if (fd >= 0) {
- close(fd);
- }
-# endif /* !__NETWARE__ */
- }
-
- return(file);
-#endif /* UNIV_HOTBACKUP */
-}
-
-/***************************************************************************
-The os_file_opendir() function opens a directory stream corresponding to the
-directory named by the dirname argument. The directory stream is positioned
-at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing. */
-
-os_file_dir_t
-os_file_opendir(
-/*============*/
- /* out: directory stream, NULL if
- error */
- const char* dirname, /* in: directory name; it must not
- contain a trailing '\' or '/' */
- ibool error_is_fatal) /* in: TRUE if we should treat an
- error as a fatal error; if we try to
- open symlinks then we do not wish a
- fatal error if it happens not to be
- a directory */
-{
- os_file_dir_t dir;
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- char path[OS_FILE_MAX_PATH + 3];
-
- ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
-
- strcpy(path, dirname);
- strcpy(path + strlen(path), "\\*");
-
- /* Note that in Windows opening the 'directory stream' also retrieves
- the first entry in the directory. Since it is '.', that is no problem,
- as we will skip over the '.' and '..' entries anyway. */
-
- lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
-
- dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
-
- ut_free(lpFindFileData);
-
- if (dir == INVALID_HANDLE_VALUE) {
-
- if (error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
- }
-
- return(NULL);
- }
-
- return(dir);
-#else
- dir = opendir(dirname);
-
- if (dir == NULL && error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
- }
-
- return(dir);
-#endif
-}
-
-/***************************************************************************
-Closes a directory stream. */
-
-int
-os_file_closedir(
-/*=============*/
- /* out: 0 if success, -1 if failure */
- os_file_dir_t dir) /* in: directory stream */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ret = FindClose(dir);
-
- if (!ret) {
- os_file_handle_error_no_exit(NULL, "closedir");
-
- return(-1);
- }
-
- return(0);
-#else
- int ret;
-
- ret = closedir(dir);
-
- if (ret) {
- os_file_handle_error_no_exit(NULL, "closedir");
- }
-
- return(ret);
-#endif
-}
-
-/***************************************************************************
-This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory. */
-
-int
-os_file_readdir_next_file(
-/*======================*/
- /* out: 0 if ok, -1 if error, 1 if at the end
- of the directory */
- const char* dirname,/* in: directory name or path */
- os_file_dir_t dir, /* in: directory stream */
- os_file_stat_t* info) /* in/out: buffer where the info is returned */
-{
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- BOOL ret;
-
- lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
-next_file:
- ret = FindNextFile(dir, lpFindFileData);
-
- if (ret) {
- ut_a(strlen((char *) lpFindFileData->cFileName)
- < OS_FILE_MAX_PATH);
-
- if (strcmp((char *) lpFindFileData->cFileName, ".") == 0
- || strcmp((char *) lpFindFileData->cFileName, "..") == 0) {
-
- goto next_file;
- }
-
- strcpy(info->name, (char *) lpFindFileData->cFileName);
-
- info->size = (ib_longlong)(lpFindFileData->nFileSizeLow)
- + (((ib_longlong)(lpFindFileData->nFileSizeHigh))
- << 32);
-
- if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_REPARSE_POINT) {
- /* TODO: test Windows symlinks */
- /* TODO: MySQL has apparently its own symlink
- implementation in Windows, dbname.sym can
- redirect a database directory:
- http://dev.mysql.com/doc/refman/5.1/en/
- windows-symbolic-links.html */
- info->type = OS_FILE_TYPE_LINK;
- } else if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_DIRECTORY) {
- info->type = OS_FILE_TYPE_DIR;
- } else {
- /* It is probably safest to assume that all other
- file types are normal. Better to check them rather
- than blindly skip them. */
-
- info->type = OS_FILE_TYPE_FILE;
- }
- }
-
- ut_free(lpFindFileData);
-
- if (ret) {
- return(0);
- } else if (GetLastError() == ERROR_NO_MORE_FILES) {
-
- return(1);
- } else {
- os_file_handle_error_no_exit(dirname,
- "readdir_next_file");
- return(-1);
- }
-#else
- struct dirent* ent;
- char* full_path;
- int ret;
- struct stat statinfo;
-#ifdef HAVE_READDIR_R
- char dirent_buf[sizeof(struct dirent)
- + _POSIX_PATH_MAX + 100];
- /* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
- the max file name len; but in most standards, the
- length is NAME_MAX; we add 100 to be even safer */
-#endif
-
-next_file:
-
-#ifdef HAVE_READDIR_R
- ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
-
- if (ret != 0) {
- fprintf(stderr,
- "InnoDB: cannot read directory %s, error %lu\n",
- dirname, (ulong)ret);
-
- return(-1);
- }
-
- if (ent == NULL) {
- /* End of directory */
-
- return(1);
- }
-
- ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
-#else
- ent = readdir(dir);
-
- if (ent == NULL) {
-
- return(1);
- }
-#endif
- ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
-
- if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
-
- goto next_file;
- }
-
- strcpy(info->name, ent->d_name);
-
- full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10);
-
- sprintf(full_path, "%s/%s", dirname, ent->d_name);
-
- ret = stat(full_path, &statinfo);
-
- if (ret) {
- os_file_handle_error_no_exit(full_path, "stat");
-
- ut_free(full_path);
-
- return(-1);
- }
-
- info->size = (ib_longlong)statinfo.st_size;
-
- if (S_ISDIR(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_DIR;
- } else if (S_ISLNK(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_LINK;
- } else if (S_ISREG(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_FILE;
- } else {
- info->type = OS_FILE_TYPE_UNKNOWN;
- }
-
- ut_free(full_path);
-
- return(0);
-#endif
-}
-
-/*********************************************************************
-This function attempts to create a directory named pathname. The new directory
-gets default permissions. On Unix the permissions are (0770 & ~umask). If the
-directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true. */
-
-ibool
-os_file_create_directory(
-/*=====================*/
- /* out: TRUE if call succeeds,
- FALSE on error */
- const char* pathname, /* in: directory name as
- null-terminated string */
- ibool fail_if_exists) /* in: if TRUE, pre-existing directory
- is treated as an error. */
-{
-#ifdef __WIN__
- BOOL rcode;
-
- rcode = CreateDirectory((LPCTSTR) pathname, NULL);
- if (!(rcode != 0
- || (GetLastError() == ERROR_ALREADY_EXISTS
- && !fail_if_exists))) {
- /* failure */
- os_file_handle_error(pathname, "CreateDirectory");
-
- return(FALSE);
- }
-
- return (TRUE);
-#else
- int rcode;
-
- rcode = mkdir(pathname, 0770);
-
- if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
- /* failure */
- os_file_handle_error(pathname, "mkdir");
-
- return(FALSE);
- }
-
- return (TRUE);
-#endif
-}
-
-/********************************************************************
-A simple function to open or create a file. */
-
-os_file_t
-os_file_create_simple(
-/*==================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
- opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error), or
- OS_FILE_CREATE_PATH if new file
- (if exists, error) and subdirectories along
- its path are created (if needed)*/
- ulint access_type,/* in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
-{
-#ifdef __WIN__
- os_file_t file;
- DWORD create_flag;
- DWORD access;
- DWORD attributes = 0;
- ibool retry;
-
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
- *success = os_file_create_subdirs_if_needed(name);
- if (!*success) {
- ut_error;
- }
- create_flag = CREATE_NEW;
- create_mode = OS_FILE_CREATE;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (access_type == OS_FILE_READ_WRITE) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else {
- access = 0;
- ut_error;
- }
-
- file = CreateFile((LPCTSTR) name,
- access,
- FILE_SHARE_READ | FILE_SHARE_WRITE,
- /* file can be read and written also
- by other processes */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /* no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
-
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
- }
- } else {
- *success = TRUE;
- }
-
- return(file);
-#else /* __WIN__ */
- os_file_t file;
- int create_flag;
- ibool retry;
-
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- if (access_type == OS_FILE_READ_ONLY) {
- create_flag = O_RDONLY;
- } else {
- create_flag = O_RDWR;
- }
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
- *success = os_file_create_subdirs_if_needed(name);
- if (!*success) {
- return (-1);
- }
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- create_mode = OS_FILE_CREATE;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
- } else {
- file = open(name, create_flag);
- }
-
- if (file == -1) {
- *success = FALSE;
-
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
- }
-#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
- *success = FALSE;
- close(file);
- file = -1;
-#endif
- } else {
- *success = TRUE;
- }
-
- return(file);
-#endif /* __WIN__ */
-}
-
-/********************************************************************
-A simple function to open or create a file. */
-
-os_file_t
-os_file_create_simple_no_error_handling(
-/*====================================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error) */
- ulint access_type,/* in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
-{
-#ifdef __WIN__
- os_file_t file;
- DWORD create_flag;
- DWORD access;
- DWORD attributes = 0;
- DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
-
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (access_type == OS_FILE_READ_WRITE) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
- access = GENERIC_READ;
- share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
- | FILE_SHARE_WRITE; /* A backup program has to give
- mysqld the maximum freedom to
- do what it likes with the
- file */
- } else {
- access = 0;
- ut_error;
- }
-
- file = CreateFile((LPCTSTR) name,
- access,
- share_mode,
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /* no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
- } else {
- *success = TRUE;
- }
-
- return(file);
-#else /* __WIN__ */
- os_file_t file;
- int create_flag;
-
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- if (access_type == OS_FILE_READ_ONLY) {
- create_flag = O_RDONLY;
- } else {
- create_flag = O_RDWR;
- }
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
- } else {
- file = open(name, create_flag);
- }
-
- if (file == -1) {
- *success = FALSE;
-#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
- *success = FALSE;
- close(file);
- file = -1;
-#endif
- } else {
- *success = TRUE;
- }
-
- return(file);
-#endif /* __WIN__ */
-}
-
-/********************************************************************
-Tries to disable OS caching on an opened file descriptor. */
-
-void
-os_file_set_nocache(
-/*================*/
- int fd, /* in: file descriptor to alter */
- const char* file_name, /* in: used in the diagnostic message */
- const char* operation_name) /* in: used in the diagnostic message,
- we call os_file_set_nocache()
- immediately after opening or creating
- a file, so this is either "open" or
- "create" */
-{
- /* some versions of Solaris may not have DIRECTIO_ON */
-#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
- if (directio(fd, DIRECTIO_ON) == -1) {
- int errno_save;
- errno_save = (int)errno;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Failed to set DIRECTIO_ON "
- "on file %s: %s: %s, continuing anyway\n",
- file_name, operation_name, strerror(errno_save));
- }
-#elif defined(O_DIRECT)
- if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
- int errno_save;
- errno_save = (int)errno;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Failed to set O_DIRECT "
- "on file %s: %s: %s, continuing anyway\n",
- file_name, operation_name, strerror(errno_save));
- if (errno_save == EINVAL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: O_DIRECT is known to result in "
- "'Invalid argument' on Linux on tmpfs, "
- "see MySQL Bug#26662\n");
- }
- }
-#endif
-}
-
-/********************************************************************
-Opens an existing file or creates a new. */
-
-os_file_t
-os_file_create(
-/*===========*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error),
- OS_FILE_OVERWRITE if a new file is created
- or an old overwritten;
- OS_FILE_OPEN_RAW, if a raw device or disk
- partition should be opened */
- ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
-{
-#ifdef __WIN__
- os_file_t file;
- DWORD share_mode = FILE_SHARE_READ;
- DWORD create_flag;
- DWORD attributes;
- ibool retry;
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN_RAW) {
- create_flag = OPEN_EXISTING;
- share_mode = FILE_SHARE_WRITE;
- } else if (create_mode == OS_FILE_OPEN
- || create_mode == OS_FILE_OPEN_RETRY) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else if (create_mode == OS_FILE_OVERWRITE) {
- create_flag = CREATE_ALWAYS;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (purpose == OS_FILE_AIO) {
- /* If specified, use asynchronous (overlapped) io and no
- buffering of writes in the OS */
- attributes = 0;
-#ifdef WIN_ASYNC_IO
- if (os_aio_use_native_aio) {
- attributes = attributes | FILE_FLAG_OVERLAPPED;
- }
-#endif
-#ifdef UNIV_NON_BUFFERED_IO
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-#endif
- } else if (purpose == OS_FILE_NORMAL) {
- attributes = 0;
-#ifdef UNIV_NON_BUFFERED_IO
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-#endif
- } else {
- attributes = 0;
- ut_error;
- }
-
- file = CreateFile((LPCTSTR) name,
- GENERIC_READ | GENERIC_WRITE, /* read and write
- access */
- share_mode, /* File can be read also by other
- processes; we must give the read
- permission because of ibbackup. We do
- not give the write permission to
- others because if one would succeed to
- start 2 instances of mysqld on the
- SAME files, that could cause severe
- database corruption! When opening
- raw disk partitions, Microsoft manuals
- say that we must give also the write
- permission. */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /* no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
-
- /* When srv_file_per_table is on, file creation failure may not
- be critical to the whole instance. Do not crash the server in
- case of unknown errors. */
- if (srv_file_per_table) {
- retry = os_file_handle_error_no_exit(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- } else {
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- }
-
- if (retry) {
- goto try_again;
- }
- } else {
- *success = TRUE;
- }
-
- return(file);
-#else /* __WIN__ */
- os_file_t file;
- int create_flag;
- ibool retry;
- const char* mode_str = NULL;
- const char* type_str = NULL;
- const char* purpose_str = NULL;
-
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
- || create_mode == OS_FILE_OPEN_RETRY) {
- mode_str = "OPEN";
- create_flag = O_RDWR;
- } else if (create_mode == OS_FILE_CREATE) {
- mode_str = "CREATE";
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else if (create_mode == OS_FILE_OVERWRITE) {
- mode_str = "OVERWRITE";
- create_flag = O_RDWR | O_CREAT | O_TRUNC;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (type == OS_LOG_FILE) {
- type_str = "LOG";
- } else if (type == OS_DATA_FILE) {
- type_str = "DATA";
- } else {
- ut_error;
- }
-
- if (purpose == OS_FILE_AIO) {
- purpose_str = "AIO";
- } else if (purpose == OS_FILE_NORMAL) {
- purpose_str = "NORMAL";
- } else {
- ut_error;
- }
-
-#if 0
- fprintf(stderr, "Opening file %s, mode %s, type %s, purpose %s\n",
- name, mode_str, type_str, purpose_str);
-#endif
-#ifdef O_SYNC
- /* We let O_SYNC only affect log files; note that we map O_DSYNC to
- O_SYNC because the datasync options seemed to corrupt files in 2001
- in both Linux and Solaris */
- if (type == OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
-
-# if 0
- fprintf(stderr, "Using O_SYNC for file %s\n", name);
-# endif
-
- create_flag = create_flag | O_SYNC;
- }
-#endif /* O_SYNC */
-
- file = open(name, create_flag, os_innodb_umask);
-
- if (file == -1) {
- *success = FALSE;
-
- /* When srv_file_per_table is on, file creation failure may not
- be critical to the whole instance. Do not crash the server in
- case of unknown errors. */
- if (srv_file_per_table) {
- retry = os_file_handle_error_no_exit(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- } else {
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- }
-
- if (retry) {
- goto try_again;
- } else {
- return(file /* -1 */);
- }
- }
- /* else */
-
- *success = TRUE;
-
- /* We disable OS caching (O_DIRECT) only on data files */
- if (type != OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
-
- os_file_set_nocache(file, name, mode_str);
- }
-
-#ifdef USE_FILE_LOCK
- if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
-
- if (create_mode == OS_FILE_OPEN_RETRY) {
- int i;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Retrying to lock"
- " the first data file\n",
- stderr);
- for (i = 0; i < 100; i++) {
- os_thread_sleep(1000000);
- if (!os_file_lock(file, name)) {
- *success = TRUE;
- return(file);
- }
- }
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to open the first data file\n",
- stderr);
- }
-
- *success = FALSE;
- close(file);
- file = -1;
- }
-#endif /* USE_FILE_LOCK */
-
- return(file);
-#endif /* __WIN__ */
-}
-
-/***************************************************************************
-Deletes a file if it exists. The file has to be closed before calling this. */
-
-ibool
-os_file_delete_if_exists(
-/*=====================*/
- /* out: TRUE if success */
- const char* name) /* in: file path as a null-terminated string */
-{
-#ifdef __WIN__
- BOOL ret;
- ulint count = 0;
-loop:
- /* In Windows, deleting an .ibd file may fail if ibbackup is copying
- it */
-
- ret = DeleteFile((LPCTSTR)name);
-
- if (ret) {
- return(TRUE);
- }
-
- if (GetLastError() == ERROR_FILE_NOT_FOUND) {
- /* the file does not exist, this not an error */
-
- return(TRUE);
- }
-
- count++;
-
- if (count > 100 && 0 == (count % 10)) {
- fprintf(stderr,
- "InnoDB: Warning: cannot delete file %s\n"
- "InnoDB: Are you running ibbackup"
- " to back up the file?\n", name);
-
- os_file_get_last_error(TRUE); /* print error information */
- }
-
- os_thread_sleep(1000000); /* sleep for a second */
-
- if (count > 2000) {
-
- return(FALSE);
- }
-
- goto loop;
-#else
- int ret;
-
- ret = unlink((const char*)name);
-
- if (ret != 0 && errno != ENOENT) {
- os_file_handle_error_no_exit(name, "delete");
-
- return(FALSE);
- }
-
- return(TRUE);
-#endif
-}
-
-/***************************************************************************
-Removes a file from the file system. The file must already be closed.
-A file that does not exist counts as a failure. */
-
-ibool
-os_file_delete(
-/*===========*/
-				/* out: TRUE if the file was deleted */
-	const char*	name)	/* in: null-terminated path of the file */
-{
-#ifdef __WIN__
-	ulint	n_failed;
-
-	/* On Windows the deletion of an .ibd file may fail while ibbackup
-	is copying it, so the attempt is repeated with a pause in between */
-
-	for (n_failed = 1;; n_failed++) {
-
-		if (DeleteFile((LPCTSTR)name)) {
-
-			return(TRUE);
-		}
-
-		if (GetLastError() == ERROR_FILE_NOT_FOUND) {
-			/* A missing file is classified as a 'mild' error:
-			give up at once instead of retrying */
-
-			return(FALSE);
-		}
-
-		/* After 100 failures, warn on every tenth one */
-		if (n_failed > 100 && n_failed % 10 == 0) {
-			fprintf(stderr,
-				"InnoDB: Warning: cannot delete file %s\n"
-				"InnoDB: Are you running ibbackup"
-				" to back up the file?\n", name);
-
-			os_file_get_last_error(TRUE); /* print error information */
-		}
-
-		os_thread_sleep(1000000); /* sleep for a second */
-
-		if (n_failed > 2000) {
-
-			return(FALSE);
-		}
-	}
-#else
-	if (unlink((const char*)name) == 0) {
-
-		return(TRUE);
-	}
-
-	os_file_handle_error_no_exit(name, "delete");
-
-	return(FALSE);
-#endif
-}
-
-/***************************************************************************
-Gives a file a new path; the new path may lie in a different directory.
-The file should preferably be closed when this is called. A failure is
-reported through os_file_handle_error_no_exit() using the old path. */
-
-ibool
-os_file_rename(
-/*===========*/
-				/* out: TRUE if success */
-	const char*	oldpath,/* in: old file path as a null-terminated
-				string */
-	const char*	newpath)/* in: new file path */
-{
-#ifdef __WIN__
-	if (MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath)) {
-
-		return(TRUE);
-	}
-#else
-	if (rename((const char*)oldpath, (const char*)newpath) == 0) {
-
-		return(TRUE);
-	}
-#endif
-	/* Both platforms report the failure in the same way */
-	os_file_handle_error_no_exit(oldpath, "rename");
-
-	return(FALSE);
-}
-
-/***************************************************************************
-Closes a file handle. If closing fails, os_file_handle_error() is called
-and the error number can afterwards be fetched with
-os_file_get_last_error. */
-
-ibool
-os_file_close(
-/*==========*/
-				/* out: TRUE if success */
-	os_file_t	file)	/* in, own: handle to a file */
-{
-#ifdef __WIN__
-	ut_a(file);
-
-	if (CloseHandle(file)) {
-
-		return(TRUE);
-	}
-#else
-	if (close(file) != -1) {
-
-		return(TRUE);
-	}
-#endif
-	/* Shared failure path for both platforms */
-	os_file_handle_error(NULL, "close");
-
-	return(FALSE);
-}
-
-/***************************************************************************
-Closes a file handle without reporting or handling any error; the outcome
-is only conveyed through the return value. */
-
-ibool
-os_file_close_no_error_handling(
-/*============================*/
-				/* out: TRUE if success */
-	os_file_t	file)	/* in, own: handle to a file */
-{
-#ifdef __WIN__
-	ut_a(file);
-
-	return(CloseHandle(file) ? TRUE : FALSE);
-#else
-	return(close(file) == -1 ? FALSE : TRUE);
-#endif
-}
-
-/***************************************************************************
-Determines the size of an open file and returns it split into two 32-bit
-halves. Outside Windows the size is found by seeking to the end of the
-file, which moves the file position of the handle. */
-
-ibool
-os_file_get_size(
-/*=============*/
-				/* out: TRUE if success */
-	os_file_t	file,	/* in: handle to a file */
-	ulint*		size,	/* out: least significant 32 bits of file
-				size */
-	ulint*		size_high)/* out: most significant 32 bits of size */
-{
-#ifdef __WIN__
-	DWORD	upper;
-	DWORD	lower = GetFileSize(file, &upper);
-
-	/* 0xFFFFFFFF is also a valid low word, so it signals an error only
-	together with a nonzero last-error code */
-	if (lower == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
-		return(FALSE);
-	}
-
-	*size = lower;
-	*size_high = upper;
-
-	return(TRUE);
-#else
-	off_t	end_pos = lseek(file, 0, SEEK_END);
-
-	if (end_pos == ((off_t)-1)) {
-
-		return(FALSE);
-	}
-
-	if (sizeof(off_t) <= 4) {
-		/* A 32-bit off_t has no upper half */
-		*size = (ulint) end_pos;
-		*size_high = 0;
-	} else {
-		*size = (ulint)(end_pos & 0xFFFFFFFFUL);
-		*size_high = (ulint)(end_pos >> 32);
-	}
-
-	return(TRUE);
-#endif
-}
-
-/***************************************************************************
-Returns the size of an open file as one 64-bit ib_longlong value, built
-from the two 32-bit halves reported by os_file_get_size(). */
-
-ib_longlong
-os_file_get_size_as_iblonglong(
-/*===========================*/
-				/* out: size in bytes, -1 if error */
-	os_file_t	file)	/* in: handle to a file */
-{
-	ulint	low_word;
-	ulint	high_word;
-
-	if (!os_file_get_size(file, &low_word, &high_word)) {
-
-		return(-1);
-	}
-
-	return((((ib_longlong)high_word) << 32) + (ib_longlong)low_word);
-}
-
-/***************************************************************************
-Write the specified number of zeros to a newly created file. The zeros are
-written from offset 0 in chunks of at most 64 pages through os_file_write(),
-and the file is flushed with os_file_flush() afterwards. For sizes of at
-least 100 MB a progress line is printed to stderr. */
-
-ibool
-os_file_set_size(
-/*=============*/
-				/* out: TRUE if success */
-	const char*	name,	/* in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/* in: handle to a file */
-	ulint		size,	/* in: least significant 32 bits of file
-				size */
-	ulint		size_high)/* in: most significant 32 bits of size */
-{
-	ib_longlong	current_size;
-	ib_longlong	desired_size;
-	ibool		ret;
-	byte*		buf;
-	byte*		buf2;
-	ulint		buf_size;
-
-	ut_a(size == (size & 0xFFFFFFFF));
-
-	current_size = 0;
-	desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32);
-
-	/* Write at most 64 pages at a time. */
-	buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
-		* UNIV_PAGE_SIZE;
-
-	if (buf_size == 0) {
-		/* The requested size is smaller than one page. With an
-		empty buffer every pass of the loop below would write 0
-		bytes and current_size would never advance, so always use
-		a buffer of at least one page. */
-		buf_size = UNIV_PAGE_SIZE;
-	}
-
-	/* One extra page is allocated so that the buffer can be aligned */
-	buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
-
-	/* Align the buffer for possible raw i/o */
-	buf = ut_align(buf2, UNIV_PAGE_SIZE);
-
-	/* Write buffer full of zeros */
-	memset(buf, 0, buf_size);
-
-	if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
-
-		fprintf(stderr, "InnoDB: Progress in MB:");
-	}
-
-	while (current_size < desired_size) {
-		ulint	n_bytes;
-
-		/* The last chunk may be shorter than the buffer */
-		if (desired_size - current_size < (ib_longlong) buf_size) {
-			n_bytes = (ulint) (desired_size - current_size);
-		} else {
-			n_bytes = buf_size;
-		}
-
-		ret = os_file_write(name, file, buf,
-				    (ulint)(current_size & 0xFFFFFFFF),
-				    (ulint)(current_size >> 32),
-				    n_bytes);
-		if (!ret) {
-			ut_free(buf2);
-
-			return(FALSE);
-		}
-
-		/* Print about progress for each 100 MB written */
-		if ((ib_longlong) (current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024)
-		    != current_size / (ib_longlong)(100 * 1024 * 1024)) {
-
-			fprintf(stderr, " %lu00",
-				(ulong) ((current_size + n_bytes)
-					 / (ib_longlong)(100 * 1024 * 1024)));
-		}
-
-		current_size += n_bytes;
-	}
-
-	if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
-
-		fprintf(stderr, "\n");
-	}
-
-	ut_free(buf2);
-
-	ret = os_file_flush(file);
-
-	if (ret) {
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/***************************************************************************
-Cuts a stdio file off at its current position: on Windows by
-SetEndOfFile() on the underlying OS handle, elsewhere by ftruncate() to
-the offset reported by ftell(). */
-
-ibool
-os_file_set_eof(
-/*============*/
-				/* out: TRUE if success */
-	FILE*		file)	/* in: file to be truncated */
-{
-#ifdef __WIN__
-	HANDLE	os_handle;
-
-	os_handle = (HANDLE) _get_osfhandle(fileno(file));
-
-	return(SetEndOfFile(os_handle));
-#else /* __WIN__ */
-	int	fd = fileno(file);
-	long	pos = ftell(file);
-
-	/* ftruncate() returns 0 on success, hence the negation */
-	return(!ftruncate(fd, pos));
-#endif /* __WIN__ */
-}
-
-#ifndef __WIN__
-/***************************************************************************
-Calls fsync(2) and repeats the call for as long as it fails with ENOLCK,
-sleeping 0.2 seconds between attempts. Any other outcome is returned to
-the caller as is: 0 on success, or -1 with errno describing the error.
-Every call to fsync() is counted in os_n_fsyncs. */
-
-static
-int
-os_file_fsync(
-/*==========*/
-				/* out: 0 if success, -1 otherwise */
-	os_file_t	file)	/* in: handle to a file */
-{
-	int	ret;
-	int	n_enolck = 0;
-
-	for (;;) {
-		ret = fsync(file);
-
-		os_n_fsyncs++;
-
-		if (ret != -1 || errno != ENOLCK) {
-			/* Success, or an error that is not retried */
-
-			break;
-		}
-
-		/* Report only the first and then every 100th failure */
-		if (n_enolck % 100 == 0) {
-
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: fsync(): "
-				"No locks available; retrying\n");
-		}
-
-		os_thread_sleep(200000 /* 0.2 sec */);
-
-		n_enolck++;
-	}
-
-	return(ret);
-}
-#endif /* !__WIN__ */
-
-/***************************************************************************
-Flushes the write buffers of a given file to the disk. On Windows this
-calls FlushFileBuffers(). Elsewhere it calls os_file_fsync(), except that
-under HAVE_DARWIN_THREADS fcntl(F_FULLFSYNC) is tried first when
-srv_have_fullfsync is set, falling back to os_file_fsync() if that fails.
-When srv_start_raw_disk_in_use is set, the error a raw device gives
-(ERROR_INVALID_FUNCTION on Windows, EINVAL elsewhere) is ignored and TRUE
-is returned. Any other failure is reported with os_file_handle_error() and
-then ut_error is invoked; the return(FALSE) after it is presumably never
-reached. */
-
-ibool
-os_file_flush(
-/*==========*/
- /* out: TRUE if success */
- os_file_t file) /* in, own: handle to a file */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ut_a(file);
-
- os_n_fsyncs++;
-
- ret = FlushFileBuffers(file);
-
- if (ret) {
- return(TRUE);
- }
-
- /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
- actually a raw device, we choose to ignore that error if we are using
- raw disks */
-
- if (srv_start_raw_disk_in_use && GetLastError()
- == ERROR_INVALID_FUNCTION) {
- return(TRUE);
- }
-
- os_file_handle_error(NULL, "flush");
-
- /* It is a fatal error if a file flush does not succeed, because then
- the database can get corrupt on disk */
- ut_error;
-
- return(FALSE);
-#else
- int ret;
-
-#if defined(HAVE_DARWIN_THREADS)
-# ifndef F_FULLFSYNC
- /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */
-# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */
-# elif F_FULLFSYNC != 51
-# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
-# endif
- /* Apple has disabled fsync() for internal disk drives in OS X. That
- caused corruption for a user when he tested a power outage. Let us in
- OS X use a nonstandard flush method recommended by an Apple
- engineer. */
-
- if (!srv_have_fullfsync) {
- /* If we are not on an operating system that supports this,
- then fall back to a plain fsync. */
-
- ret = os_file_fsync(file);
- } else {
- ret = fcntl(file, F_FULLFSYNC, NULL);
-
- if (ret) {
- /* If we are not on a file system that supports this,
- then fall back to a plain fsync. */
- ret = os_file_fsync(file);
- }
- }
-#else
- ret = os_file_fsync(file);
-#endif
-
- if (ret == 0) {
- return(TRUE);
- }
-
- /* Since Linux returns EINVAL if the 'file' is actually a raw device,
- we choose to ignore that error if we are using raw disks */
-
- if (srv_start_raw_disk_in_use && errno == EINVAL) {
-
- return(TRUE);
- }
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: the OS said file flush did not succeed\n");
-
- os_file_handle_error(NULL, "flush");
-
- /* It is a fatal error if a file flush does not succeed, because then
- the database can get corrupt on disk */
- ut_error;
-
- return(FALSE);
-#endif
-}
-
-#ifndef __WIN__
-/***********************************************************************
-Does a synchronous positioned read operation in Posix. When pread() is
-usable it is called directly; otherwise an lseek() + read() pair is done
-while holding one of the os_file_seek_mutexes. The pending-read counters
-are updated under os_file_count_mutex around the operation.
-
-If off_t is only 32 bits wide and offset_high is nonzero, the requested
-offset cannot be represented. The call then fails with -1 and errno set
-to EINVAL instead of reading from a truncated, wrong file position. */
-static
-ssize_t
-os_file_pread(
-/*==========*/
-				/* out: number of bytes read, -1 if error */
-	os_file_t	file,	/* in: handle to a file */
-	void*		buf,	/* in: buffer where to read */
-	ulint		n,	/* in: number of bytes to read */
-	ulint		offset,	/* in: least significant 32 bits of file
-				offset from where to read */
-	ulint		offset_high) /* in: most significant 32 bits of
-				offset */
-{
-	off_t	offs;
-	ssize_t	n_bytes;
-
-	ut_a((offset & 0xFFFFFFFFUL) == offset);
-
-	/* If off_t is > 4 bytes in size, then we assume we can pass a
-	64-bit address */
-
-	if (sizeof(off_t) > 4) {
-		offs = (off_t)offset + (((off_t)offset_high) << 32);
-
-	} else {
-		offs = (off_t)offset;
-
-		if (offset_high > 0) {
-			fprintf(stderr,
-				"InnoDB: Error: file read at offset > 4 GB\n");
-
-			/* Dropping the high word would read unrelated
-			data from the low 4 GB of the file: refuse */
-			errno = EINVAL;
-
-			return(-1);
-		}
-	}
-
-	os_n_file_reads++;
-
-#if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
-	os_mutex_enter(os_file_count_mutex);
-	os_file_n_pending_preads++;
-	os_n_pending_reads++;
-	os_mutex_exit(os_file_count_mutex);
-
-	n_bytes = pread(file, buf, (ssize_t)n, offs);
-
-	os_mutex_enter(os_file_count_mutex);
-	os_file_n_pending_preads--;
-	os_n_pending_reads--;
-	os_mutex_exit(os_file_count_mutex);
-
-	return(n_bytes);
-#else
-	{
-		off_t	ret_offset;
-		ssize_t	ret;
-		ulint	i;
-
-		os_mutex_enter(os_file_count_mutex);
-		os_n_pending_reads++;
-		os_mutex_exit(os_file_count_mutex);
-
-		/* Protect the seek / read operation with a mutex */
-		i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
-		os_mutex_enter(os_file_seek_mutexes[i]);
-
-		ret_offset = lseek(file, offs, SEEK_SET);
-
-		if (ret_offset < 0) {
-			ret = -1;
-		} else {
-			ret = read(file, buf, (ssize_t)n);
-		}
-
-		os_mutex_exit(os_file_seek_mutexes[i]);
-
-		os_mutex_enter(os_file_count_mutex);
-		os_n_pending_reads--;
-		os_mutex_exit(os_file_count_mutex);
-
-		return(ret);
-	}
-#endif
-}
-
-/***********************************************************************
-Does a synchronous positioned write operation in Posix. When pwrite() is
-usable it is called directly; otherwise an lseek() + write() pair is done
-while holding one of the os_file_seek_mutexes. The pending-write counters
-are updated under os_file_count_mutex around the operation. If
-UNIV_DO_FLUSH is defined, the file is also flushed after the write unless
-the flush method or os_do_not_call_flush_at_each_write forbids it.
-
-If off_t is only 32 bits wide and offset_high is nonzero, the requested
-offset cannot be represented. The call then fails with -1 and errno set
-to EINVAL instead of writing the data at a truncated, wrong file
-position. */
-static
-ssize_t
-os_file_pwrite(
-/*===========*/
-				/* out: number of bytes written, -1 if error */
-	os_file_t	file,	/* in: handle to a file */
-	const void*	buf,	/* in: buffer from where to write */
-	ulint		n,	/* in: number of bytes to write */
-	ulint		offset,	/* in: least significant 32 bits of file
-				offset where to write */
-	ulint		offset_high) /* in: most significant 32 bits of
-				offset */
-{
-	ssize_t	ret;
-	off_t	offs;
-
-	ut_a((offset & 0xFFFFFFFFUL) == offset);
-
-	/* If off_t is > 4 bytes in size, then we assume we can pass a
-	64-bit address */
-
-	if (sizeof(off_t) > 4) {
-		offs = (off_t)offset + (((off_t)offset_high) << 32);
-	} else {
-		offs = (off_t)offset;
-
-		if (offset_high > 0) {
-			fprintf(stderr,
-				"InnoDB: Error: file write"
-				" at offset > 4 GB\n");
-
-			/* Dropping the high word would overwrite
-			unrelated data in the low 4 GB of the file:
-			refuse */
-			errno = EINVAL;
-
-			return(-1);
-		}
-	}
-
-	os_n_file_writes++;
-
-#if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD)
-	os_mutex_enter(os_file_count_mutex);
-	os_file_n_pending_pwrites++;
-	os_n_pending_writes++;
-	os_mutex_exit(os_file_count_mutex);
-
-	ret = pwrite(file, buf, (ssize_t)n, offs);
-
-	os_mutex_enter(os_file_count_mutex);
-	os_file_n_pending_pwrites--;
-	os_n_pending_writes--;
-	os_mutex_exit(os_file_count_mutex);
-
-# ifdef UNIV_DO_FLUSH
-	if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
-	    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
-	    && !os_do_not_call_flush_at_each_write) {
-
-		/* Always do fsync to reduce the probability that when
-		the OS crashes, a database page is only partially
-		physically written to disk. */
-
-		ut_a(TRUE == os_file_flush(file));
-	}
-# endif /* UNIV_DO_FLUSH */
-
-	return(ret);
-#else
-	{
-		off_t	ret_offset;
-		ulint	i;
-
-		os_mutex_enter(os_file_count_mutex);
-		os_n_pending_writes++;
-		os_mutex_exit(os_file_count_mutex);
-
-		/* Protect the seek / write operation with a mutex */
-		i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
-		os_mutex_enter(os_file_seek_mutexes[i]);
-
-		ret_offset = lseek(file, offs, SEEK_SET);
-
-		if (ret_offset < 0) {
-			ret = -1;
-
-			goto func_exit;
-		}
-
-		ret = write(file, buf, (ssize_t)n);
-
-# ifdef UNIV_DO_FLUSH
-		if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
-		    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
-		    && !os_do_not_call_flush_at_each_write) {
-
-			/* Always do fsync to reduce the probability that when
-			the OS crashes, a database page is only partially
-			physically written to disk. */
-
-			ut_a(TRUE == os_file_flush(file));
-		}
-# endif /* UNIV_DO_FLUSH */
-
-func_exit:
-		os_mutex_exit(os_file_seek_mutexes[i]);
-
-		os_mutex_enter(os_file_count_mutex);
-		os_n_pending_writes--;
-		os_mutex_exit(os_file_count_mutex);
-
-		return(ret);
-	}
-#endif
-}
-#endif
-
-/***********************************************************************
-Requests a synchronous positioned read operation of exactly n bytes at the
-file offset given as two 32-bit halves. On Windows the seek and the read
-are done while holding one of the os_file_seek_mutexes; elsewhere the read
-is delegated to os_file_pread(). If the n bytes could not be read,
-os_file_handle_error() decides whether the read is retried; if it is not,
-a fatal error message is printed and ut_error is invoked, so the
-return(FALSE) at the end is presumably never reached. */
-
-ibool
-os_file_read(
-/*=========*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
- offset where to read */
- ulint offset_high, /* in: most significant 32 bits of
- offset */
- ulint n) /* in: number of bytes to read */
-{
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ibool retry;
- ulint i;
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
-
-try_again:
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
-
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
- /* Protect the seek / read operation with a mutex; the mutex is picked
- from os_file_seek_mutexes by the handle value modulo
- OS_FILE_N_SEEK_MUTEXES */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
- }
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- if (ret && len == n) {
- return(TRUE);
- }
-#else
- ibool retry;
- ssize_t ret;
-
- os_bytes_read_since_printout += n;
-
-try_again:
- ret = os_file_pread(file, buf, n, offset, offset_high);
-
- if ((ulint)ret == n) {
-
- return(TRUE);
- }
-
- fprintf(stderr,
- "InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n"
- "InnoDB: Was only able to read %ld.\n",
- (ulong)n, (ulong)offset_high,
- (ulong)offset, (long)ret);
-#endif
-#ifdef __WIN__
-error_handling:
-#endif
- retry = os_file_handle_error(NULL, "read");
-
- if (retry) {
- goto try_again;
- }
-
- fprintf(stderr,
- "InnoDB: Fatal error: cannot read from file."
- " OS error number %lu.\n",
-#ifdef __WIN__
- (ulong) GetLastError()
-#else
- (ulong) errno
-#endif
- );
- fflush(stderr);
-
- ut_error;
-
- return(FALSE);
-}
-
-/***********************************************************************
-Requests a synchronous positioned read operation of exactly n bytes at the
-file offset given as two 32-bit halves. Unlike os_file_read(), this
-function prints no message of its own and does not invoke ut_error: a
-failed or short read is passed to os_file_handle_error_no_exit(), the read
-is retried if that call returns TRUE, and otherwise FALSE is returned to
-the caller. */
-
-ibool
-os_file_read_no_error_handling(
-/*===========================*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
- offset where to read */
- ulint offset_high, /* in: most significant 32 bits of
- offset */
- ulint n) /* in: number of bytes to read */
-{
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ibool retry;
- ulint i;
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
-
-try_again:
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
-
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
- /* Protect the seek / read operation with a mutex; the mutex is picked
- from os_file_seek_mutexes by the handle value modulo
- OS_FILE_N_SEEK_MUTEXES */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
- }
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- if (ret && len == n) {
- return(TRUE);
- }
-#else
- ibool retry;
- ssize_t ret;
-
- os_bytes_read_since_printout += n;
-
-try_again:
- ret = os_file_pread(file, buf, n, offset, offset_high);
-
- if ((ulint)ret == n) {
-
- return(TRUE);
- }
-#endif
-#ifdef __WIN__
-error_handling:
-#endif
- retry = os_file_handle_error_no_exit(NULL, "read");
-
- if (retry) {
- goto try_again;
- }
-
- return(FALSE);
-}
-
-/***********************************************************************
-Rewinds a stdio file to its beginning and copies at most size - 1 bytes
-of it into str, which is then NUL-terminated. Nothing is done when size is
-0. No error is reported to the caller. This function is mostly meant to
-be used with temporary files. */
-
-void
-os_file_read_string(
-/*================*/
-	FILE*	file,	/* in: file to read from */
-	char*	str,	/* in: buffer where to read */
-	ulint	size)	/* in: size of buffer */
-{
-	size_t	n_read;
-
-	if (size > 0) {
-		rewind(file);
-
-		/* Leave room for the terminating NUL */
-		n_read = fread(str, 1, size - 1, file);
-
-		str[n_read] = '\0';
-	}
-}
-
-/***********************************************************************
-Requests a synchronous write operation of exactly n bytes at the file
-offset given as two 32-bit halves. TRUE is returned only if all n bytes
-were written. On Windows the seek and the write are done while holding one
-of the os_file_seek_mutexes, and a write that fails with
-ERROR_LOCK_VIOLATION is retried up to 100 times with one-second waits;
-elsewhere the write is delegated to os_file_pwrite(). A failed write is
-reported on stderr only while os_has_said_disk_full is not set, and the
-report sets that flag.
-NOTE(review): the Windows branch passes a GetLastError() code to
-strerror(), which interprets its argument as an errno value, so the
-"means" text may not describe the actual Windows error - confirm. */
-
-ibool
-os_file_write(
-/*==========*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- os_file_t file, /* in: handle to a file */
- const void* buf, /* in: buffer from which to write */
- ulint offset, /* in: least significant 32 bits of file
- offset where to write */
- ulint offset_high, /* in: most significant 32 bits of
- offset */
- ulint n) /* in: number of bytes to write */
-{
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ulint i;
- ulint n_retries = 0;
- ulint err;
-
- ut_a((offset & 0xFFFFFFFF) == offset);
-
- os_n_file_writes++;
-
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
-retry:
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_writes++;
- os_mutex_exit(os_file_count_mutex);
-
- /* Protect the seek / write operation with a mutex; the mutex is
- picked from os_file_seek_mutexes by the handle value modulo
- OS_FILE_N_SEEK_MUTEXES */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_writes--;
- os_mutex_exit(os_file_count_mutex);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: File pointer positioning to"
- " file %s failed at\n"
- "InnoDB: offset %lu %lu. Operating system"
- " error number %lu.\n"
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n",
- name, (ulong) offset_high, (ulong) offset,
- (ulong) GetLastError());
-
- return(FALSE);
- }
-
- ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
-
- /* Always do fsync to reduce the probability that when the OS crashes,
- a database page is only partially physically written to disk. */
-
-# ifdef UNIV_DO_FLUSH
- if (!os_do_not_call_flush_at_each_write) {
- ut_a(TRUE == os_file_flush(file));
- }
-# endif /* UNIV_DO_FLUSH */
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_writes--;
- os_mutex_exit(os_file_count_mutex);
-
- if (ret && len == n) {
-
- return(TRUE);
- }
-
- /* If some background file system backup tool is running, then, at
- least in Windows 2000, we may get here a specific error. Let us
- retry the operation 100 times, with 1 second waits. */
-
- if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) {
-
- os_thread_sleep(1000000);
-
- n_retries++;
-
- goto retry;
- }
-
- if (!os_has_said_disk_full) {
-
- err = (ulint)GetLastError();
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: Write to file %s failed"
- " at offset %lu %lu.\n"
- "InnoDB: %lu bytes should have been written,"
- " only %lu were written.\n"
- "InnoDB: Operating system error number %lu.\n"
- "InnoDB: Check that your OS and file system"
- " support files of this size.\n"
- "InnoDB: Check also that the disk is not full"
- " or a disk quota exceeded.\n",
- name, (ulong) offset_high, (ulong) offset,
- (ulong) n, (ulong) len, (ulong) err);
-
- if (strerror((int)err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %lu means '%s'.\n",
- (ulong) err, strerror((int)err));
- }
-
- fprintf(stderr,
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n");
-
- os_has_said_disk_full = TRUE;
- }
-
- return(FALSE);
-#else
- ssize_t ret;
-
- ret = os_file_pwrite(file, buf, n, offset, offset_high);
-
- if ((ulint)ret == n) {
-
- return(TRUE);
- }
-
- if (!os_has_said_disk_full) {
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: Write to file %s failed"
- " at offset %lu %lu.\n"
- "InnoDB: %lu bytes should have been written,"
- " only %ld were written.\n"
- "InnoDB: Operating system error number %lu.\n"
- "InnoDB: Check that your OS and file system"
- " support files of this size.\n"
- "InnoDB: Check also that the disk is not full"
- " or a disk quota exceeded.\n",
- name, offset_high, offset, n, (long int)ret,
- (ulint)errno);
- if (strerror(errno) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %lu means '%s'.\n",
- (ulint)errno, strerror(errno));
- }
-
- fprintf(stderr,
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n");
-
- os_has_said_disk_full = TRUE;
- }
-
- return(FALSE);
-#endif
-}
-
-/***********************************************************************
-Tells whether the given path exists and, if it does, what kind of file
-it is. A path for which the stat call fails with ENOENT or ENOTDIR is
-reported as not existing, and *type is then left untouched. Any other
-stat failure is passed to os_file_handle_error_no_exit().
-NOTE(review): the non-Windows branch uses stat(), which follows symbolic
-links, so the OS_FILE_TYPE_LINK case is presumably never taken - confirm. */
-
-ibool
-os_file_status(
-/*===========*/
-				/* out: TRUE if call succeeded */
-	const char*	path,	/* in: pathname of the file */
-	ibool*		exists,	/* out: TRUE if file exists */
-	os_file_type_t* type)	/* out: type of the file (if it exists) */
-{
-#ifdef __WIN__
-	struct _stat	st;
-
-	if (_stat(path, &st)) {
-		if (errno == ENOENT || errno == ENOTDIR) {
-			/* file does not exist */
-			*exists = FALSE;
-
-			return(TRUE);
-		}
-
-		/* file exists, but stat call failed */
-		os_file_handle_error_no_exit(path, "stat");
-
-		return(FALSE);
-	}
-
-	*exists = TRUE;
-
-	if (_S_IFDIR & st.st_mode) {
-		*type = OS_FILE_TYPE_DIR;
-	} else if (_S_IFREG & st.st_mode) {
-		*type = OS_FILE_TYPE_FILE;
-	} else {
-		*type = OS_FILE_TYPE_UNKNOWN;
-	}
-
-	return(TRUE);
-#else
-	struct stat	st;
-
-	if (stat(path, &st)) {
-		if (errno == ENOENT || errno == ENOTDIR) {
-			/* file does not exist */
-			*exists = FALSE;
-
-			return(TRUE);
-		}
-
-		/* file exists, but stat call failed */
-		os_file_handle_error_no_exit(path, "stat");
-
-		return(FALSE);
-	}
-
-	*exists = TRUE;
-
-	if (S_ISDIR(st.st_mode)) {
-		*type = OS_FILE_TYPE_DIR;
-	} else if (S_ISLNK(st.st_mode)) {
-		*type = OS_FILE_TYPE_LINK;
-	} else if (S_ISREG(st.st_mode)) {
-		*type = OS_FILE_TYPE_FILE;
-	} else {
-		*type = OS_FILE_TYPE_UNKNOWN;
-	}
-
-	return(TRUE);
-#endif
-}
-
-/***********************************************************************
-Fills in the type, the three time stamps and the size of the file at the
-given path. FALSE is returned both when the file does not exist (stat
-fails with ENOENT or ENOTDIR) and when the stat call fails for another
-reason; only the latter is passed to os_file_handle_error_no_exit(). */
-
-ibool
-os_file_get_status(
-/*===============*/
-					/* out: TRUE if stat
-					information found */
-	const char*	path,		/* in: pathname of the file */
-	os_file_stat_t* stat_info)	/* information of a file in a
-					directory */
-{
-#ifdef __WIN__
-	struct _stat	st;
-
-	if (_stat(path, &st)) {
-		if (errno != ENOENT && errno != ENOTDIR) {
-			/* file exists, but stat call failed */
-
-			os_file_handle_error_no_exit(path, "stat");
-		}
-
-		/* otherwise the file does not exist */
-
-		return(FALSE);
-	}
-
-	if (_S_IFDIR & st.st_mode) {
-		stat_info->type = OS_FILE_TYPE_DIR;
-	} else if (_S_IFREG & st.st_mode) {
-		stat_info->type = OS_FILE_TYPE_FILE;
-	} else {
-		stat_info->type = OS_FILE_TYPE_UNKNOWN;
-	}
-
-	stat_info->ctime = st.st_ctime;
-	stat_info->atime = st.st_atime;
-	stat_info->mtime = st.st_mtime;
-	stat_info->size = st.st_size;
-
-	return(TRUE);
-#else
-	struct stat	st;
-
-	if (stat(path, &st)) {
-		if (errno != ENOENT && errno != ENOTDIR) {
-			/* file exists, but stat call failed */
-
-			os_file_handle_error_no_exit(path, "stat");
-		}
-
-		/* otherwise the file does not exist */
-
-		return(FALSE);
-	}
-
-	if (S_ISDIR(st.st_mode)) {
-		stat_info->type = OS_FILE_TYPE_DIR;
-	} else if (S_ISLNK(st.st_mode)) {
-		stat_info->type = OS_FILE_TYPE_LINK;
-	} else if (S_ISREG(st.st_mode)) {
-		stat_info->type = OS_FILE_TYPE_FILE;
-	} else {
-		stat_info->type = OS_FILE_TYPE_UNKNOWN;
-	}
-
-	stat_info->ctime = st.st_ctime;
-	stat_info->atime = st.st_atime;
-	stat_info->mtime = st.st_mtime;
-	stat_info->size = st.st_size;
-
-	return(TRUE);
-#endif
-}
-
-/* path name separator character */
-#ifdef __WIN__
-# define OS_FILE_PATH_SEPARATOR '\\'
-#else
-# define OS_FILE_PATH_SEPARATOR '/'
-#endif
-
-/********************************************************************
-The function os_file_dirname returns a directory component of a
-null-terminated pathname string. In the usual case, dirname returns
-the string up to, but not including, the final path separator
-(OS_FILE_PATH_SEPARATOR), and basename is the component following the
-final separator.
-
-If path does not contain a separator, dirname returns the string ".".
-If the only separator is the first character of path, dirname returns
-a string consisting of that separator alone (the root directory).
-
-Concatenating the string returned by dirname, a separator, and the
-basename yields a complete pathname.
-
-The return value is a copy of the directory component of the pathname.
-The copy is allocated from heap. It is the caller's responsibility
-to free it after it is no longer needed.
-
-Examples, with '/' as the separator:
-
-	path		dirname		basename
-	"/usr/lib"	"/usr"		"lib"
-	"usr"		"."		"usr"
-	"/"		"/"		"/"
-	"."		"."		"."
-	".."		"."		".."
-
-Note that, unlike dirname() in SUSv2, this function does not strip
-trailing separators: for "/usr/" it returns "/usr".
-*/
-
-char*
-os_file_dirname(
-/*============*/
-				/* out, own: directory component of the
-				pathname */
-	const char*	path)	/* in: pathname */
-{
-	/* Find the offset of the last slash */
-	const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
-	if (!last_slash) {
-		/* No slash in the path, return "." */
-
-		return(mem_strdup("."));
-	}
-
-	/* Ok, there is a slash */
-
-	if (last_slash == path) {
-		/* The last slash is the first char of the path: return
-		just that separator. Copying it from path, instead of
-		returning a hard-coded "/", keeps the result consistent
-		with OS_FILE_PATH_SEPARATOR when the separator is '\\',
-		which is what os_file_create_subdirs_if_needed() tests
-		for when it looks for the root directory. */
-
-		return(mem_strdupl(path, 1));
-	}
-
-	/* Non-trivial directory component */
-
-	return(mem_strdupl(path, last_slash - path));
-}
-
-/********************************************************************
-Makes sure that the directory part of the given path exists, creating
-every missing directory on the way. The function recurses on the parent
-directory until it reaches the root, the current directory, or a
-directory that already exists. */
-
-ibool
-os_file_create_subdirs_if_needed(
-/*=============================*/
-				/* out: TRUE if call succeeded
-				FALSE otherwise */
-	const char*	path)	/* in: path name */
-{
-	char*		parent;
-	ibool		ok;
-	ibool		parent_exists;
-	os_file_type_t	type;
-
-	parent = os_file_dirname(path);
-
-	if ((parent[0] == OS_FILE_PATH_SEPARATOR || parent[0] == '.')
-	    && parent[1] == '\0') {
-		/* parent is root or cwd, nothing to do */
-		ok = TRUE;
-	} else {
-		/* Test if parent exists */
-		ok = os_file_status(parent, &parent_exists, &type);
-
-		if (ok && !parent_exists) {
-			/* parent does not exist: first create its own
-			ancestors, then the directory itself */
-			ok = os_file_create_subdirs_if_needed(parent);
-
-			if (ok) {
-				ok = os_file_create_directory(parent, FALSE);
-			}
-		}
-	}
-
-	mem_free(parent);
-
-	return(ok);
-}
-
-/********************************************************************
-Gives the address of the slot at the given position in an aio array.
-The position is asserted to be below the slot count of the array. */
-static
-os_aio_slot_t*
-os_aio_array_get_nth_slot(
-/*======================*/
-					/* out: pointer to slot */
-	os_aio_array_t*	array,	/* in: aio array */
-	ulint		index)	/* in: index of the slot */
-{
-	ut_a(index < array->n_slots);
-
-	return(&array->slots[index]);
-}
-
-/****************************************************************************
-Allocates an aio wait array with room for n slots. The array gets its
-own mutex and its 'not_full' and 'is_empty' events, and 'is_empty' is set
-at once. Every slot is numbered and marked as not reserved; with
-WIN_ASYNC_IO each slot also gets an event whose handle is stored both in
-the slot's OVERLAPPED control block and in array->native_events. */
-static
-os_aio_array_t*
-os_aio_array_create(
-/*================*/
-			/* out, own: aio array */
-	ulint	n)	/* in: maximum number of pending aio operations
-			allowed */
-{
-	os_aio_array_t*	arr;
-	os_aio_slot_t*	cur;
-	ulint		k;
-
-	ut_a(n > 0);
-
-	arr = ut_malloc(sizeof(os_aio_array_t));
-
-	arr->mutex = os_mutex_create(NULL);
-	arr->not_full = os_event_create(NULL);
-	arr->is_empty = os_event_create(NULL);
-
-	/* A new array has no reserved slots */
-	os_event_set(arr->is_empty);
-
-	arr->n_slots = n;
-	arr->n_reserved = 0;
-	arr->slots = ut_malloc(n * sizeof(os_aio_slot_t));
-#ifdef __WIN__
-	arr->native_events = ut_malloc(n * sizeof(os_native_event_t));
-#endif
-	for (k = 0; k < n; k++) {
-		cur = os_aio_array_get_nth_slot(arr, k);
-
-		cur->pos = k;
-		cur->reserved = FALSE;
-#ifdef WIN_ASYNC_IO
-		cur->event = os_event_create(NULL);
-
-		cur->control.hEvent = cur->event->handle;
-
-		arr->native_events[k] = cur->control.hEvent;
-#endif
-	}
-
-	return(arr);
-}
-
-/****************************************************************************
-Initializes the asynchronous io system. Calls also os_io_init_simple.
-Creates an aio array for each of non-ibuf read, non-ibuf write, ibuf IO,
-log IO, and synchronous IO. The caller must create i/o handler thread for all
-but the synchronous aio array. Multiple threads can access the same array for
-the non-ibuf read (prefetch) and write (flush dirty buffer pages) arrays.
-Segment 0 is labelled as the insert buffer thread and segment 1 as the log
-thread; the segments from os_aio_first_read_segment up to
-os_aio_first_write_segment are labelled as read threads and the remaining
-ones as write threads.
-Return the number of AIO handler threads. */
-
-ulint
-os_aio_init(
-/*========*/
-				/* out: number of aio segments, that is,
-				2 + n_read_threads + n_write_threads */
-	ulint	ios_per_array,	/* in: maximum number of pending aio operations
-				allowed per array */
-	ulint	n_read_threads,	/* in: number of read threads */
-	ulint	n_write_threads, /* in: number of write threads */
-	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
-{
-	ulint	i;
-	ulint	n_segments = 2 + n_read_threads + n_write_threads;
-#ifdef POSIX_ASYNC_IO
-	sigset_t	sigset;
-#endif
-	ut_a(ios_per_array >= OS_AIO_N_PENDING_IOS_PER_THREAD);
-	ut_a(n_read_threads >= 1 && n_read_threads <= 64);
-	ut_a(n_write_threads >= 1 && n_write_threads <= 64);
-	ut_a(n_segments < SRV_MAX_N_IO_THREADS);
-
-	os_io_init_simple();
-
-	/* Reset the per-segment statistics and merge buffers */
-	for (i = 0; i < n_segments; i++) {
-		srv_set_io_thread_op_info(i, "not started yet");
-		os_aio_thread_io_reads[i] = 0;
-		os_aio_thread_io_writes[i] = 0;
-		os_aio_thread_io_requests[i] = 0;
-		os_aio_thread_buffer[i] = 0;
-		os_aio_thread_buffer_size[i] = 0;
-		os_aio_thread_io_wait[i] = 0;
-		os_aio_thread_max_io_wait[i] = 0;
-	}
-
-	os_aio_read_threads = n_read_threads;
-	os_aio_write_threads = n_write_threads;
-	os_aio_first_write_segment = os_aio_first_read_segment
-		+ os_aio_read_threads;
-
-	/* Cast to ulong for %lu, as the other diagnostics in this file do */
-	fprintf(stderr,
-		"InnoDB: ios_per_array %lu read threads %lu write threads %lu\n",
-		(ulong) ios_per_array, (ulong) os_aio_read_threads,
-		(ulong) os_aio_write_threads);
-
-	os_aio_ibuf_array = os_aio_array_create(ios_per_array);
-
-	srv_io_thread_function[0] = "insert buffer thread";
-
-	os_aio_log_array = os_aio_array_create(ios_per_array);
-
-	srv_io_thread_function[1] = "log thread";
-
-	os_aio_read_array = os_aio_array_create(ios_per_array);
-	for (i = os_aio_first_read_segment;
-	     i < os_aio_first_write_segment; i++) {
-		ut_a(i < SRV_MAX_N_IO_THREADS);
-		srv_io_thread_function[i] = "read thread";
-	}
-
-	os_aio_write_array = os_aio_array_create(ios_per_array);
-	for (i = os_aio_first_write_segment; i < n_segments; i++) {
-		ut_a(i < SRV_MAX_N_IO_THREADS);
-		srv_io_thread_function[i] = "write thread";
-	}
-
-	os_aio_sync_array = os_aio_array_create(n_slots_sync);
-
-	os_aio_n_segments = 2 + os_aio_read_threads + os_aio_write_threads;
-
-	os_aio_validate();
-
-	/* One wait event per segment, used by the simulated aio handlers */
-	os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
-
-	for (i = 0; i < n_segments; i++) {
-		os_aio_segment_wait_events[i] = os_event_create(NULL);
-	}
-
-	os_last_printout = time(NULL);
-
-#ifdef POSIX_ASYNC_IO
-	/* Block aio signals from the current thread and its children:
-	for this to work, the current thread must be the first created
-	in the database, so that all its children will inherit its
-	signal mask */
-
-	/* TODO: to work MySQL needs the SIGALARM signal; the following
-	will not work yet! */
-	sigemptyset(&sigset);
-	sigaddset(&sigset, SIGRTMIN + 1 + 0);
-	sigaddset(&sigset, SIGRTMIN + 1 + 1);
-	sigaddset(&sigset, SIGRTMIN + 1 + 2);
-	sigaddset(&sigset, SIGRTMIN + 1 + 3);
-
-	pthread_sigmask(SIG_BLOCK, &sigset, NULL);
-#endif
-	return os_aio_n_segments;
-}
-
-#ifdef WIN_ASYNC_IO
-/****************************************************************************
-Sets the event of every slot in the array. Used at shutdown to wake up
-the async i/o threads waiting on those events in Windows async i/o. */
-static
-void
-os_aio_array_wake_win_aio_at_shutdown(
-/*==================================*/
-	os_aio_array_t*	array)	/* in: aio array */
-{
-	os_aio_slot_t*	slot = array->slots;
-	os_aio_slot_t*	end = array->slots + array->n_slots;
-
-	while (slot < end) {
-		os_event_set(slot->event);
-		slot++;
-	}
-}
-#endif
-
-/****************************************************************************
-Signals every async i/o thread at shutdown, so that each of them gets a
-chance to notice the shutdown and exit. */
-
-void
-os_aio_wake_all_threads_at_shutdown(void)
-/*=====================================*/
-{
-	ulint	segment;
-
-#ifdef WIN_ASYNC_IO
-	/* Windows native aio: set the event of every slot in each array */
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
-#endif
-	/* Simulated aio: set the wait event of every segment */
-
-	segment = 0;
-
-	while (segment < os_aio_n_segments) {
-		os_event_set(os_aio_segment_wait_events[segment]);
-		segment++;
-	}
-}
-
-/****************************************************************************
-Blocks on the is_empty event of os_aio_write_array, that is, until no slot
-of that array is reserved. Writes that do not go through that array, such
-as synchronous ones, may still be pending when this returns. */
-
-void
-os_aio_wait_until_no_pending_writes(void)
-/*=====================================*/
-{
-	os_aio_array_t*	writes = os_aio_write_array;
-
-	os_event_wait(writes->is_empty);
-}
-
-/**************************************************************************
-Maps a global segment number to the aio array it is served from:
-segment 0 is the ibuf array, segment 1 the log array, the segments below
-os_aio_first_write_segment the read array and all others the write array. */
-static
-os_aio_array_t*
-os_aio_get_array(
-/*===============================*/
-				/* out: aio wait array */
-	ulint	global_segment)/* in: global segment number */
-{
-	ut_a(global_segment < os_aio_n_segments);
-
-	switch (global_segment) {
-	case 0:
-		return os_aio_ibuf_array;
-	case 1:
-		return os_aio_log_array;
-	default:
-		break;
-	}
-
-	return (global_segment < os_aio_first_write_segment)
-		? os_aio_read_array
-		: os_aio_write_array;
-}
-
-/***********************************************************************
-Maps an aio array to a small integer: 0 for the ibuf array, 1 for the log
-array, 2 for the read array and 3 for the write array. The number is used
-to pick the signal number in Posix aio. Any other array is an error. */
-
-#if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO)
-static
-ulint
-os_aio_get_array_no(
-/*================*/
-				/* out: array number 0 - 3 */
-	os_aio_array_t*	array)	/* in: aio array */
-{
-	if (array == os_aio_ibuf_array) {
-		return(0);
-	}
-
-	if (array == os_aio_log_array) {
-		return(1);
-	}
-
-	if (array == os_aio_read_array) {
-		return(2);
-	}
-
-	if (array == os_aio_write_array) {
-		return(3);
-	}
-
-	ut_error;
-
-	return(0);
-}
-
-/***********************************************************************
-Inverse of os_aio_get_array_no(): returns the ibuf, log, read or write
-array for the numbers 0, 1, 2 and 3. Any other number is an error. */
-static
-os_aio_array_t*
-os_aio_get_array_from_no(
-/*=====================*/
-			/* out: aio array */
-	ulint	n)	/* in: array number */
-{
-	switch (n) {
-	case 0:
-		return(os_aio_ibuf_array);
-	case 1:
-		return(os_aio_log_array);
-	case 2:
-		return(os_aio_read_array);
-	case 3:
-		return(os_aio_write_array);
-	default:
-		ut_error;
-
-		return(NULL);
-	}
-}
-#endif /* if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO) */
-
-/***********************************************************************
-Reserves a free slot in the aio array and fills it in with the request.
-While all slots are reserved, the caller waits on the not_full event; in
-simulated aio the handler threads are woken first so that slots get freed. */
-static
-os_aio_slot_t*
-os_aio_array_reserve_slot(
-/*======================*/
-				/* out: pointer to slot */
-	ulint		type,	/* in: OS_FILE_READ or OS_FILE_WRITE */
-	os_aio_array_t*	array,	/* in: aio array */
-	fil_node_t*	message1,/* in: message to be passed along with
-				the aio operation */
-	void*		message2,/* in: message to be passed along with
-				the aio operation */
-	os_file_t	file,	/* in: file handle */
-	const char*	name,	/* in: name of the file or path as a
-				null-terminated string */
-	void*		buf,	/* in: buffer where to read or from which
-				to write */
-	ulint		offset,	/* in: least significant 32 bits of file
-				offset */
-	ulint		offset_high, /* in: most significant 32 bits of
-				offset */
-	ulint		len)	/* in: length of the block to read or write */
-{
-	os_aio_slot_t*	slot;
-#ifdef WIN_ASYNC_IO
-	OVERLAPPED*	control;
-
-#elif defined(POSIX_ASYNC_IO)
-
-	struct aiocb*	control;
-#endif
-	ulint		i;
-
-	/* Leave this loop holding the array mutex, with a free slot left */
-	for (;;) {
-		os_mutex_enter(array->mutex);
-
-		if (array->n_reserved != array->n_slots) {
-
-			break;
-		}
-
-		os_mutex_exit(array->mutex);
-
-		if (!os_aio_use_native_aio) {
-			/* If the handler threads are suspended, wake them
-			so that we get more slots */
-
-			os_aio_simulated_wake_handler_threads();
-		}
-
-		os_event_wait(array->not_full);
-	}
-
-	/* Take the first unreserved slot; os_aio_array_get_nth_slot()
-	asserts that the index stays within the array */
-	i = 0;
-	slot = os_aio_array_get_nth_slot(array, i);
-
-	while (slot->reserved != FALSE) {
-		i++;
-		slot = os_aio_array_get_nth_slot(array, i);
-	}
-
-	ut_a(i < array->n_slots);
-	array->n_reserved++;
-
-	if (array->n_reserved == 1) {
-		os_event_reset(array->is_empty);
-	}
-
-	if (array->n_reserved == array->n_slots) {
-		os_event_reset(array->not_full);
-	}
-
-	/* Record the request in the slot */
-	slot->reserved = TRUE;
-	slot->reservation_time = time(NULL);
-	slot->status = OS_AIO_NOT_ISSUED;
-	slot->type = type;
-	slot->file = file;
-	slot->name = name;
-	slot->buf = buf;
-	slot->len = len;
-	slot->offset = offset;
-	slot->offset_high = offset_high;
-	slot->message1 = message1;
-	slot->message2 = message2;
-
-#ifdef WIN_ASYNC_IO
-	control = &(slot->control);
-	control->Offset = (DWORD)offset;
-	control->OffsetHigh = (DWORD)offset_high;
-	os_event_reset(slot->event);
-
-#elif defined(POSIX_ASYNC_IO)
-
-#if (UNIV_WORD_SIZE == 8)
-	offset = offset + (offset_high << 32);
-#else
-	ut_a(offset_high == 0);
-#endif
-	control = &(slot->control);
-	control->aio_fildes = file;
-	control->aio_buf = buf;
-	control->aio_nbytes = len;
-	control->aio_offset = offset;
-	control->aio_reqprio = 0;
-	control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
-	control->aio_sigevent.sigev_signo
-		= SIGRTMIN + 1 + os_aio_get_array_no(array);
-	/* TODO: How to choose the signal numbers? */
-	/*
-	fprintf(stderr, "AIO signal number %lu\n",
-	(ulint) control->aio_sigevent.sigev_signo);
-	*/
-	control->aio_sigevent.sigev_value.sival_ptr = slot;
-#endif
-	os_mutex_exit(array->mutex);
-
-	return(slot);
-}
-
-/***********************************************************************
-Releases a reserved slot of the aio array under the array mutex and
-updates the not_full and is_empty events of the array accordingly. */
-static
-void
-os_aio_array_free_slot(
-/*===================*/
-	os_aio_array_t*	array,	/* in: aio array */
-	os_aio_slot_t*	slot)	/* in: pointer to slot */
-{
-	ut_ad(array);
-	ut_ad(slot);
-
-	os_mutex_enter(array->mutex);
-
-	ut_ad(slot->reserved);
-
-	slot->status = OS_AIO_NOT_ISSUED;
-	slot->reserved = FALSE;
-
-	array->n_reserved--;
-
-	/* The array was full before this release: waiters may retry */
-	if (array->n_reserved == array->n_slots - 1) {
-		os_event_set(array->not_full);
-	}
-
-	/* The last reserved slot is gone */
-	if (array->n_reserved == 0) {
-		os_event_set(array->is_empty);
-	}
-
-#ifdef WIN_ASYNC_IO
-	os_event_reset(slot->event);
-#endif
-	os_mutex_exit(array->mutex);
-}
-
-/**************************************************************************
-Sets the wait events of the simulated aio handler segments that serve the
-given array, provided that the array holds a request for them to handle. */
-static
-void
-os_aio_simulated_wake_handler_thread(
-/*=================================*/
-	os_aio_array_t*	array)	/* in: aio array for which wakeup is done */
-{
-	os_aio_slot_t*	slot;
-	ulint		n_slots;
-	ulint		i;
-	ulint		segment;
-	ulint		end_segment;
-	ibool		pending = FALSE;
-
-	ut_ad(!os_aio_use_native_aio);
-	n_slots = array->n_slots;
-
-	os_mutex_enter(array->mutex);
-
-	/* OS_AIO_NOT_ISSUED means the read or write request has yet to be
-	done. OS_AIO_DONE means the request has been done but it was part
-	of a set of requests merged into one read or write call and was not
-	the first block in the request, so the handling of the IO completion
-	for that block has not been done. */
-
-	for (i = 0; i < n_slots && !pending; i++) {
-		slot = os_aio_array_get_nth_slot(array, i);
-
-		pending = slot->reserved
-			&& (slot->status == OS_AIO_NOT_ISSUED
-			    || slot->status == OS_AIO_DONE);
-	}
-
-	os_mutex_exit(array->mutex);
-
-	if (!pending) {
-
-		return;
-	}
-
-	/* Work out the range [segment, end_segment) of segments to wake */
-
-	if (array == os_aio_ibuf_array) {
-		segment = 0;
-		end_segment = 1;
-
-	} else if (array == os_aio_log_array) {
-		segment = 1;
-		end_segment = 2;
-
-	} else if (array == os_aio_read_array) {
-		segment = os_aio_first_read_segment;
-		end_segment = os_aio_first_write_segment;
-
-	} else if (array == os_aio_write_array) {
-		segment = os_aio_first_write_segment;
-		end_segment = os_aio_n_segments;
-
-	} else {
-		ut_a(0);
-
-		return;
-	}
-
-	for (; segment < end_segment; segment++) {
-		os_event_set(os_aio_segment_wait_events[segment]);
-	}
-}
-
-/**************************************************************************
-Clears the sleep recommendation for the read threads and wakes the
-simulated aio handler threads of the ibuf, log, read and write arrays that
-have something to do. Does nothing when native aio is in use. */
-
-void
-os_aio_simulated_wake_handler_threads(void)
-/*=======================================*/
-{
-	if (!os_aio_use_native_aio) {
-		os_aio_recommend_sleep_for_read_threads = FALSE;
-
-		os_aio_simulated_wake_handler_thread(os_aio_ibuf_array);
-		os_aio_simulated_wake_handler_thread(os_aio_log_array);
-		os_aio_simulated_wake_handler_thread(os_aio_read_array);
-		os_aio_simulated_wake_handler_thread(os_aio_write_array);
-	}
-
-	/* Otherwise we do not use simulated aio: do nothing */
-}
-
-/**************************************************************************
-Recommends the read handler threads to sleep and resets the wait events of
-the read segments. Meant for a caller that wants to post a batch of reads
-and have an i/o-handler thread handle them all at once later. You must
-call os_aio_simulated_wake_handler_threads later to ensure the threads
-are not left sleeping! */
-
-void
-os_aio_simulated_put_read_threads_to_sleep(void)
-/*============================================*/
-{
-	ulint	segment = os_aio_first_read_segment;
-
-	/* TODO(mcallaghan): provide similar function for write? */
-	os_aio_recommend_sleep_for_read_threads = TRUE;
-
-	while (segment < os_aio_first_write_segment) {
-		os_event_reset(os_aio_segment_wait_events[segment]);
-		segment++;
-	}
-}
-
-/***********************************************************************
-Requests an asynchronous i/o operation. A plain OS_AIO_SYNC request is
-served directly with os_file_read() or os_file_write(), except when Windows
-native aio is in use. Any other request reserves a slot in the aio array
-chosen by mode and type; the request is then either issued natively or, in
-simulated aio, left in the slot for a handler thread, which is woken unless
-the caller asked for OS_AIO_SIMULATED_WAKE_LATER. If queuing fails, the slot
-is freed and os_file_handle_error() decides whether the request is retried. */
-
-ibool
-os_aio(
-/*===*/
- /* out: TRUE if request was queued
- successfully, FALSE if fail */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
- ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
- to OS_AIO_SIMULATED_WAKE_LATER: the
- last flag advises this function not to wake
- i/o-handler threads, but the caller will
- do the waking explicitly later, in this
- way the caller can post several requests in
- a batch; NOTE that the batch must not be
- so big that it exhausts the slots in aio
- arrays! NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read or from which
- to write */
- ulint offset, /* in: least significant 32 bits of file
- offset where to read or write */
- ulint offset_high, /* in: most significant 32 bits of
- offset */
- ulint n, /* in: number of bytes to read or write */
- fil_node_t* message1,/* in: messages for the aio handler (these
- can be used to identify a completed aio
- operation); if mode is OS_AIO_SYNC, these
- are ignored */
- void* message2) /* in: second message for the aio handler; see message1 */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
-#ifdef WIN_ASYNC_IO
- ibool retval;
- BOOL ret = TRUE;
- DWORD len = (DWORD) n;
- struct fil_node_struct * dummy_mess1;
- void* dummy_mess2;
- ulint dummy_type;
-#endif
- ulint err = 0;
- ibool retry;
- ulint wake_later;
-
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
- ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(os_aio_validate());
-
- wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; /* remember the flag ... */
- mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); /* ... and strip it from mode */
-
- if (mode == OS_AIO_SYNC
-#ifdef WIN_ASYNC_IO
- && !os_aio_use_native_aio
-#endif
- ) {
- /* This is actually an ordinary synchronous read or write:
- no need to use an i/o-handler thread. NOTE that if we use
- Windows async i/o, Windows does not allow us to use
- ordinary synchronous os_file_read etc. on the same file,
- therefore we have built a special mechanism for synchronous
- wait in the Windows case. */
-
- if (type == OS_FILE_READ) {
- return(os_file_read(file, buf, offset,
- offset_high, n));
- }
-
- ut_a(type == OS_FILE_WRITE);
-
- return(os_file_write(name, file, buf, offset, offset_high, n));
- }
-
-try_again:
- if (mode == OS_AIO_NORMAL) {
- if (type == OS_FILE_READ) {
- array = os_aio_read_array;
- } else {
- array = os_aio_write_array;
- }
- } else if (mode == OS_AIO_IBUF) {
- ut_ad(type == OS_FILE_READ);
- /* Reduce probability of deadlock bugs in connection with ibuf:
- do not let the ibuf i/o handler sleep */
-
- wake_later = FALSE;
-
- array = os_aio_ibuf_array;
- } else if (mode == OS_AIO_LOG) {
-
- array = os_aio_log_array;
- } else if (mode == OS_AIO_SYNC) {
- array = os_aio_sync_array; /* reached only when the direct sync path above was skipped */
- } else {
- array = NULL; /* Eliminate compiler warning */
- ut_error;
- }
-
- slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
- name, buf, offset, offset_high, n);
- if (type == OS_FILE_READ) {
- if (os_aio_use_native_aio) {
-#ifdef WIN_ASYNC_IO
- os_n_file_reads++;
- os_bytes_read_since_printout += len;
-
- ret = ReadFile(file, buf, (DWORD)n, &len,
- &(slot->control));
-#elif defined(POSIX_ASYNC_IO)
- slot->control.aio_lio_opcode = LIO_READ;
- err = (ulint) aio_read(&(slot->control));
- fprintf(stderr, "Starting POSIX aio read %lu\n", err);
-#endif
- } else {
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(array);
- }
- }
- } else if (type == OS_FILE_WRITE) {
- if (os_aio_use_native_aio) {
-#ifdef WIN_ASYNC_IO
- os_n_file_writes++;
- ret = WriteFile(file, buf, (DWORD)n, &len,
- &(slot->control));
-#elif defined(POSIX_ASYNC_IO)
- slot->control.aio_lio_opcode = LIO_WRITE;
- err = (ulint) aio_write(&(slot->control));
- fprintf(stderr, "Starting POSIX aio write %lu\n", err);
-#endif
- } else {
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(array);
- }
- }
- } else {
- ut_error;
- }
-
-#ifdef WIN_ASYNC_IO
- if (os_aio_use_native_aio) {
- if ((ret && len == n)
- || (!ret && GetLastError() == ERROR_IO_PENDING)) {
- /* aio was queued successfully! */
-
- if (mode == OS_AIO_SYNC) {
- /* We want a synchronous i/o operation on a
- file where we also use async i/o: in Windows
- we must use the same wait mechanism as for
- async i/o */
-
- retval = os_aio_windows_handle(ULINT_UNDEFINED,
- slot->pos,
- &dummy_mess1,
- &dummy_mess2,
- &dummy_type);
-
- return(retval);
- }
-
- return(TRUE);
- }
-
- err = 1; /* Fall through the next if */
- }
-#endif
- if (err == 0) {
- /* aio was queued successfully! */
-
- return(TRUE);
- }
-
- os_aio_array_free_slot(array, slot); /* queuing failed: give the slot back */
-
- retry = os_file_handle_error(name,
- type == OS_FILE_READ
- ? "aio read" : "aio write");
- if (retry) {
-
- goto try_again; /* reserves a fresh slot for the same request */
- }
-
- return(FALSE);
-}
-
-#ifdef WIN_ASYNC_IO
-/**************************************************************************
-This function is only used in Windows asynchronous i/o.
-Waits for one aio operation to complete, either on any slot of the array
-serving the given segment or, in sync aio, on the slot at position pos.
-Fetches the result with GetOverlappedResult() and hands back the messages
-and the type stored in the slot. NOTE: this function will also take care
-of freeing the aio slot, therefore no other thread is allowed to do the
-freeing! */
-
-ibool
-os_aio_windows_handle(
-/*==================*/
-				/* out: TRUE if the aio operation succeeded */
-	ulint	global_segment,	/* in: the number of the segment in the aio
-				arrays to wait for; segment 0 is the ibuf
-				i/o thread, segment 1 the log i/o thread,
-				then follow the non-ibuf read threads, and as
-				the last are the non-ibuf write threads; if
-				this is ULINT_UNDEFINED, then it means that
-				sync aio is used, and this parameter is
-				ignored */
-	ulint	pos,		/* this parameter is used only in sync aio:
-				wait for the aio slot at this position */
-	fil_node_t**message1,	/* out: the messages passed with the aio
-				request; note that also in the case where
-				the aio operation failed, these output
-				parameters are valid and can be used to
-				restart the operation, for example */
-	void**	message2,
-	ulint*	type)		/* out: OS_FILE_WRITE or ..._READ */
-{
-	os_aio_array_t*	array;
-	os_aio_slot_t*	slot;
-	ulint		slot_no;
-	ibool		succeeded;
-	BOOL		ret;
-	DWORD		len;
-
-	array = (global_segment == ULINT_UNDEFINED)
-		? os_aio_sync_array
-		: os_aio_get_array(global_segment);
-
-	/* NOTE! We only access constant fields in os_aio_array. Therefore
-	we do not have to acquire the protecting mutex yet */
-
-	ut_ad(os_aio_validate());
-
-	if (array != os_aio_sync_array) {
-		srv_set_io_thread_op_info(global_segment, "wait Windows aio");
-		slot_no = os_event_wait_multiple(array->n_slots,
-						 (array->native_events));
-	} else {
-		os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
-		slot_no = pos;
-	}
-
-	os_mutex_enter(array->mutex);
-
-	slot = os_aio_array_get_nth_slot(array, slot_no);
-
-	ut_a(slot->reserved);
-
-	if (global_segment != ULINT_UNDEFINED) {
-		srv_set_io_thread_op_info(global_segment,
-					  "get windows aio return value");
-	}
-
-	ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
-
-	/* The outputs are filled in whether or not the i/o succeeded */
-	*message1 = slot->message1;
-	*message2 = slot->message2;
-
-	*type = slot->type;
-
-	if (!ret || len != slot->len) {
-		os_file_handle_error(slot->name, "Windows aio");
-
-		succeeded = FALSE;
-	} else {
-		succeeded = TRUE;
-
-# ifdef UNIV_DO_FLUSH
-		if (slot->type == OS_FILE_WRITE
-		    && !os_do_not_call_flush_at_each_write) {
-			ut_a(TRUE == os_file_flush(slot->file));
-		}
-# endif /* UNIV_DO_FLUSH */
-	}
-
-	os_mutex_exit(array->mutex);
-
-	os_aio_array_free_slot(array, slot);
-
-	return(succeeded);
-}
-#endif
-
-#ifdef POSIX_ASYNC_IO
-
-/**************************************************************************
-This function is only used in Posix asynchronous i/o. Waits for an aio
-operation to complete: unblocks the signal reserved for the array in the
-calling thread, waits for it with sigwaitinfo() and takes the completed
-slot from the value delivered with the signal. The slot is freed here
-after its messages have been copied out. */
-
-ibool
-os_aio_posix_handle(
-/*================*/
-				/* out: TRUE if the aio operation succeeded */
-	ulint	array_no,	/* in: array number 0 - 3 */
-	fil_node_t**message1,	/* out: the messages passed with the aio
-				request; note that also in the case where
-				the aio operation failed, these output
-				parameters are valid and can be used to
-				restart the operation, for example */
-	void**	message2)
-{
-	os_aio_array_t*	array;
-	os_aio_slot_t*	slot;
-	siginfo_t	info;
-	sigset_t	sigset;
-	int		sig;
-
-	sigemptyset(&sigset);
-	sigaddset(&sigset, SIGRTMIN + 1 + array_no);
-
-	pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
-
-	/* sigwaitinfo() returns the number of the accepted signal, or -1
-	on error: keep that value in sig, which is what is tested below */
-	sig = sigwaitinfo(&sigset, &info);
-
-	if (sig != SIGRTMIN + 1 + array_no) {
-
-		ut_error;
-
-		return(FALSE);
-	}
-
-	fputs("Handling POSIX aio\n", stderr);
-
-	array = os_aio_get_array_from_no(array_no);
-
-	os_mutex_enter(array->mutex);
-
-	/* os_aio_array_reserve_slot() stored the slot pointer in the
-	sigevent value of the request */
-	slot = info.si_value.sival_ptr;
-
-	ut_a(slot->reserved);
-
-	*message1 = slot->message1;
-	*message2 = slot->message2;
-
-# ifdef UNIV_DO_FLUSH
-	if (slot->type == OS_FILE_WRITE
-	    && !os_do_not_call_flush_at_each_write) {
-		ut_a(TRUE == os_file_flush(slot->file));
-	}
-# endif /* UNIV_DO_FLUSH */
-
-	os_mutex_exit(array->mutex);
-
-	os_aio_array_free_slot(array, slot);
-
-	return(TRUE);
-}
-#endif
-
-/**************************************************************************
-Do a 'last millisecond' check that the page end is sensible;
-reported page checksum errors from Linux seem to wipe over the page end.
-For every whole page in the buffer, the 4 bytes at FIL_PAGE_LSN + 4 are
-compared with the 4 bytes at UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
-+ 4; a page where they differ is reported and printed, nothing more. */
-static
-void
-os_file_check_page_trailers(
-/*========================*/
-	byte*	combined_buf,	/* in: combined write buffer */
-	ulint	total_len)	/* in: size of combined_buf, in bytes
-				(a multiple of UNIV_PAGE_SIZE) */
-{
-	ulint	page_offs = 0;
-
-	while (page_offs + UNIV_PAGE_SIZE <= total_len) {
-		byte*	page = combined_buf + page_offs;
-		byte*	head_lsn = page + (FIL_PAGE_LSN + 4);
-		byte*	tail_lsn = page + (UNIV_PAGE_SIZE
-					   - FIL_PAGE_END_LSN_OLD_CHKSUM + 4);
-
-		if (UNIV_UNLIKELY(memcmp(head_lsn, tail_lsn, 4))) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: ERROR: The page to be written"
-				" seems corrupt!\n"
-				"InnoDB: Writing a block of %lu bytes,"
-				" currently at offset %lu\n",
-				(ulong)total_len, (ulong)page_offs);
-			buf_page_print(page);
-			fprintf(stderr,
-				"InnoDB: ERROR: The page to be written"
-				" seems corrupt!\n");
-		}
-
-		page_offs += UNIV_PAGE_SIZE;
-	}
-}
-
-/**************************************************************************
-Does simulated aio. This function should be called by an i/o-handler
-thread. Each call hands back exactly one completed request: first a slot
-whose i/o is already done is looked for; otherwise a request is picked
-(the oldest one if some request is at least 2 seconds old, else the one at
-the lowest offset), up to OS_AIO_MERGE_N_CONSECUTIVE and srv_max_merged_io
-requests that follow it contiguously in the same file are merged with it,
-and the whole run is served with one synchronous os_file_read() or
-os_file_write(). The other slots of a merged run are left in state
-OS_AIO_DONE and are returned by subsequent calls. If there is nothing to
-do, the thread waits on the wait event of its segment and starts over. */
-
-ibool
-os_aio_simulated_handle(
-/*====================*/
- /* out: TRUE if the aio operation succeeded */
- ulint global_segment, /* in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads */
- fil_node_t**message1, /* out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2, /* out: second message passed with the aio request */
- ulint* type) /* out: OS_FILE_WRITE or ..._READ */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- os_aio_slot_t* slot2;
- os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
- os_aio_slot_t* lowest_request;
- os_aio_slot_t* oldest_request;
- ulint n_consecutive;
- ulint total_len;
- ulint offs;
- ulint lowest_offset;
- ulint oldest_offset;
- ulint biggest_age;
- ulint age;
- byte* combined_buf;
- byte* combined_buf2;
- ibool ret;
- ulint n;
- ulint i;
-
- double start_usecs, stop_usecs, elapsed_usecs;
- time_t now;
- array = os_aio_get_array(global_segment);
-
-restart:
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
-
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (a)");
- ut_ad(os_aio_validate());
-
- n = array->n_slots;
-
- /* Look through n slots */
-
- if (array == os_aio_read_array
- && os_aio_recommend_sleep_for_read_threads) {
-
- /* Give other threads chance to add several i/os to the array
- at once. */
-
- goto recommended_sleep;
- }
-
- os_mutex_enter(array->mutex);
-
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (b)");
-
- /* Check if there is a slot for which the i/o has already been
- done */
-
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved && slot->status == OS_AIO_DONE) {
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o for slot %lu"
- " already done, returning\n",
- (ulong) i);
- }
-
- ret = TRUE; /* the merged i/o passed ut_a(ret) before the slot was marked done */
-
- goto slot_io_done;
- }
- }
-
- biggest_age = 0;
- now = time(NULL);
- oldest_request = lowest_request = NULL;
- oldest_offset = lowest_offset = ULINT_MAX;
-
- /* Find the oldest request and the request with the smallest offset */
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved && slot->status == OS_AIO_NOT_ISSUED) {
- age = (ulint)difftime(now, slot->reservation_time);
-
- /* If there are at least 2 seconds old requests, then pick the oldest
- one to prevent starvation. If several requests have the same age,
- then pick the one at the lowest offset. */
- if ((age >= 2 && age > biggest_age)
- || (age >= 2 && age == biggest_age
- && slot->offset < oldest_offset)) {
-
- /* Found an i/o request */
- biggest_age = age;
- oldest_request = slot;
- oldest_offset = slot->offset;
- }
-
- /* Look for an i/o request at the lowest offset in the array
- * (we ignore the high 32 bits of the offset) */
- if (slot->offset < lowest_offset) {
- /* Found an i/o request */
- lowest_request = slot;
-
-
-
- lowest_offset = slot->offset;
- }
- }
- }
-
- if (!lowest_request && !oldest_request) {
-
- /* No i/o requested at the moment */
-
- goto wait_for_io;
- }
-
- if (oldest_request) {
- slot = oldest_request; /* an old enough request wins over the lowest offset */
- } else {
- slot = lowest_request;
- }
- consecutive_ios[0] = slot;
- n_consecutive = 1;
-
- /* Check if there are several consecutive blocks to read or write:
- each time a follower of the current slot is found, it becomes the
- current slot and the scan over the array starts again */
-
-consecutive_loop:
- for (i = 0; i < n; i++) {
- slot2 = os_aio_array_get_nth_slot(array, i);
-
- if (slot2->reserved && slot2 != slot
- && slot2->offset == slot->offset + slot->len
- /* check that sum does not wrap over */
- && slot->offset + slot->len > slot->offset
- && slot2->offset_high == slot->offset_high
- && slot2->type == slot->type
- && slot2->file == slot->file
- && slot2->status == OS_AIO_NOT_ISSUED) {
-
- /* Found a consecutive i/o request */
-
- consecutive_ios[n_consecutive] = slot2;
- n_consecutive++;
-
- slot = slot2;
-
- if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE &&
- n_consecutive < srv_max_merged_io) {
-
- goto consecutive_loop;
- } else {
- break;
- }
- }
- }
-
- srv_set_io_thread_op_info(global_segment, "consecutive i/o requests");
-
- /* We have now collected n_consecutive i/o requests in the array;
- allocate a single buffer which can hold all data, and perform the
- i/o */
-
- total_len = 0;
- slot = consecutive_ios[0];
-
- for (i = 0; i < n_consecutive; i++) {
- total_len += consecutive_ios[i]->len;
- ut_a(consecutive_ios[i]->status == OS_AIO_NOT_ISSUED);
- consecutive_ios[i]->status = OS_AIO_ISSUED;
- }
-
- if (n_consecutive == 1) {
- /* We can use the buffer of the i/o request */
- combined_buf = slot->buf;
- combined_buf2 = NULL;
- } else {
- if ((total_len + UNIV_PAGE_SIZE) > os_aio_thread_buffer_size[global_segment]) {
-
- if (os_aio_thread_buffer[global_segment])
- ut_free(os_aio_thread_buffer[global_segment]);
-
- os_aio_thread_buffer[global_segment] = ut_malloc(total_len + UNIV_PAGE_SIZE);
-
- os_aio_thread_buffer_size[global_segment] = total_len + UNIV_PAGE_SIZE;
- }
- combined_buf2 = os_aio_thread_buffer[global_segment];
-
- ut_a(combined_buf2);
-
- combined_buf = ut_align(combined_buf2, UNIV_PAGE_SIZE); /* the extra UNIV_PAGE_SIZE bytes leave room for this alignment */
- }
-
- /* We release the array mutex for the time of the i/o: NOTE that
- this assumes that there is just one i/o-handler thread serving
- a single segment of slots!
- NOTE(review): the header comment of os_aio_init() says that several
- threads can access the read and write arrays. The slots picked above
- were set to OS_AIO_ISSUED while the mutex was held and the selection
- loops only take OS_AIO_NOT_ISSUED slots, which presumably keeps other
- handler threads away from them - confirm before relying on the
- one-thread assumption stated here. */
-
- ut_a(slot->reserved);
- ut_a(slot->status == OS_AIO_ISSUED);
-
- os_mutex_exit(array->mutex);
-
- if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
- /* Copy the buffers to the combined buffer */
- offs = 0;
-
- for (i = 0; i < n_consecutive; i++) {
-
- ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
- consecutive_ios[i]->len);
- offs += consecutive_ios[i]->len;
- }
- }
-
- srv_set_io_thread_op_info(global_segment, "doing file i/o");
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: doing i/o of type %lu at offset %lu %lu,"
- " length %lu\n",
- (ulong) slot->type, (ulong) slot->offset_high,
- (ulong) slot->offset, (ulong) total_len);
- }
-
- /* Do the i/o with ordinary, synchronous i/o functions: */
- if (slot->type == OS_FILE_WRITE) {
- os_aio_thread_io_writes[global_segment] += n_consecutive;
- if (array == os_aio_write_array) {
- if ((total_len % UNIV_PAGE_SIZE != 0)
- || (slot->offset % UNIV_PAGE_SIZE != 0)) {
- fprintf(stderr,
- "InnoDB: Error: trying a displaced"
- " write to %s %lu %lu, len %lu\n",
- slot->name, (ulong) slot->offset_high,
- (ulong) slot->offset,
- (ulong) total_len);
- ut_error;
- }
-
- os_file_check_page_trailers(combined_buf, total_len);
- }
- start_usecs = time_usecs();
- ret = os_file_write(slot->name, slot->file, combined_buf,
- slot->offset, slot->offset_high,
- total_len);
- stop_usecs = time_usecs();
- elapsed_usecs = stop_usecs - start_usecs;
- if (elapsed_usecs < 0) elapsed_usecs = 0;
-
- if (array == os_aio_write_array) {
- os_file_check_page_trailers(combined_buf, total_len); /* checked again after the write */
- }
- } else {
- start_usecs = time_usecs();
- os_aio_thread_io_reads[global_segment] += n_consecutive;
- ret = os_file_read(slot->file, combined_buf,
- slot->offset, slot->offset_high, total_len);
- stop_usecs = time_usecs();
- elapsed_usecs = stop_usecs - start_usecs;
- if (elapsed_usecs < 0) elapsed_usecs = 0;
- }
- if (elapsed_usecs > os_aio_thread_max_io_wait[global_segment])
- os_aio_thread_max_io_wait[global_segment] = elapsed_usecs;
- os_aio_thread_io_wait[global_segment] += elapsed_usecs;
- os_aio_thread_io_requests[global_segment]++;
-
- ut_a(ret); /* a failed read or write is treated as fatal here */
- srv_set_io_thread_op_info(global_segment, "file i/o done");
-
-#if 0
- fprintf(stderr,
- "aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
- n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE);
-#endif
-
- if (slot->type == OS_FILE_READ && n_consecutive > 1) {
- /* Copy the combined buffer to individual buffers */
- offs = 0;
-
- for (i = 0; i < n_consecutive; i++) {
-
- ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
- consecutive_ios[i]->len);
- offs += consecutive_ios[i]->len;
- }
- }
-
- os_mutex_enter(array->mutex);
-
- /* Mark the i/os done in slots */
-
- for (i = 0; i < n_consecutive; i++) {
- ut_a(consecutive_ios[i]->status == OS_AIO_ISSUED);
- consecutive_ios[i]->status = OS_AIO_DONE;
- }
-
- /* We return the messages for the first slot now, and if there were
- several slots, the messages will be returned with subsequent calls
- of this function */
-
-slot_io_done:
-
- ut_a(slot->reserved);
- ut_a(slot->status == OS_AIO_DONE);
- slot->status = OS_AIO_CLAIMED; /* set under the mutex, so the done-slot scan above will not pick this slot again */
-
- *message1 = slot->message1;
- *message2 = slot->message2;
-
- *type = slot->type;
-
- os_mutex_exit(array->mutex);
-
- os_aio_array_free_slot(array, slot);
- srv_set_io_thread_op_info(global_segment, "exited handler");
-
- return(ret);
-
-wait_for_io:
- srv_set_io_thread_op_info(global_segment, "resetting wait event");
-
- /* We wait here until there again can be i/os in the segment
- of this thread; the event is reset while the array mutex is still
- held */
-
- os_event_reset(os_aio_segment_wait_events[global_segment]);
-
- os_mutex_exit(array->mutex);
-
-recommended_sleep:
- srv_set_io_thread_op_info(global_segment, "waiting for i/o request");
-
- os_event_wait(os_aio_segment_wait_events[global_segment]);
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o handler thread for i/o"
- " segment %lu wakes up\n",
- (ulong) global_segment);
- }
-
- goto restart;
-}
-
-/**************************************************************************
-Checks the internal consistency of one aio array. While holding the array
-mutex, walks every slot, asserts with ut_a() that each reserved slot has a
-nonzero length, and asserts that the number of reserved slots found equals
-array->n_reserved. */
-static
-ibool
-os_aio_array_validate(
-/*==================*/
-				/* out: TRUE if ok */
-	os_aio_array_t*	array)	/* in: aio wait array */
-{
-	ulint	n_found = 0;
-	ulint	idx = 0;
-
-	ut_a(array);
-
-	os_mutex_enter(array->mutex);
-
-	ut_a(array->n_slots > 0);
-
-	while (idx < array->n_slots) {
-		os_aio_slot_t*	cur = os_aio_array_get_nth_slot(array, idx);
-
-		if (cur->reserved) {
-			n_found++;
-			ut_a(cur->len > 0);
-		}
-
-		idx++;
-	}
-
-	ut_a(array->n_reserved == n_found);
-
-	os_mutex_exit(array->mutex);
-
-	return(TRUE);
-}
-
-/**************************************************************************
-Validates the consistency of the aio system by running
-os_aio_array_validate() on each of the five aio arrays (read, write, ibuf,
-log and sync) in that order. */
-
-ibool
-os_aio_validate(void)
-/*=================*/
-				/* out: TRUE if ok */
-{
-	os_aio_array_t*	to_check[5];
-	ulint		k;
-
-	to_check[0] = os_aio_read_array;
-	to_check[1] = os_aio_write_array;
-	to_check[2] = os_aio_ibuf_array;
-	to_check[3] = os_aio_log_array;
-	to_check[4] = os_aio_sync_array;
-
-	for (k = 0; k < 5; k++) {
-		os_aio_array_validate(to_check[k]);
-	}
-
-	return(TRUE);
-}
-
-/**************************************************************************
-Prints info of the aio arrays.
-First prints one status line per i/o segment (state, function, read, write
-and request counters, and the accumulated and maximum wait values). Then,
-for each of the five aio arrays in the order read, write, ibuf, log, sync,
-prints the number of reserved slots, counted while holding that array's
-mutex. Finally prints the global file i/o counters and the per-second
-averages computed against the values saved at the previous printout.
-Side effect: the current counters and the current time are stored as the
-new baseline (os_n_file_reads_old, os_n_file_writes_old, os_n_fsyncs_old,
-os_bytes_read_since_printout, os_last_printout), so every call restarts
-the averaging interval.
-NOTE(review): num_issued, num_done and num_claimed are zeroed on every pass
-through the 'loop' label, so the "Summary of background IO slot status"
-line reports only the array visited last (os_aio_sync_array), not a total
-over all arrays - confirm whether that is intended.
-NOTE(review): the 'io msecs/request' value divides os_aio_thread_io_wait[i]
-by os_aio_thread_io_requests[i] before dividing by 1000.0; if both counters
-have integer types the first division truncates - confirm the element
-types, which are declared outside this function. */
-
-void
-os_aio_print(
-/*=========*/
- FILE* file) /* in: file where to print */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n_reserved;
- time_t current_time;
- double time_elapsed;
- double avg_bytes_read;
- ulint i;
- ulint num_issued, num_done, num_claimed;
-
- for (i = 0; i < os_aio_n_segments; i++) {
- fprintf(file,
- "I/O thread %lu state: %s (%s) reads %lu writes %lu "
- "requests %lu io secs %lf io msecs/request %lf max_io_wait %lf",
- i, srv_io_thread_op_info[i], srv_io_thread_function[i],
- os_aio_thread_io_reads[i], os_aio_thread_io_writes[i],
- os_aio_thread_io_requests[i],
- os_aio_thread_io_wait[i] / 1000000.0,
- os_aio_thread_io_requests[i] ?
- os_aio_thread_io_wait[i] / os_aio_thread_io_requests[i] / 1000.0 : 0.0,
- os_aio_thread_max_io_wait[i] / 1000.0);
-
-#ifndef __WIN__
- if (os_aio_segment_wait_events[i]->is_set) {
- fprintf(file, " ev set");
- }
-#endif
-
- fprintf(file, "\n");
- }
-
- fputs("Pending normal aio reads:", file);
-
- array = os_aio_read_array;
-loop:
- ut_a(array);
-
- os_mutex_enter(array->mutex);
-
- ut_a(array->n_slots > 0);
- n_reserved = 0;
- num_done = num_issued = num_claimed = 0;
-
- for (i = 0; i < array->n_slots; i++) {
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved) {
- if (slot->status == OS_AIO_ISSUED)
- num_issued++;
- else if (slot->status == OS_AIO_DONE)
- num_done++;
- else {
- ut_ad(slot->status == OS_AIO_CLAIMED);
- num_claimed++;
- }
- n_reserved++;
-#if 0
- fprintf(stderr, "Reserved slot, messages %p %p\n",
- (void*) slot->message1,
- (void*) slot->message2);
-#endif
- ut_a(slot->len > 0);
- }
- }
-
- ut_a(array->n_reserved == n_reserved);
-
- fprintf(file, " %lu", (ulong) n_reserved);
-
- os_mutex_exit(array->mutex);
-
- if (array == os_aio_read_array) {
- fputs(", aio writes:", file);
-
- array = os_aio_write_array;
-
- goto loop;
- }
-
- if (array == os_aio_write_array) {
- fputs(",\n ibuf aio reads:", file);
- array = os_aio_ibuf_array;
-
- goto loop;
- }
-
- if (array == os_aio_ibuf_array) {
- fputs(", log i/o's:", file);
- array = os_aio_log_array;
-
- goto loop;
- }
-
- if (array == os_aio_log_array) {
- fputs(", sync i/o's:", file);
- array = os_aio_sync_array;
-
- goto loop;
- }
-
- putc('\n', file);
- fprintf(file,
- "Summary of background IO slot status: %lu issued, "
- "%lu done, %lu claimed, sleep set %d\n",
- num_issued, num_done, num_claimed,
- (int)os_aio_recommend_sleep_for_read_threads);
-
- putc('\n', file);
- current_time = time(NULL);
- time_elapsed = 0.001 + difftime(current_time, os_last_printout);
-
- fprintf(file,
- "Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
- "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
- (ulong) fil_n_pending_log_flushes,
- (ulong) fil_n_pending_tablespace_flushes,
- (ulong) os_n_file_reads, (ulong) os_n_file_writes,
- (ulong) os_n_fsyncs);
-
- if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
- fprintf(file,
- "%lu pending preads, %lu pending pwrites\n",
- (ulong) os_file_n_pending_preads,
- (ulong) os_file_n_pending_pwrites);
- }
-
- if (os_n_file_reads == os_n_file_reads_old) {
- avg_bytes_read = 0.0;
- } else {
- avg_bytes_read = (double) os_bytes_read_since_printout
- / (os_n_file_reads - os_n_file_reads_old);
- }
-
- fprintf(file,
- "%.2f reads/s, %lu avg bytes/read,"
- " %.2f writes/s, %.2f fsyncs/s\n",
- (os_n_file_reads - os_n_file_reads_old)
- / time_elapsed,
- (ulong)avg_bytes_read,
- (os_n_file_writes - os_n_file_writes_old)
- / time_elapsed,
- (os_n_fsyncs - os_n_fsyncs_old)
- / time_elapsed);
-
- os_n_file_reads_old = os_n_file_reads;
- os_n_file_writes_old = os_n_file_writes;
- os_n_fsyncs_old = os_n_fsyncs;
- os_bytes_read_since_printout = 0;
-
- os_last_printout = current_time;
-}
-
-/**************************************************************************
-Restarts the interval over which the per-second i/o averages are computed:
-records the current time as the last printout time, clears the count of
-bytes read since the last printout, and saves the current read, write and
-fsync counters as the new baseline values. */
-
-void
-os_aio_refresh_stats(void)
-/*======================*/
-{
-	os_last_printout = time(NULL);
-
-	os_bytes_read_since_printout = 0;
-
-	os_n_fsyncs_old = os_n_fsyncs;
-	os_n_file_writes_old = os_n_file_writes;
-	os_n_file_reads_old = os_n_file_reads;
-}
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Checks that all slots in the system have been freed, that is, there are
-no pending io operations. Adds up n_reserved over the read, write, ibuf,
-log and sync arrays; each array's mutex is held only while its own counter
-is read. */
-
-ibool
-os_aio_all_slots_free(void)
-/*=======================*/
-				/* out: TRUE if all free */
-{
-	os_aio_array_t*	cur;
-	ulint		total = 0;
-	ulint		k;
-
-	for (k = 0; k < 5; k++) {
-		switch (k) {
-		case 0:
-			cur = os_aio_read_array;
-			break;
-		case 1:
-			cur = os_aio_write_array;
-			break;
-		case 2:
-			cur = os_aio_ibuf_array;
-			break;
-		case 3:
-			cur = os_aio_log_array;
-			break;
-		default:
-			cur = os_aio_sync_array;
-			break;
-		}
-
-		os_mutex_enter(cur->mutex);
-
-		total += cur->n_reserved;
-
-		os_mutex_exit(cur->mutex);
-	}
-
-	return(total == 0 ? TRUE : FALSE);
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
deleted file mode 100644
index a99fe8b6a0e..00000000000
--- a/storage/innobase/os/os0proc.c
+++ /dev/null
@@ -1,674 +0,0 @@
-/******************************************************
-The interface to the operating system
-process control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0proc.h"
-#ifdef UNIV_NONINL
-#include "os0proc.ic"
-#endif
-
-#include "ut0mem.h"
-#include "ut0byte.h"
-
-
-/*
-How to get AWE to compile on Windows?
--------------------------------------
-
-In the project settings of the innobase project the Visual C++ source,
-__WIN2000__ has to be defined.
-
-The Visual C++ has to be relatively recent and _WIN32_WINNT has to be
-defined to a value >= 0x0500 when windows.h is included.
-
-#define _WIN32_WINNT 0x0500
-
-Where does AWE work?
--------------------
-
-See the error message in os_awe_allocate_physical_mem().
-
-How to assign privileges for mysqld to use AWE?
------------------------------------------------
-
-See the error message in os_awe_enable_lock_pages_in_mem().
-
-Use Windows AWE functions in this order
----------------------------------------
-
-(1) os_awe_enable_lock_pages_in_mem();
-(2) os_awe_allocate_physical_mem();
-(3) os_awe_allocate_virtual_mem_window();
-(4) os_awe_map_physical_mem_to_window().
-
-To test 'AWE' in a computer which does not have the AWE API,
-you can compile with UNIV_SIMULATE_AWE defined in this file.
-*/
-
-#ifdef UNIV_SIMULATE_AWE
-/* If we simulate AWE, we allocate the 'physical memory' here.
-os_awe_simulate_mem, os_awe_simulate_mem_size and os_awe_simulate_page_info
-are set by os_awe_allocate_physical_mem(); os_awe_simulate_window,
-os_awe_simulate_window_size and os_awe_simulate_map are set by
-os_awe_allocate_virtual_mem_window(). Both size variables are in bytes. */
-byte* os_awe_simulate_mem;
-ulint os_awe_simulate_mem_size;
-os_awe_t* os_awe_simulate_page_info;
-byte* os_awe_simulate_window;
-ulint os_awe_simulate_window_size;
-/* In simulated AWE the following contains a NULL pointer or a pointer
-to a mapped 'physical page' for each 4 kB page in the AWE window */
-byte** os_awe_simulate_map;
-#endif
-
-#ifdef __WIN2000__
-os_awe_t* os_awe_page_info;
-ulint os_awe_n_pages;
-byte* os_awe_window;
-ulint os_awe_window_size;
-#endif
-
-ibool os_use_large_pages;
-/* Large page size in bytes; os_mem_alloc_large() rounds the requested size
-up to a multiple of it, and together with os_use_large_pages it must be
-nonzero for large pages to be tried at all. This may be a boot-time option
-on some platforms */
-ulint os_large_page_size;
-
-/********************************************************************
-Windows AWE support. Tries to enable the "lock pages in memory" privilege for
-the current process so that the current process can allocate memory-locked
-virtual address space to act as the window where AWE maps physical memory.
-The process token opened for the adjustment is closed on every path once it
-has been opened, and the Windows error code is read once, right after the
-failing call, so that the value printed is the value that was tested. */
-
-ibool
-os_awe_enable_lock_pages_in_mem(void)
-/*=================================*/
-				/* out: TRUE if success, FALSE if error;
-				prints error info to stderr if no success */
-{
-#ifdef UNIV_SIMULATE_AWE
-
-	return(TRUE);
-
-#elif defined(__WIN2000__)
-	struct {
-		DWORD			Count;
-		LUID_AND_ATTRIBUTES	Privilege[1];
-	} Info;
-	HANDLE	hProcess;
-	HANDLE	Token;
-	BOOL	Result;
-	DWORD	err;
-
-	hProcess = GetCurrentProcess();
-
-	/* Open the token of the current process */
-
-	Result = OpenProcessToken(hProcess,
-				  TOKEN_ADJUST_PRIVILEGES, &Token);
-	if (Result != TRUE) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot open process token, error %lu\n",
-			(ulint)GetLastError());
-		return(FALSE);
-	}
-
-	Info.Count = 1;
-
-	Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED;
-
-	/* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY
-	privilege */
-
-	Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
-				      &(Info.Privilege[0].Luid));
-	if (Result != TRUE) {
-		/* Read the error code before CloseHandle() can replace it */
-		err = GetLastError();
-
-		CloseHandle(Token);
-
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot get local privilege"
-			" value for %s, error %lu.\n",
-			SE_LOCK_MEMORY_NAME, (ulint)err);
-
-		return(FALSE);
-	}
-
-	/* Try to adjust the privilege */
-
-	Result = AdjustTokenPrivileges(Token, FALSE,
-				       (PTOKEN_PRIVILEGES)&Info,
-				       0, NULL, NULL);
-
-	/* The call can return success and still report through
-	GetLastError() that the privilege was not assigned, so the error
-	code is saved before any other API call is made. The token is not
-	needed after this point, whatever the outcome. */
-
-	err = GetLastError();
-
-	CloseHandle(Token);
-
-	if (Result != TRUE) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot adjust process token privileges,"
-			" error %lu.\n",
-			(ulint)err);
-		return(FALSE);
-	} else if (err != ERROR_SUCCESS) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege,"
-			" error %lu.\n"
-			"InnoDB: In Windows XP Home you cannot use AWE."
-			" In Windows 2000 and XP\n"
-			"InnoDB: Professional you must go to the"
-			" Control Panel, to\n"
-			"InnoDB: Security Settings, to Local Policies,"
-			" and enable\n"
-			"InnoDB: the 'lock pages in memory' privilege"
-			" for the user who runs\n"
-			"InnoDB: the MySQL server.\n", (ulint)err);
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-#else
-#ifdef __WIN__
-	fprintf(stderr,
-		"InnoDB: AWE: Error: to use AWE you must use"
-		" a ...-nt MySQL executable.\n");
-#endif
-	return(FALSE);
-#endif
-}
-
-/********************************************************************
-Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
-processor.
-Under UNIV_SIMULATE_AWE the 'physical memory' and the page info array are
-plain ut_malloc() allocations. On __WIN2000__ builds the page info array
-comes from the process heap, the lock-pages privilege is enabled through
-os_awe_enable_lock_pages_in_mem(), and the pages are requested with
-AllocateUserPhysicalPages(); os_awe_page_info and os_awe_n_pages are set as
-a side effect. On all other builds the function only returns FALSE.
-NOTE(review): on the __WIN2000__ failure paths that follow a successful
-HeapAlloc(), the array stored in *page_info is not released and
-ut_total_allocated_memory is not decremented, and pages obtained by a
-partially successful AllocateUserPhysicalPages() call are not handed back.
-Presumably the caller treats FALSE as fatal - confirm against the caller. */
-
-ibool
-os_awe_allocate_physical_mem(
-/*=========================*/
- /* out: TRUE if success */
- os_awe_t** page_info, /* out, own: array of opaque data containing
- the info for allocated physical memory pages;
- each allocated 4 kB physical memory page has
- one slot of type os_awe_t in the array */
- ulint n_megabytes) /* in: number of megabytes to allocate */
-{
-#ifdef UNIV_SIMULATE_AWE
- os_awe_simulate_page_info = ut_malloc
- (sizeof(os_awe_t) * n_megabytes
- * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE));
-
- os_awe_simulate_mem
- = ut_align(ut_malloc(4096 + 1024 * 1024 * n_megabytes), 4096);
- os_awe_simulate_mem_size = n_megabytes * 1024 * 1024;
-
- *page_info = os_awe_simulate_page_info;
-
- return(TRUE);
-
-#elif defined(__WIN2000__)
- BOOL bResult;
- os_awe_t NumberOfPages; /* Question: why does Windows
- use the name ULONG_PTR for
- a scalar integer type? Maybe
- because we may also refer to
- &NumberOfPages? */
- os_awe_t NumberOfPagesInitial;
- SYSTEM_INFO sSysInfo;
- int PFNArraySize;
-
- if (n_megabytes > 64 * 1024) {
-
- fprintf(stderr,
- "InnoDB: AWE: Error: tried to allocate %lu MB.\n"
- "InnoDB: AWE cannot allocate more than"
- " 64 GB in any computer.\n", n_megabytes);
-
- return(FALSE);
- }
-
- GetSystemInfo(&sSysInfo); /* fill the system information structure */
-
- if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) {
- fprintf(stderr,
- "InnoDB: AWE: Error: this computer has a page size"
- " of %lu.\n"
- "InnoDB: Should be 4096 bytes for"
- " InnoDB AWE support to work.\n",
- (ulint)sSysInfo.dwPageSize);
-
- return(FALSE);
- }
-
- /* Calculate the number of pages of memory to request */
-
- NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE);
-
- /* Calculate the size of page_info for allocated physical pages */
-
- PFNArraySize = NumberOfPages * sizeof(os_awe_t);
-
- *page_info = (os_awe_t*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize);
-
- if (*page_info == NULL) {
- fprintf(stderr,
- "InnoDB: AWE: Failed to allocate page info"
- " array from process heap, error %lu\n",
- (ulint)GetLastError());
-
- return(FALSE);
- }
-
- ut_total_allocated_memory += PFNArraySize;
-
- /* Enable this process' privilege to lock pages to physical memory */
-
- if (!os_awe_enable_lock_pages_in_mem()) {
-
- return(FALSE);
- }
-
- /* Allocate the physical memory */
-
- NumberOfPagesInitial = NumberOfPages;
-
- os_awe_page_info = *page_info;
- os_awe_n_pages = (ulint)NumberOfPages;
-
- /* Compilation note: if the compiler complains the function is not
- defined, see the note at the start of this file */
-
- bResult = AllocateUserPhysicalPages(GetCurrentProcess(),
- &NumberOfPages, *page_info);
- if (bResult != TRUE) {
- fprintf(stderr,
- "InnoDB: AWE: Cannot allocate physical pages,"
- " error %lu.\n",
- (ulint)GetLastError());
-
- return(FALSE);
- }
-
- if (NumberOfPagesInitial != NumberOfPages) {
- fprintf(stderr,
- "InnoDB: AWE: Error: allocated only %lu pages"
- " of %lu requested.\n"
- "InnoDB: Check that you have enough free RAM.\n"
- "InnoDB: In Windows XP Professional and"
- " 2000 Professional\n"
- "InnoDB: Windows PAE size is max 4 GB."
- " In 2000 and .NET\n"
- "InnoDB: Advanced Servers and 2000 Datacenter Server"
- " it is 32 GB,\n"
- "InnoDB: and in .NET Datacenter Server it is 64 GB.\n"
- "InnoDB: A Microsoft web page said that"
- " the processor must be an Intel\n"
- "InnoDB: processor.\n",
- (ulint)NumberOfPages,
- (ulint)NumberOfPagesInitial);
-
- return(FALSE);
- }
-
- fprintf(stderr,
- "InnoDB: Using Address Windowing Extensions (AWE);"
- " allocated %lu MB\n",
- n_megabytes);
-
- return(TRUE);
-#else
- UT_NOT_USED(n_megabytes);
- UT_NOT_USED(page_info);
-
- return(FALSE);
-#endif
-}
-
-/********************************************************************
-Allocates a window in the virtual address space into which pages of
-physical memory can then be mapped. With UNIV_SIMULATE_AWE the window is
-ordinary 4096-byte aligned heap memory and a map with one NULL entry per
-4 kB page of the window is created; on __WIN2000__ builds the window is
-reserved with VirtualAlloc() and remembered in os_awe_window and
-os_awe_window_size; on other builds nothing is allocated. */
-
-byte*
-os_awe_allocate_virtual_mem_window(
-/*===============================*/
-			/* out, own: allocated memory, or NULL if did not
-			succeed */
-	ulint	size)	/* in: virtual memory allocation size in bytes, must
-			be < 2 GB */
-{
-#ifdef UNIV_SIMULATE_AWE
-	ulint	n_slots = size / 4096;
-	ulint	k;
-
-	os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096);
-	os_awe_simulate_window_size = size;
-
-	os_awe_simulate_map = ut_malloc(sizeof(byte*) * n_slots);
-
-	/* No 'physical page' is mapped anywhere in the window yet */
-	for (k = 0; k < n_slots; k++) {
-		os_awe_simulate_map[k] = NULL;
-	}
-
-	return(os_awe_simulate_window);
-
-#elif defined(__WIN2000__)
-	byte*	window;
-
-	if (size > (ulint)0x7FFFFFFFUL) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot allocate %lu bytes"
-			" of virtual memory\n", size);
-
-		return(NULL);
-	}
-
-	window = VirtualAlloc(NULL, (SIZE_T)size,
-			      MEM_RESERVE | MEM_PHYSICAL, PAGE_READWRITE);
-
-	if (window == NULL) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot allocate %lu bytes"
-			" of virtual memory, error %lu\n",
-			size, (ulint)GetLastError());
-
-		return(NULL);
-	}
-
-	ut_total_allocated_memory += size;
-
-	os_awe_window_size = size;
-	os_awe_window = window;
-
-	return(window);
-#else
-	UT_NOT_USED(size);
-
-	return(NULL);
-#endif
-}
-
-/********************************************************************
-With this function you can map parts of physical memory allocated with
-the ..._allocate_physical_mem to the virtual address space allocated with
-the previous function. Intel implements this so that the process page
-tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
-showed that this takes < 1 microsecond, much better than the estimated 80 us
-for copying a 16 kB page memory to memory. But, the operation will at least
-partially invalidate the translation lookaside buffer (TLB) of all
-processors. Under a real-world load the performance hit may be bigger.
-In the UNIV_SIMULATE_AWE branch the mapping is emulated by copying: pages
-currently recorded in os_awe_simulate_map for the target range are copied
-back to their 'physical' location, the requested pages are copied into the
-window, and the map is updated. In the __WIN2000__ branch a failed bounds
-check prints a diagnostic and then fails the ut_a(0) assertion, while a
-failed MapUserPhysicalPages() call prints a diagnostic and calls exit(1).
-On other builds the function only returns FALSE.
-NOTE(review): the __WIN2000__ bounds checks do not use n_mem_pages: ptr is
-compared against the window end minus UNIV_PAGE_SIZE and page_info against
-the array end minus 4 entries. The constant 4 presumably equals
-UNIV_PAGE_SIZE / 4096, i.e. one buffer page per call - confirm. In the
-UNIV_SIMULATE_AWE branch only the start of each range is asserted to lie
-inside the window and the page info array. */
-
-ibool
-os_awe_map_physical_mem_to_window(
-/*==============================*/
- /* out: TRUE if success; the function
- calls exit(1) in case of an error */
- byte* ptr, /* in: a page-aligned pointer to
- somewhere in the virtual address
- space window; we map the physical mem
- pages here */
- ulint n_mem_pages, /* in: number of 4 kB mem pages to
- map */
- os_awe_t* page_info) /* in: array of page infos for those
- pages; each page has one slot in the
- array */
-{
-#ifdef UNIV_SIMULATE_AWE
- ulint i;
- byte** map;
- byte* page;
- byte* phys_page;
-
- ut_a(ptr >= os_awe_simulate_window);
- ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size);
- ut_a(page_info >= os_awe_simulate_page_info);
- ut_a(page_info < os_awe_simulate_page_info
- + (os_awe_simulate_mem_size / 4096));
-
- /* First look if some other 'physical pages' are mapped at ptr,
- and copy them back to where they were if yes */
-
- map = os_awe_simulate_map
- + ((ulint)(ptr - os_awe_simulate_window)) / 4096;
- page = ptr;
-
- for (i = 0; i < n_mem_pages; i++) {
- if (*map != NULL) {
- ut_memcpy(*map, page, 4096);
- }
- map++;
- page += 4096;
- }
-
- /* Then copy to ptr the 'physical pages' determined by page_info; we
- assume page_info is a segment of the array we created at the start */
-
- phys_page = os_awe_simulate_mem
- + (ulint)(page_info - os_awe_simulate_page_info)
- * 4096;
-
- ut_memcpy(ptr, phys_page, n_mem_pages * 4096);
-
- /* Update the map */
-
- map = os_awe_simulate_map
- + ((ulint)(ptr - os_awe_simulate_window)) / 4096;
-
- for (i = 0; i < n_mem_pages; i++) {
- *map = phys_page;
-
- map++;
- phys_page += 4096;
- }
-
- return(TRUE);
-
-#elif defined(__WIN2000__)
- BOOL bResult;
- os_awe_t n_pages;
-
- n_pages = (os_awe_t)n_mem_pages;
-
- if (!(ptr >= os_awe_window)) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map to address %lx"
- " but AWE window start %lx\n",
- (ulint)ptr, (ulint)os_awe_window);
- ut_a(0);
- }
-
- if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map to address %lx"
- " but AWE window end %lx\n",
- (ulint)ptr, (ulint)os_awe_window + os_awe_window_size);
- ut_a(0);
- }
-
- if (!(page_info >= os_awe_page_info)) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map page info"
- " at %lx but array start %lx\n",
- (ulint)page_info, (ulint)os_awe_page_info);
- ut_a(0);
- }
-
- if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map page info"
- " at %lx but array end %lx\n",
- (ulint)page_info,
- (ulint)(os_awe_page_info + os_awe_n_pages));
- ut_a(0);
- }
-
- bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info);
-
- if (bResult != TRUE) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: AWE: Mapping of %lu physical pages"
- " to address %lx failed,\n"
- "InnoDB: error %lu.\n"
- "InnoDB: Cannot continue operation.\n",
- n_mem_pages, (ulint)ptr, (ulint)GetLastError());
- exit(1);
- }
-
- return(TRUE);
-#else
- UT_NOT_USED(ptr);
- UT_NOT_USED(n_mem_pages);
- UT_NOT_USED(page_info);
-
- return(FALSE);
-#endif
-}
-
-/********************************************************************
-Returns the id of the current process as a number: GetCurrentProcessId()
-on Windows, getpid() elsewhere. The number is not guaranteed to be unique.
-According to the original author's note, in Linux this is the 'process
-number' of the current thread, the same number one sees in 'top', and it
-differs from the thread id. */
-
-ulint
-os_proc_get_number(void)
-/*====================*/
-{
-	ulint	proc_no;
-
-#ifdef __WIN__
-	proc_no = (ulint)GetCurrentProcessId();
-#else
-	proc_no = (ulint)getpid();
-#endif
-
-	return(proc_no);
-}
-
-/********************************************************************
-Allocates non-cacheable memory. On Windows the memory is committed with
-VirtualAlloc() using PAGE_NOCACHE and the result is asserted to be
-non-NULL; on other platforms the request is passed to ut_malloc(). */
-
-void*
-os_mem_alloc_nocache(
-/*=================*/
-			/* out: allocated memory */
-	ulint	n)	/* in: number of bytes */
-{
-#ifdef __WIN__
-	void*	mem = VirtualAlloc(NULL, n, MEM_COMMIT,
-				   PAGE_READWRITE | PAGE_NOCACHE);
-
-	ut_a(mem);
-
-	return(mem);
-#else
-	return(ut_malloc(n));
-#endif
-}
-
-/********************************************************************
-Allocates large pages memory. When HAVE_LARGE_PAGES is defined and both
-os_use_large_pages and os_large_page_size are nonzero, tries (on
-UNIV_LINUX) to create and attach a SHM_HUGETLB shared memory segment whose
-size is n rounded up to a multiple of os_large_page_size. If large pages
-are disabled, or the segment cannot be created or attached, the request
-falls back to ut_malloc_low(). A failed shmat() is treated as a failed
-allocation: its (void *)-1 result is never returned to the caller. */
-
-void*
-os_mem_alloc_large(
-/*===============*/
-					/* out: allocated memory */
-	ulint		n,		/* in: number of bytes */
-	ibool		set_to_zero,	/* in: TRUE if allocated memory
-					should be set to zero if
-					UNIV_SET_MEM_TO_ZERO is defined */
-	ibool		assert_on_error)/* in: if TRUE, we crash mysqld if
-					the memory cannot be allocated */
-{
-#ifdef HAVE_LARGE_PAGES
-	ulint size = 0;
-	int shmid;
-	void *ptr = NULL;
-	struct shmid_ds buf;
-
-	if (!os_use_large_pages || !os_large_page_size) {
-		goto skip;
-	}
-
-#ifdef UNIV_LINUX
-	/* Align block size to os_large_page_size */
-	size = ((n - 1) & ~(os_large_page_size - 1)) + os_large_page_size;
-
-	shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
-	if (shmid < 0) {
-		fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate"
-			" %lu bytes. errno %d\n", n, errno);
-	} else {
-		ptr = shmat(shmid, NULL, 0);
-		if (ptr == (void *)-1) {
-			fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to"
-				" attach shared memory segment, errno %d\n",
-				errno);
-
-			/* (void *)-1 is not NULL: reset the pointer so
-			that the failure is not mistaken for a valid
-			address below and we fall back to conventional
-			memory */
-			ptr = NULL;
-		}
-
-		/* Remove the shared memory segment so that it will be
-		automatically freed after memory is detached or
-		process exits */
-		shmctl(shmid, IPC_RMID, &buf);
-	}
-#endif
-
-	if (ptr) {
-		if (set_to_zero) {
-#ifdef UNIV_SET_MEM_TO_ZERO
-			memset(ptr, '\0', size);
-#endif
-		}
-
-		return(ptr);
-	}
-
-	fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional"
-		" memory pool\n");
-skip:
-#endif /* HAVE_LARGE_PAGES */
-
-	return(ut_malloc_low(n, set_to_zero, assert_on_error));
-}
-
-/********************************************************************
-Frees large pages memory. When large pages are in use, the pointer is
-first detached with shmdt() (on UNIV_LINUX); only if that does not succeed,
-or large pages are not in use, is the pointer handed to ut_free(). */
-
-void
-os_mem_free_large(
-/*==============*/
-	void	*ptr)	/* in: pointer to the memory to free */
-{
-#ifdef HAVE_LARGE_PAGES
-	if (os_use_large_pages && os_large_page_size) {
-#ifdef UNIV_LINUX
-		if (shmdt(ptr) == 0) {
-			return;
-		}
-#else
-		return;
-#endif
-	}
-#endif
-
-	ut_free(ptr);
-}
-
-/********************************************************************
-Sets the priority boost for threads released from waiting within the current
-process. The actual SetProcessPriorityBoost() call is commented out, so on
-Windows the function only prints a warning to stderr, and on other
-platforms it does nothing. */
-
-void
-os_process_set_priority_boost(
-/*==========================*/
-	ibool	do_boost)	/* in: TRUE if priority boost should be done,
-				FALSE if not */
-{
-#ifdef __WIN__
-	ibool	no_boost = do_boost ? FALSE : TRUE;
-
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
-
-	/* Does not do anything currently!
-	SetProcessPriorityBoost(GetCurrentProcess(), no_boost);
-	*/
-	fputs("Warning: process priority boost setting"
-	      " currently not functional!\n",
-	      stderr);
-#else
-	UT_NOT_USED(do_boost);
-#endif
-}
diff --git a/storage/innobase/os/os0sync.c b/storage/innobase/os/os0sync.c
deleted file mode 100644
index 18fd38f3f9b..00000000000
--- a/storage/innobase/os/os0sync.c
+++ /dev/null
@@ -1,753 +0,0 @@
-/******************************************************
-The interface to the operating system
-synchronization primitives.
-
-(c) 1995 Innobase Oy
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0sync.h"
-#ifdef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#endif
-
-#include "ut0mem.h"
-#include "srv0start.h"
-
-/* Type definition for an operating system mutex struct */
-struct os_mutex_struct{
- os_event_t event; /* Used by sync0arr.c for queueing threads */
- void* handle; /* OS handle to mutex */
- ulint count; /* we use this counter to check
- that the same thread does not
- recursively lock the mutex: we
- do not assume that the OS mutex
- supports recursive locking, though
- NT seems to do that */
- UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list;
- /* list node for the list of all 'slow' OS mutexes
- created */
-};
-
-/* Mutex protecting counts and the lists of OS mutexes and events.
-os_sync_mutex_inited is set to TRUE by os_sync_init() and back to FALSE by
-os_sync_free() just before os_sync_mutex itself is freed.
-os_sync_free_called is TRUE only while os_sync_free() is running. */
-os_mutex_t os_sync_mutex;
-ibool os_sync_mutex_inited = FALSE;
-ibool os_sync_free_called = FALSE;
-
-/* This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-ulint os_thread_count = 0;
-
-/* The list of all events created */
-UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
-
-/* The list of all OS 'slow' mutexes */
-UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
-
-ulint os_event_count = 0;
-ulint os_mutex_count = 0;
-ulint os_fast_mutex_count = 0;
-
-/* Because a mutex is embedded inside an event and there is an
-event embedded inside a mutex, freeing one of them leads to a recursive
-call. This version of the free event function doesn't acquire the global
-lock (os_sync_mutex) */
-static void os_event_free_internal(os_event_t event);
-
-/*************************************************************
-Initializes the global lists of events and of OS 'slow' mutexes, then
-creates os_sync_mutex and records in os_sync_mutex_inited that it is
-available. */
-
-void
-os_sync_init(void)
-/*==============*/
-{
-	/* Both lists must be ready before the first mutex is created */
-	UT_LIST_INIT(os_mutex_list);
-	UT_LIST_INIT(os_event_list);
-
-	os_sync_mutex = os_mutex_create(NULL);
-	os_sync_mutex_inited = TRUE;
-}
-
-/*************************************************************
-Frees created events and OS 'slow' mutexes. Repeatedly takes the first
-element of each global list and frees it until the list is empty.
-os_sync_free_called is TRUE for the duration of the call. */
-
-void
-os_sync_free(void)
-/*==============*/
-{
-	os_event_t	ev;
-	os_mutex_t	mtx;
-
-	os_sync_free_called = TRUE;
-
-	for (ev = UT_LIST_GET_FIRST(os_event_list);
-	     ev;
-	     ev = UT_LIST_GET_FIRST(os_event_list)) {
-
-		os_event_free(ev);
-	}
-
-	for (mtx = UT_LIST_GET_FIRST(os_mutex_list);
-	     mtx;
-	     mtx = UT_LIST_GET_FIRST(os_mutex_list)) {
-
-		if (mtx == os_sync_mutex) {
-			/* From here on os_sync_mutex must not be reserved
-			any more by the remaining freeing operations in
-			shutdown, so mark it as not initialized */
-			os_sync_mutex_inited = FALSE;
-		}
-
-		os_mutex_free(mtx);
-	}
-
-	os_sync_free_called = FALSE;
-}
-
-/*************************************************************
-Creates an event semaphore, i.e., a semaphore which may just have two
-states: signaled and nonsignaled. The created event is manual reset: it
-must be reset explicitly by calling os_event_reset(). The new event is
-also put first in the global os_event_list and counted in
-os_event_count. */
-
-os_event_t
-os_event_create(
-/*============*/
-				/* out: the event handle */
-	const char*	name)	/* in: the name of the event, if NULL
-				the event is created without a name */
-{
-	os_event_t	event;
-
-#ifdef __WIN__
-	event = ut_malloc(sizeof(struct os_event_struct));
-
-	event->handle = CreateEvent(NULL,	/* No security attributes */
-				    TRUE,	/* Manual reset */
-				    FALSE,	/* Initial state nonsignaled */
-				    (LPCTSTR) name);
-	if (!event->handle) {
-		fprintf(stderr,
-			"InnoDB: Could not create a Windows event semaphore;"
-			" Windows error %lu\n",
-			(ulong) GetLastError());
-	}
-#else /* Unix */
-	UT_NOT_USED(name);
-
-	event = ut_malloc(sizeof(struct os_event_struct));
-
-	os_fast_mutex_init(&(event->os_mutex));
-
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
-	ut_a(0 == pthread_cond_init(&(event->cond_var),
-				    pthread_condattr_default));
-#else
-	ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
-#endif
-	event->is_set = FALSE;
-
-	/* os_event_reset() returns this value, which can then be passed
-	to os_event_wait_low(). Zero is reserved in os_event_wait_low()
-	for callers that do not want to pass any signal_count value, so
-	the counter starts from 1 to keep the two cases apart. */
-	event->signal_count = 1;
-#endif /* __WIN__ */
-
-	/* During startup an event can be created before this module has
-	been initialized, because events are embedded in the mutex/rwlock;
-	os_sync_mutex is NULL then and is not reserved */
-	if (os_sync_mutex != NULL) {
-		os_mutex_enter(os_sync_mutex);
-	}
-
-	/* Register the event in the list of events */
-	UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
-
-	os_event_count++;
-
-	if (os_sync_mutex != NULL) {
-		os_mutex_exit(os_sync_mutex);
-	}
-
-	return(event);
-}
-
-#ifdef __WIN__
-/*************************************************************
-Creates an auto-reset event semaphore, i.e., an event which is automatically
-reset when a single thread is released. Works only in Windows. The new
-event is put first in the global os_event_list and counted in
-os_event_count. As in os_event_create(), os_sync_mutex is reserved for
-the list update only if it has already been created. */
-
-os_event_t
-os_event_create_auto(
-/*=================*/
-				/* out: the event handle */
-	const char*	name)	/* in: the name of the event, if NULL
-				the event is created without a name */
-{
-	os_event_t	event;
-
-	event = ut_malloc(sizeof(struct os_event_struct));
-
-	event->handle = CreateEvent(NULL,	/* No security attributes */
-				    FALSE,	/* Auto-reset */
-				    FALSE,	/* Initial state nonsignaled */
-				    (LPCTSTR) name);
-
-	if (!event->handle) {
-		fprintf(stderr,
-			"InnoDB: Could not create a Windows auto"
-			" event semaphore; Windows error %lu\n",
-			(ulong) GetLastError());
-	}
-
-	/* The os_sync_mutex can be NULL if an event is created during
-	startup before this module has been initialized; os_event_create()
-	handles that case the same way */
-	if (os_sync_mutex != NULL) {
-		os_mutex_enter(os_sync_mutex);
-	}
-
-	/* Put to the list of events */
-	UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
-
-	os_event_count++;
-
-	if (os_sync_mutex != NULL) {
-		os_mutex_exit(os_sync_mutex);
-	}
-
-	return(event);
-}
-#endif
-
-/**************************************************************
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. On non-Windows builds an event that is already set is left
-untouched; otherwise it is marked set, its signal_count is incremented and
-the condition variable is broadcast, all under the event's own mutex. */
-
-void
-os_event_set(
-/*=========*/
-	os_event_t	event)	/* in: event to set */
-{
-	ut_a(event);
-
-#ifdef __WIN__
-	ut_a(SetEvent(event->handle));
-#else
-	os_fast_mutex_lock(&(event->os_mutex));
-
-	if (!event->is_set) {
-		event->is_set = TRUE;
-		event->signal_count += 1;
-		ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
-	}
-
-	os_fast_mutex_unlock(&(event->os_mutex));
-#endif
-}
-
-/**************************************************************
-Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_event_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low().
-On Windows builds the return value is always 0. */
-
-ib_longlong
-os_event_reset(
-/*===========*/
-				/* out: current signal_count. */
-	os_event_t	event)	/* in: event to reset */
-{
-	ib_longlong	count = 0;
-
-	ut_a(event);
-
-#ifdef __WIN__
-	ut_a(ResetEvent(event->handle));
-#else
-	os_fast_mutex_lock(&(event->os_mutex));
-
-	if (event->is_set) {
-		event->is_set = FALSE;
-	}
-
-	/* Read the counter under the same mutex that os_event_set()
-	holds when it increments it */
-	count = event->signal_count;
-
-	os_fast_mutex_unlock(&(event->os_mutex));
-#endif
-
-	return(count);
-}
-
-/**************************************************************
-Releases an event object and unlinks it from os_event_list. Unlike
-os_event_free(), this variant does not acquire os_sync_mutex around the
-list and counter update. */
-static
-void
-os_event_free_internal(
-/*===================*/
-	os_event_t	event)	/* in: event to free */
-{
-	ut_a(event);
-
-#ifdef __WIN__
-	ut_a(CloseHandle(event->handle));
-#else
-	/* This is to avoid freeing the mutex twice */
-	os_fast_mutex_free(&event->os_mutex);
-
-	ut_a(0 == pthread_cond_destroy(&event->cond_var));
-#endif
-	/* Unlink from the list of events; no global lock is taken here */
-	UT_LIST_REMOVE(os_event_list, os_event_list, event);
-	os_event_count--;
-
-	ut_free(event);
-}
-
-/**************************************************************
-Releases an event object: destroys the underlying OS primitives, unlinks
-the event from os_event_list while holding os_sync_mutex, and frees the
-memory of the event struct. */
-
-void
-os_event_free(
-/*==========*/
-	os_event_t	event)	/* in: event to free */
-
-{
-	ut_a(event);
-
-#ifdef __WIN__
-	ut_a(CloseHandle(event->handle));
-#else
-	os_fast_mutex_free(&event->os_mutex);
-	ut_a(0 == pthread_cond_destroy(&event->cond_var));
-#endif
-	/* The global event list and counter are protected by
-	os_sync_mutex */
-	os_mutex_enter(os_sync_mutex);
-	UT_LIST_REMOVE(os_event_list, os_event_list, event);
-	os_event_count--;
-	os_mutex_exit(os_sync_mutex);
-
-	ut_free(event);
-}
-
-/**************************************************************
-Blocks the calling thread until the event is in the signaled state. If
-srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS when the wait ends (or
-immediately, if the event is already signaled), the calling thread is
-terminated through os_thread_exit() instead of returning.
-
-Normally a set that happened after os_event_reset() is noticed at once,
-because event->is_set == TRUE. There are, however, situations (e.g.:
-sync_array code) where that information may be lost. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set()   [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait()  [infinite wait!]
-thread C calls os_event_wait()  [infinite wait!]
-
-Where such a scenario is possible, the value returned by os_event_reset()
-should be passed in as reset_sig_count: the wait then also ends when
-event->signal_count differs from it. In the Windows implementation
-reset_sig_count is ignored. */
-
-void
-os_event_wait_low(
-/*==============*/
-	os_event_t	event,		/* in: event to wait */
-	ib_longlong	reset_sig_count)/* in: zero or the value
-					returned by previous call of
-					os_event_reset(). */
-{
-#ifdef __WIN__
-	DWORD	wait_res;
-
-	ut_a(event);
-
-	UT_NOT_USED(reset_sig_count);
-
-	/* No time limit for the wait */
-	wait_res = WaitForSingleObject(event->handle, INFINITE);
-
-	ut_a(wait_res == WAIT_OBJECT_0);
-
-	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
-		os_thread_exit(NULL);
-	}
-#else
-	ib_longlong	wait_base;
-
-	os_fast_mutex_lock(&event->os_mutex);
-
-	/* A zero reset_sig_count means: compare against the counter
-	value seen right now */
-	wait_base = reset_sig_count
-		? reset_sig_count
-		: event->signal_count;
-
-	/* Solaris manual said that spurious wakeups may occur: the
-	condition is therefore re-evaluated after every wakeup */
-	while (event->is_set != TRUE
-	       && event->signal_count == wait_base) {
-
-		pthread_cond_wait(&event->cond_var, &event->os_mutex);
-	}
-
-	os_fast_mutex_unlock(&event->os_mutex);
-
-	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
-
-		os_thread_exit(NULL);
-	}
-
-	/* Ok, we may return */
-#endif
-}
-
-/**************************************************************
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is ignored and the wait is
-always infinite. In Windows the timeout is converted from microseconds
-to whole milliseconds, rounding down. */
-
-ulint
-os_event_wait_time(
-/*===============*/
-				/* out: 0 if success, OS_SYNC_TIME_EXCEEDED if
-				timeout was exceeded */
-	os_event_t	event,	/* in: event to wait */
-	ulint		time)	/* in: timeout in microseconds, or
-				OS_SYNC_INFINITE_TIME */
-{
-#ifdef __WIN__
-	DWORD	err;
-
-	ut_a(event);
-
-	if (time != OS_SYNC_INFINITE_TIME) {
-		/* Divide first and narrow afterwards: a cast binds
-		tighter than '/', so "(DWORD) time / 1000" would
-		truncate a wide ulint to DWORD before the conversion
-		to milliseconds */
-		err = WaitForSingleObject(event->handle,
-					  (DWORD) (time / 1000));
-	} else {
-		err = WaitForSingleObject(event->handle, INFINITE);
-	}
-
-	if (err == WAIT_OBJECT_0) {
-
-		return(0);
-	} else if (err == WAIT_TIMEOUT) {
-
-		return(OS_SYNC_TIME_EXCEEDED);
-	} else {
-		ut_error;
-		return(1000000); /* dummy value to eliminate compiler warn. */
-	}
-#else
-	UT_NOT_USED(time);
-
-	/* In Posix this is just an ordinary, infinite wait */
-
-	os_event_wait(event);
-
-	return(0);
-#endif
-}
-
-#ifdef __WIN__
-/**************************************************************
-Windows only: waits, without a time limit, until at least one event in an
-array of OS native event handles is signaled. If srv_shutdown_state ==
-SRV_SHUTDOWN_EXIT_THREADS after the wait, the calling thread exits
-instead of returning. */
-
-ulint
-os_event_wait_multiple(
-/*===================*/
-					/* out: index of the event
-					which was signaled */
-	ulint			n,	/* in: number of events in the
-					array */
-	os_native_event_t*	native_event_array)
-					/* in: pointer to an array of event
-					handles */
-{
-	DWORD	wait_res;
-
-	ut_a(native_event_array);
-	ut_a(n > 0);
-
-	/* FALSE: wake up when any one event is signaled;
-	INFINITE: no time limit */
-	wait_res = WaitForMultipleObjects((DWORD) n, native_event_array,
-					  FALSE, INFINITE);
-
-	ut_a(wait_res >= WAIT_OBJECT_0);	/* NOTE: Pointless comparison */
-	ut_a(wait_res < WAIT_OBJECT_0 + n);
-
-	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
-		os_thread_exit(NULL);
-	}
-
-	return(wait_res - WAIT_OBJECT_0);
-}
-#endif
-
-/*************************************************************
-Creates an operating system mutex semaphore and registers it in
-os_mutex_list. Because these are slow, the mutex semaphore of InnoDB
-itself (mutex_t) should be used where possible. In the Posix
-implementation the name is not used. */
-
-os_mutex_t
-os_mutex_create(
-/*============*/
-				/* out: the mutex handle */
-	const char*	name)	/* in: the name of the mutex, if NULL
-				the mutex is created without a name */
-{
-#ifdef __WIN__
-	HANDLE			raw_mutex;
-	os_mutex_t		wrapper;
-
-	/* Arguments: no security attributes, initially no owner,
-	optional name */
-	raw_mutex = CreateMutex(NULL, FALSE, (LPCTSTR) name);
-	ut_a(raw_mutex);
-#else
-	os_fast_mutex_t*	raw_mutex;
-	os_mutex_t		wrapper;
-
-	UT_NOT_USED(name);
-
-	raw_mutex = ut_malloc(sizeof(os_fast_mutex_t));
-
-	os_fast_mutex_init(raw_mutex);
-#endif
-	wrapper = ut_malloc(sizeof(os_mutex_str_t));
-
-	wrapper->handle = raw_mutex;
-	wrapper->count = 0;
-	wrapper->event = os_event_create(NULL);
-
-	/* When creating os_sync_mutex itself we cannot reserve it, hence
-	the list update is guarded only once os_sync_mutex_inited is set */
-	if (os_sync_mutex_inited) {
-		os_mutex_enter(os_sync_mutex);
-	}
-
-	UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, wrapper);
-	os_mutex_count++;
-
-	if (os_sync_mutex_inited) {
-		os_mutex_exit(os_sync_mutex);
-	}
-
-	return(wrapper);
-}
-
-/**************************************************************
-Acquires ownership of a mutex semaphore, waiting without a time limit.
-After the acquisition the ownership counter of the mutex must be exactly
-1; anything else fails the assertion. */
-
-void
-os_mutex_enter(
-/*===========*/
-	os_mutex_t	mutex)	/* in: mutex to acquire */
-{
-#ifdef __WIN__
-	DWORD	wait_res;
-
-	ut_a(mutex);
-
-	/* No time limit for the wait */
-	wait_res = WaitForSingleObject(mutex->handle, INFINITE);
-
-	ut_a(wait_res == WAIT_OBJECT_0);
-#else
-	os_fast_mutex_lock(mutex->handle);
-#endif
-	/* Common bookkeeping for both platforms */
-	mutex->count += 1;
-
-	ut_a(mutex->count == 1);
-}
-
-/**************************************************************
-Gives up ownership of a mutex. The ownership counter must be 1 on entry;
-it is decremented before the underlying OS mutex is released. */
-
-void
-os_mutex_exit(
-/*==========*/
-	os_mutex_t	mutex)	/* in: mutex to release */
-{
-	ut_a(mutex);
-	ut_a(mutex->count == 1);
-
-	mutex->count -= 1;
-
-#ifndef __WIN__
-	os_fast_mutex_unlock(mutex->handle);
-#else
-	ut_a(ReleaseMutex(mutex->handle));
-#endif
-}
-
-/**************************************************************
-Frees a mutex object: releases its event (unless os_sync_free_called is
-set), unlinks the mutex from os_mutex_list, destroys the underlying OS
-mutex and frees the memory. */
-
-void
-os_mutex_free(
-/*==========*/
-	os_mutex_t	mutex)	/* in: mutex to free */
-{
-	ut_a(mutex);
-
-	if (!os_sync_free_called) {
-		os_event_free_internal(mutex->event);
-	}
-
-	/* The global list is guarded by os_sync_mutex only while that
-	mutex is flagged as initialized */
-	if (os_sync_mutex_inited) {
-		os_mutex_enter(os_sync_mutex);
-	}
-
-	UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex);
-	os_mutex_count--;
-
-	if (os_sync_mutex_inited) {
-		os_mutex_exit(os_sync_mutex);
-	}
-
-#ifdef __WIN__
-	ut_a(CloseHandle(mutex->handle));
-#else
-	/* In Posix the handle is a separately allocated fast mutex */
-	os_fast_mutex_free(mutex->handle);
-	ut_free(mutex->handle);
-#endif
-	ut_free(mutex);
-}
-
-/*************************************************************
-Initializes an operating system fast mutex semaphore and increments
-os_fast_mutex_count. */
-
-void
-os_fast_mutex_init(
-/*===============*/
-	os_fast_mutex_t*	fast_mutex)	/* in: fast mutex */
-{
-#ifdef __WIN__
-	ut_a(fast_mutex);
-
-	InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#elif defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
-	ut_a(0 == pthread_mutex_init(fast_mutex, pthread_mutexattr_default));
-#else
-	ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
-#endif
-	/* When creating os_sync_mutex itself (in Unix) we cannot
-	reserve it: the counter is then updated without the lock */
-	if (os_sync_mutex_inited) {
-		os_mutex_enter(os_sync_mutex);
-	}
-
-	os_fast_mutex_count += 1;
-
-	if (os_sync_mutex_inited) {
-		os_mutex_exit(os_sync_mutex);
-	}
-}
-
-/**************************************************************
-Locks a fast mutex: a critical section in Windows, a pthread mutex
-elsewhere. The result of the OS call is not checked. */
-
-void
-os_fast_mutex_lock(
-/*===============*/
-	os_fast_mutex_t*	fast_mutex)	/* in: mutex to acquire */
-{
-#ifndef __WIN__
-	pthread_mutex_lock(fast_mutex);
-#else
-	EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#endif
-}
-
-/**************************************************************
-Unlocks a fast mutex: a critical section in Windows, a pthread mutex
-elsewhere. The result of the OS call is not checked. */
-
-void
-os_fast_mutex_unlock(
-/*=================*/
-	os_fast_mutex_t*	fast_mutex)	/* in: mutex to release */
-{
-#ifndef __WIN__
-	pthread_mutex_unlock(fast_mutex);
-#else
-	LeaveCriticalSection(fast_mutex);
-#endif
-}
-
-/**************************************************************
-Destroys a fast mutex and decrements os_fast_mutex_count. In the Posix
-implementation a failing pthread_mutex_destroy() is reported to stderr
-together with a dump of the mutex bytes, but is not treated as fatal. */
-
-void
-os_fast_mutex_free(
-/*===============*/
-	os_fast_mutex_t*	fast_mutex)	/* in: mutex to free */
-{
-#ifdef __WIN__
-	ut_a(fast_mutex);
-
-	DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
-	int	err = pthread_mutex_destroy(fast_mutex);
-
-	if (err != 0) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: error: return value %lu when calling\n"
-			"InnoDB: pthread_mutex_destroy().\n", (ulint)err);
-		fprintf(stderr,
-			"InnoDB: Byte contents of the pthread mutex at %p:\n",
-			(void*) fast_mutex);
-		ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t));
-		fprintf(stderr, "\n");
-	}
-#endif
-	/* When freeing the last mutexes, we have already freed
-	os_sync_mutex: the counter is then updated without the lock */
-	if (os_sync_mutex_inited) {
-		os_mutex_enter(os_sync_mutex);
-	}
-
-	os_fast_mutex_count -= 1;
-
-	if (os_sync_mutex_inited) {
-		os_mutex_exit(os_sync_mutex);
-	}
-}
diff --git a/storage/innobase/os/os0thread.c b/storage/innobase/os/os0thread.c
deleted file mode 100644
index a0b1e51d359..00000000000
--- a/storage/innobase/os/os0thread.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/******************************************************
-The interface to the operating system thread control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0thread.h"
-#ifdef UNIV_NONINL
-#include "os0thread.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#endif
-
-#include "srv0srv.h"
-#include "os0sync.h"
-
-/*******************************************************************
-Tells whether two thread ids denote the same thread. In Windows the ids
-are compared directly, elsewhere through pthread_equal(). */
-
-ibool
-os_thread_eq(
-/*=========*/
-				/* out: TRUE if equal */
-	os_thread_id_t	a,	/* in: OS thread or thread id */
-	os_thread_id_t	b)	/* in: OS thread or thread id */
-{
-#ifdef __WIN__
-	return(a == b ? TRUE : FALSE);
-#else
-	return(pthread_equal(a, b) ? TRUE : FALSE);
-#endif
-}
-
-/********************************************************************
-Converts an OS thread id to a ulint, e.g. for printing. It is NOT
-guaranteed that the ulint is unique for the thread though! */
-
-ulint
-os_thread_pf(
-/*=========*/
-				/* out: the id converted to ulint */
-	os_thread_id_t	a)	/* in: OS thread id */
-{
-#ifndef UNIV_HPUX10
-	return((ulint)a);
-#else
-	/* In HP-UX-10.20 a pthread_t is a struct of 3 fields: field1, field2,
-	field3. We do not know if field1 determines the thread uniquely. */
-
-	return((ulint)(a.field1));
-#endif
-}
-
-/*********************************************************************
-Returns the identifier of the calling thread. In Unix the identifier is
-currently the thread handle itself, as returned by pthread_self(). Note
-that in HP-UX pthread_t is a struct of 3 fields. */
-
-os_thread_id_t
-os_thread_get_curr_id(void)
-/*=======================*/
-				/* out: id of the calling thread */
-{
-#ifndef __WIN__
-	return(pthread_self());
-#else
-	return(GetCurrentThreadId());
-#endif
-}
-
-/********************************************************************
-Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and returns an ulint.
-
-On both platforms os_thread_count is incremented under os_sync_mutex
-before the OS thread is created. If srv_set_thread_priorities is set,
-the new thread gets the priority srv_query_thread_priority.
-
-In the Posix branch a failure of pthread_attr_setstacksize() or
-pthread_create() is reported to stderr and the whole process is
-terminated with exit(1). In the Windows branch the handle returned by
-CreateThread() is not checked before it is used. */
-
-os_thread_t
-os_thread_create(
-/*=============*/
- /* out: handle to the thread */
-#ifndef __WIN__
- os_posix_f_t start_f, /* in: Posix start function */
-#else
- ulint (*start_f)(void*), /* in: pointer to function
- from which to start */
-#endif
- void* arg, /* in: argument to start
- function */
- os_thread_id_t* thread_id) /* out: id of the created
- thread, or NULL if the caller does not
- need the id */
-{
-#ifdef __WIN__
- os_thread_t thread;
- DWORD win_thread_id;
-
- os_mutex_enter(os_sync_mutex);
- os_thread_count++;
- os_mutex_exit(os_sync_mutex);
-
- thread = CreateThread(NULL, /* no security attributes */
- 0, /* default size stack */
- (LPTHREAD_START_ROUTINE)start_f,
- arg,
- 0, /* thread runs immediately */
- &win_thread_id);
-
- if (srv_set_thread_priorities) {
-
- /* Set created thread priority the same as a normal query
- in MYSQL: we try to prevent starvation of threads by
- assigning same priority QUERY_PRIOR to all */
-
- ut_a(SetThreadPriority(thread, srv_query_thread_priority));
- }
-
- if (thread_id) {
- *thread_id = win_thread_id;
- }
-
- return(thread);
-#else
- int ret;
- os_thread_t pthread;
- pthread_attr_t attr;
-
-#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10))
- pthread_attr_init(&attr);
-#endif
-
-#ifdef UNIV_AIX
- /* We must make sure a thread stack is at least 32 kB, otherwise
- InnoDB might crash; we do not know if the default stack size on
- AIX is always big enough. An empirical test on AIX-4.3 suggested
- the size was 96 kB, though. The size requested here is
- PTHREAD_STACK_MIN plus 32 kB. */
-
- ret = pthread_attr_setstacksize(&attr,
- (size_t)(PTHREAD_STACK_MIN
- + 32 * 1024));
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_attr_setstacksize"
- " returned %d\n", ret);
- exit(1);
- }
-#endif
-#ifdef __NETWARE__
- ret = pthread_attr_setstacksize(&attr,
- (size_t) NW_THD_STACKSIZE);
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_attr_setstacksize"
- " returned %d\n", ret);
- exit(1);
- }
-#endif
- os_mutex_enter(os_sync_mutex);
- os_thread_count++;
- os_mutex_exit(os_sync_mutex);
-
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
- ret = pthread_create(&pthread, pthread_attr_default, start_f, arg);
-#else
- ret = pthread_create(&pthread, &attr, start_f, arg);
-#endif
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_create returned %d\n", ret);
- exit(1);
- }
-
-#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10))
- pthread_attr_destroy(&attr);
-#endif
- if (srv_set_thread_priorities) {
-
- my_pthread_setprio(pthread, srv_query_thread_priority);
- }
-
- if (thread_id) {
- *thread_id = pthread;
- }
-
- return(pthread);
-#endif
-}
-
-/*********************************************************************
-Terminates the calling thread. os_thread_count is decremented under
-os_sync_mutex before the thread is handed to the OS exit call. */
-
-void
-os_thread_exit(
-/*===========*/
-	void*	exit_value)	/* in: exit value; in Windows this void*
-				is cast as a DWORD */
-{
-#ifdef UNIV_DEBUG_THREAD_CREATION
-	fprintf(stderr, "Thread exits, id %lu\n",
-		os_thread_pf(os_thread_get_curr_id()));
-#endif
-	/* Thread accounting is protected by os_sync_mutex */
-	os_mutex_enter(os_sync_mutex);
-	os_thread_count -= 1;
-	os_mutex_exit(os_sync_mutex);
-
-#ifndef __WIN__
-	pthread_exit(exit_value);
-#else
-	ExitThread((DWORD)exit_value);
-#endif
-}
-
-#ifdef HAVE_PTHREAD_JOIN
-/*********************************************************************
-Waits for a thread to terminate by calling pthread_join(); the exit
-value of the joined thread is discarded. Compiled only if
-HAVE_PTHREAD_JOIN is defined. */
-int
-os_thread_join(
-/*===========*/
-					/* out: the value returned by
-					pthread_join() */
-	os_thread_id_t	thread_id)	/* in: id of the thread to join */
-{
-	int	join_res;
-
-	join_res = pthread_join(thread_id, NULL);
-
-	return(join_res);
-}
-#endif
-/*********************************************************************
-Returns the OS handle of the calling thread: GetCurrentThread() in
-Windows, pthread_self() elsewhere. */
-
-os_thread_t
-os_thread_get_curr(void)
-/*====================*/
-				/* out: handle of the calling thread */
-{
-#ifndef __WIN__
-	return(pthread_self());
-#else
-	return(GetCurrentThread());
-#endif
-}
-
-/*********************************************************************
-Advises the os to give up remainder of the thread's time slice. The
-first mechanism available at compile time is used: Sleep(0) in Windows,
-sched_yield(), one of the two pthread_yield() flavours, or as the last
-resort a zero-length os_thread_sleep(). */
-
-void
-os_thread_yield(void)
-/*=================*/
-{
-#ifdef __WIN__
-	Sleep(0);
-#elif defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H)
-	sched_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG)
-	pthread_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ONE_ARG)
-	pthread_yield(0);
-#else
-	os_thread_sleep(0);
-#endif
-}
-
-/*********************************************************************
-Puts the calling thread to sleep for the time given in microseconds.
-In Windows and NetWare the time is converted to whole milliseconds,
-rounding down, so a request below 1000 microseconds becomes a
-zero-length sleep there. Elsewhere select() with empty descriptor sets
-is used as a timer; its return value is not checked. */
-
-void
-os_thread_sleep(
-/*============*/
-	ulint	tm)	/* in: time in microseconds */
-{
-#ifdef __WIN__
-	/* Divide first and narrow afterwards: a cast binds tighter than
-	'/', so "(DWORD) tm / 1000" would truncate a wide ulint to DWORD
-	before the conversion to milliseconds */
-	Sleep((DWORD) (tm / 1000));
-#elif defined(__NETWARE__)
-	delay(tm / 1000);
-#else
-	struct timeval	t;
-
-	t.tv_sec = tm / 1000000;
-	t.tv_usec = tm % 1000000;
-
-	select(0, NULL, NULL, NULL, &t);
-#endif
-}
-
-/**********************************************************************
-Sets the priority of a thread. Only implemented in Windows, where the
-OS_THREAD_PRIORITY_* value is mapped to a THREAD_PRIORITY_* value and an
-unknown value is a fatal error; elsewhere the call does nothing. */
-
-void
-os_thread_set_priority(
-/*===================*/
-	os_thread_t	handle,	/* in: OS handle to the thread */
-	ulint		pri)	/* in: priority */
-{
-#ifndef __WIN__
-	UT_NOT_USED(handle);
-	UT_NOT_USED(pri);
-#else
-	int	win_pri;
-
-	if (pri == OS_THREAD_PRIORITY_BACKGROUND) {
-
-		win_pri = THREAD_PRIORITY_BELOW_NORMAL;
-
-	} else if (pri == OS_THREAD_PRIORITY_NORMAL) {
-
-		win_pri = THREAD_PRIORITY_NORMAL;
-
-	} else if (pri == OS_THREAD_PRIORITY_ABOVE_NORMAL) {
-
-		win_pri = THREAD_PRIORITY_HIGHEST;
-
-	} else {
-		ut_error;
-	}
-
-	ut_a(SetThreadPriority(handle, win_pri));
-#endif
-}
-
-/**********************************************************************
-Gets the priority of a thread. Only implemented in Windows, where the
-THREAD_PRIORITY_* value of the thread is mapped back to an
-OS_THREAD_PRIORITY_* value and an unexpected value is a fatal error;
-elsewhere 0 is always returned. */
-
-ulint
-os_thread_get_priority(
-/*===================*/
-				/* out: priority */
-	os_thread_t	handle __attribute__((unused)))
-				/* in: OS handle to the thread */
-{
-#ifndef __WIN__
-	return(0);
-#else
-	int	win_pri;
-	ulint	result;
-
-	win_pri = GetThreadPriority(handle);
-
-	if (win_pri == THREAD_PRIORITY_BELOW_NORMAL) {
-
-		result = OS_THREAD_PRIORITY_BACKGROUND;
-
-	} else if (win_pri == THREAD_PRIORITY_NORMAL) {
-
-		result = OS_THREAD_PRIORITY_NORMAL;
-
-	} else if (win_pri == THREAD_PRIORITY_HIGHEST) {
-
-		result = OS_THREAD_PRIORITY_ABOVE_NORMAL;
-
-	} else {
-		ut_error;
-	}
-
-	return(result);
-#endif
-}
-
-/**********************************************************************
-Gets the last operating system error code for the calling thread. Only
-Windows reports a real value, through GetLastError(); elsewhere 0 is
-always returned. */
-
-ulint
-os_thread_get_last_error(void)
-/*==========================*/
-				/* out: last OS error code, or 0 */
-{
-#ifndef __WIN__
-	return(0);
-#else
-	return(GetLastError());
-#endif
-}
diff --git a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c
deleted file mode 100644
index 70b7de194fd..00000000000
--- a/storage/innobase/page/page0cur.c
+++ /dev/null
@@ -1,1510 +0,0 @@
-/************************************************************************
-The page cursor
-
-(c) 1994-1996 Innobase Oy
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "page0cur.h"
-#ifdef UNIV_NONINL
-#include "page0cur.ic"
-#endif
-
-#include "rem0cmp.h"
-#include "mtr0log.h"
-#include "log0recv.h"
-#include "rem0cmp.h"
-#include "srv0srv.h"
-#include "ut0ut.h"
-
-static ulint page_rnd = 976722341;
-
-#ifdef PAGE_CUR_ADAPT
-# ifdef UNIV_SEARCH_PERF_STAT
-ulint page_cur_short_succ = 0;
-# endif /* UNIV_SEARCH_PERF_STAT */
-
-/***********************************************************************
-This is a linear congruential generator PRNG. Returns a pseudo random
-number between 0 and 2^64-1 inclusive. The formula and the constants
-being used are:
-X[n+1] = (a * X[n] + c) mod m
-where:
-X[0] = ut_usectime()
-a = 1103515245 (3^5 * 5 * 7 * 129749)
-c = 12345 (3 * 5 * 823)
-m = 18446744073709551616 (2^64)
-
-The generator state lives in function-local static variables and is
-seeded on the first call; no locking is done in this function.
-*/
-#define LCG_a	1103515245
-#define LCG_c	12345
-static
-unsigned long long
-page_cur_lcg_prng(void)
-/*===============*/
-			/* out: number between 0 and 2^64-1 */
-{
-	static unsigned long long	lcg_current = 0;
-	static ibool			initialized = FALSE;
-
-	if (!initialized) {
-		ulint	time_sec;
-		ulint	time_usec;
-
-		/* NOTE(review): the second output of ut_usectime() is
-		presumably the microsecond part, as the seconds are scaled
-		by 10^6 below - confirm against ut0ut.h */
-		ut_usectime(&time_sec, &time_usec);
-
-		/* Widen before multiplying: in ulint arithmetic the
-		product would wrap around where ulint is 32 bits wide */
-		lcg_current = (unsigned long long) time_sec * 1000000
-			+ (unsigned long long) time_usec;
-		initialized = TRUE;
-	}
-
-	/* no need to "% 2^64" explicitly because lcg_current is
-	64 bit and this will be done anyway */
-	lcg_current = LCG_a * lcg_current + LCG_c;
-
-	return(lcg_current);
-}
-
-/********************************************************************
-Tries a search shortcut based on the last insert. The tuple is compared
-with the record stored in the PAGE_LAST_INSERT header field and with the
-record following it: the shortcut succeeds only if the tuple is not
-smaller than the last inserted record and is smaller than its successor.
-On success the cursor is positioned on the last inserted record and the
-match counters are updated; on failure the cursor and the counters are
-left untouched. */
-UNIV_INLINE
-ibool
-page_cur_try_search_shortcut(
-/*=========================*/
-				/* out: TRUE on success */
-	page_t*		page,	/* in: index page */
-	dict_index_t*	index,	/* in: record descriptor */
-	dtuple_t*	tuple,	/* in: data tuple */
-	ulint*		iup_matched_fields,
-				/* in/out: already matched fields in upper
-				limit record */
-	ulint*		iup_matched_bytes,
-				/* in/out: already matched bytes in a field
-				not yet completely matched */
-	ulint*		ilow_matched_fields,
-				/* in/out: already matched fields in lower
-				limit record */
-	ulint*		ilow_matched_bytes,
-				/* in/out: already matched bytes in a field
-				not yet completely matched */
-	page_cur_t*	cursor)	/* out: page cursor */
-{
-	rec_t*		rec;
-	rec_t*		next_rec;
-	ulint		low_match;
-	ulint		low_bytes;
-	ulint		up_match;
-	ulint		up_bytes;
-#ifdef UNIV_SEARCH_DEBUG
-	page_cur_t	cursor2;
-#endif
-	ibool		success		= FALSE;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
-	ut_ad(dtuple_check_typed(tuple));
-
-	rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
-
-	/* Check the record before it is handed to rec_get_offsets(),
-	not after it has already been used */
-	ut_ad(rec);
-	ut_ad(page_rec_is_user_rec(rec));
-
-	offsets = rec_get_offsets(rec, index, offsets,
-				  dtuple_get_n_fields(tuple), &heap);
-
-	/* Start both comparisons from the smaller of the two match
-	positions passed in by the caller */
-	ut_pair_min(&low_match, &low_bytes,
-		    *ilow_matched_fields, *ilow_matched_bytes,
-		    *iup_matched_fields, *iup_matched_bytes);
-
-	up_match = low_match;
-	up_bytes = low_bytes;
-
-	if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets,
-					   &low_match, &low_bytes) < 0) {
-		/* The tuple is smaller than the last inserted record */
-		goto exit_func;
-	}
-
-	next_rec = page_rec_get_next(rec);
-	offsets = rec_get_offsets(next_rec, index, offsets,
-				  dtuple_get_n_fields(tuple), &heap);
-
-	if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
-					   &up_match, &up_bytes) >= 0) {
-		/* The tuple is not smaller than the successor */
-		goto exit_func;
-	}
-
-	cursor->rec = rec;
-
-#ifdef UNIV_SEARCH_DEBUG
-	/* Cross-check the shortcut against the full search */
-	page_cur_search_with_match(page, index, tuple, PAGE_CUR_DBG,
-				   iup_matched_fields,
-				   iup_matched_bytes,
-				   ilow_matched_fields,
-				   ilow_matched_bytes,
-				   &cursor2);
-	ut_a(cursor2.rec == cursor->rec);
-
-	if (next_rec != page_get_supremum_rec(page)) {
-
-		ut_a(*iup_matched_fields == up_match);
-		ut_a(*iup_matched_bytes == up_bytes);
-	}
-
-	ut_a(*ilow_matched_fields == low_match);
-	ut_a(*ilow_matched_bytes == low_bytes);
-#endif
-	/* The upper match counters are reported only when the successor
-	is not the supremum record */
-	if (!page_rec_is_supremum(next_rec)) {
-
-		*iup_matched_fields = up_match;
-		*iup_matched_bytes = up_bytes;
-	}
-
-	*ilow_matched_fields = low_match;
-	*ilow_matched_bytes = low_bytes;
-
-#ifdef UNIV_SEARCH_PERF_STAT
-	page_cur_short_succ++;
-#endif
-	success = TRUE;
-exit_func:
-	/* rec_get_offsets() may have allocated a heap for the offsets */
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(success);
-}
-
-#endif
-
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-/********************************************************************
-Tells whether the nth field of a record "extends" the nth field of a
-tuple: the tuple field must be of one of the character or binary main
-types listed below, neither field may be SQL NULL, the record field must
-be at least as long as the tuple field, and the record field must compare
-equal to the tuple field over the length of the tuple field. */
-static
-ibool
-page_cur_rec_field_extends(
-/*=======================*/
-				/* out: TRUE if rec field
-				extends tuple field */
-	dtuple_t*	tuple,	/* in: data tuple */
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		n)	/* in: compare nth field */
-{
-	dfield_t*	tuple_field;
-	dtype_t*	field_type;
-	byte*		rec_data;
-	ulint		rec_len;
-	ulint		tuple_len;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-
-	tuple_field = dtuple_get_nth_field(tuple, n);
-	field_type = dfield_get_type(tuple_field);
-	rec_data = rec_get_nth_field(rec, offsets, n, &rec_len);
-
-	/* Only these main types can be extended */
-	if (field_type->mtype != DATA_VARCHAR
-	    && field_type->mtype != DATA_CHAR
-	    && field_type->mtype != DATA_FIXBINARY
-	    && field_type->mtype != DATA_BINARY
-	    && field_type->mtype != DATA_BLOB
-	    && field_type->mtype != DATA_VARMYSQL
-	    && field_type->mtype != DATA_MYSQL) {
-
-		return(FALSE);
-	}
-
-	tuple_len = dfield_get_len(tuple_field);
-
-	if (tuple_len == UNIV_SQL_NULL
-	    || rec_len == UNIV_SQL_NULL
-	    || rec_len < tuple_len) {
-
-		return(FALSE);
-	}
-
-	/* Compare only the prefix of the record field that is as long
-	as the tuple field */
-	if (cmp_data_data_slow(field_type,
-			       dfield_get_data(tuple_field), tuple_len,
-			       rec_data, tuple_len)) {
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-
-/********************************************************************
-Searches the right position for a page cursor.
-
-The search keeps a lower and an upper limit which are moved towards each
-other: first as slot numbers of the page directory (binary search), then
-as records in the record list (linear search), until the two limit
-records are adjacent. Depending on the mode the cursor is then placed on
-the upper or on the lower limit record, and the match counters of both
-limits are returned through the in/out parameters.
-
-If PAGE_CUR_ADAPT is defined, a shortcut through
-page_cur_try_search_shortcut() is tried first for PAGE_CUR_LE searches on
-a level 0 page when the page header records more than 3 consecutive
-inserts in the PAGE_RIGHT direction and a last insert position.
-
-NOTE(review): the final cursor placement tests mode <= PAGE_CUR_GE,
-which presumably relies on PAGE_CUR_G and PAGE_CUR_GE having smaller
-numeric values than PAGE_CUR_L and PAGE_CUR_LE - confirm against
-page0cur.h. */
-
-void
-page_cur_search_with_match(
-/*=======================*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* tuple, /* in: data tuple */
- ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- ulint* iup_matched_fields,
- /* in/out: already matched fields in upper
- limit record */
- ulint* iup_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- ulint* ilow_matched_fields,
- /* in/out: already matched fields in lower
- limit record */
- ulint* ilow_matched_bytes,
- /* in/out: already matched bytes in a field
- not yet completely matched */
- page_cur_t* cursor) /* out: page cursor */
-{
- ulint up;
- ulint low;
- ulint mid;
- page_dir_slot_t* slot;
- rec_t* up_rec;
- rec_t* low_rec;
- rec_t* mid_rec;
- ulint up_matched_fields;
- ulint up_matched_bytes;
- ulint low_matched_fields;
- ulint low_matched_bytes;
- ulint cur_matched_fields;
- ulint cur_matched_bytes;
- int cmp;
-#ifdef UNIV_SEARCH_DEBUG
- int dbg_cmp;
- ulint dbg_matched_fields;
- ulint dbg_matched_bytes;
-#endif
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(page && tuple && iup_matched_fields && iup_matched_bytes
- && ilow_matched_fields && ilow_matched_bytes && cursor);
- ut_ad(dtuple_validate(tuple));
- ut_ad(dtuple_check_typed(tuple));
-#ifdef UNIV_DEBUG
-# ifdef PAGE_CUR_DBG
- if (mode != PAGE_CUR_DBG)
-# endif /* PAGE_CUR_DBG */
-# ifdef PAGE_CUR_LE_OR_EXTENDS
- if (mode != PAGE_CUR_LE_OR_EXTENDS)
-# endif /* PAGE_CUR_LE_OR_EXTENDS */
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || mode == PAGE_CUR_G || mode == PAGE_CUR_GE);
-#endif /* UNIV_DEBUG */
-
- page_check_dir(page);
-
-#ifdef PAGE_CUR_ADAPT
- if ((page_header_get_field(page, PAGE_LEVEL) == 0)
- && (mode == PAGE_CUR_LE)
- && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
- && (page_header_get_ptr(page, PAGE_LAST_INSERT))
- && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
-
- if (page_cur_try_search_shortcut(
- page, index, tuple,
- iup_matched_fields, iup_matched_bytes,
- ilow_matched_fields, ilow_matched_bytes,
- cursor)) {
- return;
- }
- }
-# ifdef PAGE_CUR_DBG
- if (mode == PAGE_CUR_DBG) {
- mode = PAGE_CUR_LE;
- }
-# endif
-#endif
-
- /* The following flag does not work for non-latin1 char sets because
- cmp_full_field does not tell how many bytes matched. The assertion
- below therefore rejects the mode PAGE_CUR_LE_OR_EXTENDS outright. */
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- ut_a(mode != PAGE_CUR_LE_OR_EXTENDS);
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-
- /* If mode PAGE_CUR_G is specified, we are trying to position the
- cursor to answer a query of the form "tuple < X", where tuple is
- the input parameter, and X denotes an arbitrary physical record on
- the page. We want to position the cursor on the first X which
- satisfies the condition. */
-
- up_matched_fields = *iup_matched_fields;
- up_matched_bytes = *iup_matched_bytes;
- low_matched_fields = *ilow_matched_fields;
- low_matched_bytes = *ilow_matched_bytes;
-
- /* Perform binary search. First the search is done through the page
- directory, after that as a linear search in the list of records
- owned by the upper limit directory slot. */
-
- low = 0;
- up = page_dir_get_n_slots(page) - 1;
-
- /* Perform binary search until the lower and upper limit directory
- slots come to the distance 1 of each other. In every round the
- comparison starts from the smaller of the match positions of the two
- limits. A positive cmp moves the lower limit to mid, a negative cmp
- moves the upper limit to mid, and for cmp == 0 the mode decides:
- PAGE_CUR_G and PAGE_CUR_LE move the lower limit, the other modes the
- upper limit. NOTE(review): UNIV_EXPECT(cmp, -1) is presumably a
- branch prediction hint that evaluates to cmp itself - confirm against
- univ.i. */
-
- while (up - low > 1) {
- mid = (low + up) / 2;
- slot = page_dir_get_nth_slot(page, mid);
- mid_rec = page_dir_slot_get_rec(slot);
-
- ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
- low_matched_fields, low_matched_bytes,
- up_matched_fields, up_matched_bytes);
-
- offsets = rec_get_offsets(mid_rec, index, offsets,
- dtuple_get_n_fields_cmp(tuple),
- &heap);
-
- cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
- &cur_matched_fields,
- &cur_matched_bytes);
- if (UNIV_LIKELY(cmp > 0)) {
-low_slot_match:
- low = mid;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
-
- } else if (UNIV_EXPECT(cmp, -1)) {
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- if (mode == PAGE_CUR_LE_OR_EXTENDS
- && page_cur_rec_field_extends(
- tuple, mid_rec, offsets,
- cur_matched_fields)) {
-
- goto low_slot_match;
- }
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-up_slot_match:
- up = mid;
- up_matched_fields = cur_matched_fields;
- up_matched_bytes = cur_matched_bytes;
-
- } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- || mode == PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- ) {
-
- goto low_slot_match;
- } else {
-
- goto up_slot_match;
- }
- }
-
- slot = page_dir_get_nth_slot(page, low);
- low_rec = page_dir_slot_get_rec(slot);
- slot = page_dir_get_nth_slot(page, up);
- up_rec = page_dir_slot_get_rec(slot);
-
- /* Perform linear search until the upper and lower records come to
- distance 1 of each other. The three-way decision on cmp is the same
- as in the directory search above, applied to records instead of
- directory slots. */
-
- while (page_rec_get_next(low_rec) != up_rec) {
-
- mid_rec = page_rec_get_next(low_rec);
-
- ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
- low_matched_fields, low_matched_bytes,
- up_matched_fields, up_matched_bytes);
-
- offsets = rec_get_offsets(mid_rec, index, offsets,
- dtuple_get_n_fields_cmp(tuple),
- &heap);
-
- cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
- &cur_matched_fields,
- &cur_matched_bytes);
- if (UNIV_LIKELY(cmp > 0)) {
-low_rec_match:
- low_rec = mid_rec;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
-
- } else if (UNIV_EXPECT(cmp, -1)) {
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- if (mode == PAGE_CUR_LE_OR_EXTENDS
- && page_cur_rec_field_extends(
- tuple, mid_rec, offsets,
- cur_matched_fields)) {
-
- goto low_rec_match;
- }
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-up_rec_match:
- up_rec = mid_rec;
- up_matched_fields = cur_matched_fields;
- up_matched_bytes = cur_matched_bytes;
- } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- || mode == PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- ) {
-
- goto low_rec_match;
- } else {
-
- goto up_rec_match;
- }
- }
-
-#ifdef UNIV_SEARCH_DEBUG
-
- /* Check that the lower and upper limit records have the
- right alphabetical order compared to tuple. Both limits are compared
- again from scratch, and the match counters found this way must agree
- with the ones maintained by the search, except for the infimum and
- supremum records. */
- dbg_matched_fields = 0;
- dbg_matched_bytes = 0;
-
- offsets = rec_get_offsets(low_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets,
- &dbg_matched_fields,
- &dbg_matched_bytes);
- if (mode == PAGE_CUR_G) {
- ut_a(dbg_cmp >= 0);
- } else if (mode == PAGE_CUR_GE) {
- ut_a(dbg_cmp == 1);
- } else if (mode == PAGE_CUR_L) {
- ut_a(dbg_cmp == 1);
- } else if (mode == PAGE_CUR_LE) {
- ut_a(dbg_cmp >= 0);
- }
-
- if (low_rec != page_get_infimum_rec(page)) {
-
- ut_a(low_matched_fields == dbg_matched_fields);
- ut_a(low_matched_bytes == dbg_matched_bytes);
- }
-
- dbg_matched_fields = 0;
- dbg_matched_bytes = 0;
-
- offsets = rec_get_offsets(up_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets,
- &dbg_matched_fields,
- &dbg_matched_bytes);
- if (mode == PAGE_CUR_G) {
- ut_a(dbg_cmp == -1);
- } else if (mode == PAGE_CUR_GE) {
- ut_a(dbg_cmp <= 0);
- } else if (mode == PAGE_CUR_L) {
- ut_a(dbg_cmp <= 0);
- } else if (mode == PAGE_CUR_LE) {
- ut_a(dbg_cmp == -1);
- }
-
- if (up_rec != page_get_supremum_rec(page)) {
-
- ut_a(up_matched_fields == dbg_matched_fields);
- ut_a(up_matched_bytes == dbg_matched_bytes);
- }
-#endif
- if (mode <= PAGE_CUR_GE) {
- cursor->rec = up_rec;
- } else {
- cursor->rec = low_rec;
- }
-
- *iup_matched_fields = up_matched_fields;
- *iup_matched_bytes = up_matched_bytes;
- *ilow_matched_fields = low_matched_fields;
- *ilow_matched_bytes = low_matched_bytes;
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***************************************************************
-Positions a page cursor on a randomly chosen user record on a page. If there
-are no user records, sets the cursor on the infimum record. */
-
-void
-page_cur_open_on_rnd_user_rec(
-/*==========================*/
- page_t* page, /* in: page */
- page_cur_t* cursor) /* in/out: page cursor */
-{
- ulint rnd;
- rec_t* rec;
-
- if (page_get_n_recs(page) == 0) {
- page_cur_position(page_get_infimum_rec(page), cursor);
-
- return;
- }
-
- if (srv_use_legacy_cardinality_algorithm) {
- page_rnd += 87584577;
-
- rnd = page_rnd % page_get_n_recs(page);
- } else {
- rnd = (ulint) page_cur_lcg_prng() % page_get_n_recs(page);
- }
-
- rec = page_get_infimum_rec(page);
-
- rec = page_rec_get_next(rec);
-
- while (rnd > 0) {
- rec = page_rec_get_next(rec);
-
- rnd--;
- }
-
- page_cur_position(rec, cursor);
-}
-
-/***************************************************************
-Writes the log record of a record insert on a page. */
-static
-void
-page_cur_insert_rec_write_log(
-/*==========================*/
- rec_t* insert_rec, /* in: inserted physical record */
- ulint rec_size, /* in: insert_rec size */
- rec_t* cursor_rec, /* in: record the
- cursor is pointing to */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- ulint cur_rec_size;
- ulint extra_size;
- ulint cur_extra_size;
- ulint min_rec_size;
- byte* ins_ptr;
- byte* cur_ptr;
- ulint extra_info_yes;
- byte* log_ptr;
- byte* log_end;
- ulint i;
- ulint comp;
-
- ut_a(rec_size < UNIV_PAGE_SIZE);
- ut_ad(buf_frame_align(insert_rec) == buf_frame_align(cursor_rec));
- ut_ad(!page_rec_is_comp(insert_rec)
- == !dict_table_is_comp(index->table));
- comp = page_rec_is_comp(insert_rec);
-
- {
- mem_heap_t* heap = NULL;
- ulint cur_offs_[REC_OFFS_NORMAL_SIZE];
- ulint ins_offs_[REC_OFFS_NORMAL_SIZE];
-
- ulint* cur_offs;
- ulint* ins_offs;
-
- *cur_offs_ = (sizeof cur_offs_) / sizeof *cur_offs_;
- *ins_offs_ = (sizeof ins_offs_) / sizeof *ins_offs_;
-
- cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_,
- ULINT_UNDEFINED, &heap);
- ins_offs = rec_get_offsets(insert_rec, index, ins_offs_,
- ULINT_UNDEFINED, &heap);
-
- extra_size = rec_offs_extra_size(ins_offs);
- cur_extra_size = rec_offs_extra_size(cur_offs);
- ut_ad(rec_size == rec_offs_size(ins_offs));
- cur_rec_size = rec_offs_size(cur_offs);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- ins_ptr = insert_rec - extra_size;
-
- i = 0;
-
- if (cur_extra_size == extra_size) {
- min_rec_size = ut_min(cur_rec_size, rec_size);
-
- cur_ptr = cursor_rec - cur_extra_size;
-
- /* Find out the first byte in insert_rec which differs from
- cursor_rec; skip the bytes in the record info */
-
- for (;;) {
- if (i >= min_rec_size) {
-
- break;
- } else if (*ins_ptr == *cur_ptr) {
- i++;
- ins_ptr++;
- cur_ptr++;
- } else if ((i < extra_size)
- && (i >= extra_size
- - (comp
- ? REC_N_NEW_EXTRA_BYTES
- : REC_N_OLD_EXTRA_BYTES))) {
- i = extra_size;
- ins_ptr = insert_rec;
- cur_ptr = cursor_rec;
- } else {
- break;
- }
- }
- }
-
- if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
-
- log_ptr = mlog_open_and_write_index(mtr, insert_rec, index,
- comp
- ? MLOG_COMP_REC_INSERT
- : MLOG_REC_INSERT,
- 2 + 5 + 1 + 5 + 5
- + MLOG_BUF_MARGIN);
-
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash
- recovery: in that case mlog_open returns NULL */
- return;
- }
-
- log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
- /* Write the cursor rec offset as a 2-byte ulint */
- mach_write_to_2(log_ptr, cursor_rec
- - buf_frame_align(cursor_rec));
- log_ptr += 2;
- } else {
- log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash
- recovery: in that case mlog_open returns NULL */
- return;
- }
- log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
- }
-
- if ((rec_get_info_and_status_bits(insert_rec, comp)
- != rec_get_info_and_status_bits(cursor_rec, comp))
- || (extra_size != cur_extra_size)
- || (rec_size != cur_rec_size)) {
-
- extra_info_yes = 1;
- } else {
- extra_info_yes = 0;
- }
-
- /* Write the record end segment length and the extra info storage
- flag */
- log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i)
- + extra_info_yes);
- if (extra_info_yes) {
- /* Write the info bits */
- mach_write_to_1(log_ptr,
- rec_get_info_and_status_bits(insert_rec,
- comp));
- log_ptr++;
-
- /* Write the record origin offset */
- log_ptr += mach_write_compressed(log_ptr, extra_size);
-
- /* Write the mismatch index */
- log_ptr += mach_write_compressed(log_ptr, i);
-
- ut_a(i < UNIV_PAGE_SIZE);
- ut_a(extra_size < UNIV_PAGE_SIZE);
- }
-
- /* Write to the log the inserted index record end segment which
- differs from the cursor record */
-
- rec_size -= i;
-
- if (log_ptr + rec_size <= log_end) {
- memcpy(log_ptr, ins_ptr, rec_size);
- mlog_close(mtr, log_ptr + rec_size);
- } else {
- mlog_close(mtr, log_ptr);
- ut_a(rec_size < UNIV_PAGE_SIZE);
- mlog_catenate_string(mtr, ins_ptr, rec_size);
- }
-}
-
-/***************************************************************
-Parses a log record of a record insert on a page. */
-
-byte*
-page_cur_parse_insert_rec(
-/*======================*/
- /* out: end of log record or NULL */
- ibool is_short,/* in: TRUE if short inserts */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
-{
- ulint extra_info_yes;
- ulint offset = 0; /* remove warning */
- ulint origin_offset;
- ulint end_seg_len;
- ulint mismatch_index;
- rec_t* cursor_rec;
- byte buf1[1024];
- byte* buf;
- byte* ptr2 = ptr;
- ulint info_and_status_bits = 0; /* remove warning */
- page_cur_t cursor;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- if (!is_short) {
- /* Read the cursor rec offset as a 2-byte ulint */
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
-
- if (offset >= UNIV_PAGE_SIZE) {
-
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- ptr += 2;
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- extra_info_yes = end_seg_len & 0x1UL;
- end_seg_len >>= 1;
-
- if (end_seg_len >= UNIV_PAGE_SIZE) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- if (extra_info_yes) {
- /* Read the info bits */
-
- if (end_ptr < ptr + 1) {
-
- return(NULL);
- }
-
- info_and_status_bits = mach_read_from_1(ptr);
- ptr++;
-
- ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- ut_a(origin_offset < UNIV_PAGE_SIZE);
-
- ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- ut_a(mismatch_index < UNIV_PAGE_SIZE);
- }
-
- if (end_ptr < ptr + end_seg_len) {
-
- return(NULL);
- }
-
- if (page == NULL) {
-
- return(ptr + end_seg_len);
- }
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- /* Read from the log the inserted index record end segment which
- differs from the cursor record */
-
- if (is_short) {
- cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
- } else {
- cursor_rec = page + offset;
- }
-
- offsets = rec_get_offsets(cursor_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (extra_info_yes == 0) {
- info_and_status_bits = rec_get_info_and_status_bits(
- cursor_rec, page_is_comp(page));
- origin_offset = rec_offs_extra_size(offsets);
- mismatch_index = rec_offs_size(offsets) - end_seg_len;
- }
-
- if (mismatch_index + end_seg_len < sizeof buf1) {
- buf = buf1;
- } else {
- buf = mem_alloc(mismatch_index + end_seg_len);
- }
-
- /* Build the inserted record to buf */
-
- if (mismatch_index >= UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "Is short %lu, info_and_status_bits %lu, offset %lu, "
- "o_offset %lu\n"
- "mismatch index %lu, end_seg_len %lu\n"
- "parsed len %lu\n",
- (ulong) is_short, (ulong) info_and_status_bits,
- (ulong) offset,
- (ulong) origin_offset,
- (ulong) mismatch_index, (ulong) end_seg_len,
- (ulong) (ptr - ptr2));
-
- fputs("Dump of 300 bytes of log:\n", stderr);
- ut_print_buf(stderr, ptr2, 300);
-
- buf_page_print(page);
-
- ut_error;
- }
-
- ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
- ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
-
- rec_set_info_and_status_bits(buf + origin_offset, page_is_comp(page),
- info_and_status_bits);
-
- page_cur_position(cursor_rec, &cursor);
-
- offsets = rec_get_offsets(buf + origin_offset, index, offsets,
- ULINT_UNDEFINED, &heap);
- page_cur_rec_insert(&cursor, buf + origin_offset, index, offsets, mtr);
-
- if (buf != buf1) {
-
- mem_free(buf);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return(ptr + end_seg_len);
-}
-
-/***************************************************************
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The record to be
-inserted can be in a data tuple or as a physical record. The other parameter
-must then be NULL. The cursor stays at the same position. */
-
-rec_t*
-page_cur_insert_rec_low(
-/*====================*/
- /* out: pointer to record if succeed, NULL
- otherwise */
- page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
- dict_index_t* index, /* in: record descriptor */
- rec_t* rec, /* in: pointer to a physical record or NULL */
- ulint* offsets,/* in: rec_get_offsets(rec, index) or NULL */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- byte* insert_buf = NULL;
- ulint rec_size;
- byte* page; /* the relevant page */
- rec_t* last_insert; /* cursor position at previous
- insert */
- rec_t* insert_rec; /* inserted record */
- ulint heap_no; /* heap number of the inserted
- record */
- rec_t* current_rec; /* current record after which the
- new record is inserted */
- rec_t* next_rec; /* next record after current before
- the insertion */
- ulint owner_slot; /* the slot which owns the
- inserted record */
- rec_t* owner_rec;
- ulint n_owned;
- mem_heap_t* heap = NULL;
- ulint comp;
-
- ut_ad(cursor && mtr);
- ut_ad(tuple || rec);
- ut_ad(!(tuple && rec));
- ut_ad(rec || dtuple_check_typed(tuple));
-
- page = page_cur_get_page(cursor);
- comp = page_is_comp(page);
- ut_ad(dict_table_is_comp(index->table) == !!comp);
-
- ut_ad(cursor->rec != page_get_supremum_rec(page));
-
- /* 1. Get the size of the physical record in the page */
- if (tuple != NULL) {
- rec_size = rec_get_converted_size(index, tuple);
- } else {
- if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- }
- ut_ad(rec_offs_validate(rec, index, offsets));
- rec_size = rec_offs_size(offsets);
- }
-
- /* 2. Try to find suitable space from page memory management */
- insert_buf = page_mem_alloc(page, rec_size, index, &heap_no);
-
- if (insert_buf == NULL) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(NULL);
- }
-
- /* 3. Create the record */
- if (tuple != NULL) {
- insert_rec = rec_convert_dtuple_to_rec(insert_buf,
- index, tuple);
- offsets = rec_get_offsets(insert_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- } else {
- insert_rec = rec_copy(insert_buf, rec, offsets);
- ut_ad(rec_offs_validate(rec, index, offsets));
- rec_offs_make_valid(insert_rec, index, offsets);
- }
-
- ut_ad(insert_rec);
- ut_ad(rec_size == rec_offs_size(offsets));
-
- /* 4. Insert the record in the linked list of records */
- current_rec = cursor->rec;
-
- ut_ad(!comp || rec_get_status(current_rec) <= REC_STATUS_INFIMUM);
- ut_ad(!comp || rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
-
- next_rec = page_rec_get_next(current_rec);
- ut_ad(!comp || rec_get_status(next_rec) != REC_STATUS_INFIMUM);
- page_rec_set_next(insert_rec, next_rec);
- page_rec_set_next(current_rec, insert_rec);
-
- page_header_set_field(page, PAGE_N_RECS, 1 + page_get_n_recs(page));
-
- /* 5. Set the n_owned field in the inserted record to zero,
- and set the heap_no field */
-
- rec_set_n_owned(insert_rec, comp, 0);
- rec_set_heap_no(insert_rec, comp, heap_no);
-
- /* 6. Update the last insertion info in page header */
-
- last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
- ut_ad(!last_insert || !comp
- || rec_get_node_ptr_flag(last_insert)
- == rec_get_node_ptr_flag(insert_rec));
-
- if (last_insert == NULL) {
- page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, PAGE_N_DIRECTION, 0);
-
- } else if ((last_insert == current_rec)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_LEFT)) {
-
- page_header_set_field(page, PAGE_DIRECTION, PAGE_RIGHT);
- page_header_set_field(page, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
-
- } else if ((page_rec_get_next(insert_rec) == last_insert)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_RIGHT)) {
-
- page_header_set_field(page, PAGE_DIRECTION, PAGE_LEFT);
- page_header_set_field(page, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
- } else {
- page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, PAGE_N_DIRECTION, 0);
- }
-
- page_header_set_ptr(page, PAGE_LAST_INSERT, insert_rec);
-
- /* 7. It remains to update the owner record. */
-
- owner_rec = page_rec_find_owner_rec(insert_rec);
- n_owned = rec_get_n_owned(owner_rec, comp);
- rec_set_n_owned(owner_rec, comp, n_owned + 1);
-
- /* 8. Now we have incremented the n_owned field of the owner
- record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
- we have to split the corresponding directory slot in two. */
-
- if (n_owned == PAGE_DIR_SLOT_MAX_N_OWNED) {
- owner_slot = page_dir_find_owner_slot(owner_rec);
- page_dir_split_slot(page, owner_slot);
- }
-
- /* 9. Write log record of the insert */
- page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec,
- index, mtr);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(insert_rec);
-}
-
-/**************************************************************
-Writes a log record of copying a record list end to a new created page. */
-UNIV_INLINE
-byte*
-page_copy_rec_list_to_created_page_write_log(
-/*=========================================*/
- /* out: 4-byte field where to
- write the log data length */
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- byte* log_ptr;
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- log_ptr = mlog_open_and_write_index(mtr, page, index,
- page_is_comp(page)
- ? MLOG_COMP_LIST_END_COPY_CREATED
- : MLOG_LIST_END_COPY_CREATED, 4);
- ut_a(log_ptr);
- mlog_close(mtr, log_ptr + 4);
-
- return(log_ptr);
-}
-
-/**************************************************************
-Parses a log record of copying a record list end to a new created page. */
-
-byte*
-page_parse_copy_rec_list_to_created_page(
-/*=====================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
-{
- byte* rec_end;
- ulint log_data_len;
-
- if (ptr + 4 > end_ptr) {
-
- return(NULL);
- }
-
- log_data_len = mach_read_from_4(ptr);
- ptr += 4;
-
- rec_end = ptr + log_data_len;
-
- if (rec_end > end_ptr) {
-
- return(NULL);
- }
-
- if (!page) {
-
- return(rec_end);
- }
-
- while (ptr < rec_end) {
- ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
- index, page, mtr);
- }
-
- ut_a(ptr == rec_end);
-
- page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, PAGE_N_DIRECTION, 0);
-
- return(rec_end);
-}
-
-/*****************************************************************
-Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied. */
-
-void
-page_copy_rec_list_end_to_created_page(
-/*===================================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: first record to copy */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- page_dir_slot_t* slot = 0; /* remove warning */
- byte* heap_top;
- rec_t* insert_rec = 0; /* remove warning */
- rec_t* prev_rec;
- ulint count;
- ulint n_recs;
- ulint slot_index;
- ulint rec_size;
- ulint log_mode;
- byte* log_ptr;
- ulint log_data_len;
- ulint comp = page_is_comp(page);
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(page_dir_get_n_heap(new_page) == 2);
- ut_ad(page != new_page);
- ut_ad(comp == page_is_comp(new_page));
-
- if (rec == page_get_infimum_rec(page)) {
-
- rec = page_rec_get_next(rec);
- }
-
- if (rec == page_get_supremum_rec(page)) {
-
- return;
- }
-
-#ifdef UNIV_DEBUG
- /* To pass the debug tests we have to set these dummy values
- in the debug version */
- page_dir_set_n_slots(new_page, UNIV_PAGE_SIZE / 2);
- page_header_set_ptr(new_page, PAGE_HEAP_TOP,
- new_page + UNIV_PAGE_SIZE - 1);
-#endif
-
- log_ptr = page_copy_rec_list_to_created_page_write_log(new_page,
- index, mtr);
-
- log_data_len = dyn_array_get_data_size(&(mtr->log));
-
- /* Individual inserts are logged in a shorter form */
-
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
-
- prev_rec = page_get_infimum_rec(new_page);
- if (comp) {
- heap_top = new_page + PAGE_NEW_SUPREMUM_END;
- } else {
- heap_top = new_page + PAGE_OLD_SUPREMUM_END;
- }
- count = 0;
- slot_index = 0;
- n_recs = 0;
-
- /* should be do ... until, comment by Jani */
- while (rec != page_get_supremum_rec(page)) {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- insert_rec = rec_copy(heap_top, rec, offsets);
-
- rec_set_next_offs(prev_rec, comp, insert_rec - new_page);
-
- rec_set_n_owned(insert_rec, comp, 0);
- rec_set_heap_no(insert_rec, comp, 2 + n_recs);
-
- rec_size = rec_offs_size(offsets);
-
- heap_top = heap_top + rec_size;
-
- ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
-
- count++;
- n_recs++;
-
- if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) {
-
- slot_index++;
-
- slot = page_dir_get_nth_slot(new_page, slot_index);
-
- page_dir_slot_set_rec(slot, insert_rec);
- page_dir_slot_set_n_owned(slot, count);
-
- count = 0;
- }
-
- page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
- index, mtr);
- prev_rec = insert_rec;
- rec = page_rec_get_next(rec);
- }
-
- if ((slot_index > 0) && (count + 1
- + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
- <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
- /* We can merge the two last dir slots. This operation is
- here to make this function imitate exactly the equivalent
- task made using page_cur_insert_rec, which we use in database
- recovery to reproduce the task performed by this function.
- To be able to check the correctness of recovery, it is good
- that it imitates exactly. */
-
- count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
-
- page_dir_slot_set_n_owned(slot, 0);
-
- slot_index--;
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
-
- ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
-
- mach_write_to_4(log_ptr, log_data_len);
-
- rec_set_next_offs(insert_rec, comp,
- comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM);
-
- slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
-
- page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
- page_dir_slot_set_n_owned(slot, count + 1);
-
- page_dir_set_n_slots(new_page, 2 + slot_index);
- page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top);
- page_dir_set_n_heap(new_page, 2 + n_recs);
- page_header_set_field(new_page, PAGE_N_RECS, n_recs);
-
- page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL);
- page_header_set_field(new_page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(new_page, PAGE_N_DIRECTION, 0);
-
- /* Restore the log mode */
-
- mtr_set_log_mode(mtr, log_mode);
-}
-
-/***************************************************************
-Writes log record of a record delete on a page. */
-UNIV_INLINE
-void
-page_cur_delete_rec_write_log(
-/*==========================*/
- rec_t* rec, /* in: record to be deleted */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- byte* log_ptr;
-
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
- log_ptr = mlog_open_and_write_index(mtr, rec, index,
- page_rec_is_comp(rec)
- ? MLOG_COMP_REC_DELETE
- : MLOG_REC_DELETE, 2);
-
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash recovery:
- in that case mlog_open returns NULL */
- return;
- }
-
- /* Write the cursor rec offset as a 2-byte ulint */
- mach_write_to_2(log_ptr, page_offset(rec));
-
- mlog_close(mtr, log_ptr + 2);
-}
-
-/***************************************************************
-Parses log record of a record delete on a page. */
-
-byte*
-page_cur_parse_delete_rec(
-/*======================*/
- /* out: pointer to record end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- dict_index_t* index, /* in: record descriptor */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
-{
- ulint offset;
- page_cur_t cursor;
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- /* Read the cursor rec offset as a 2-byte ulint */
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(offset <= UNIV_PAGE_SIZE);
-
- if (page) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_t* rec = page + offset;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- page_cur_position(rec, &cursor);
-
- page_cur_delete_rec(&cursor, index,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
- mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- return(ptr);
-}
-
-/***************************************************************
-Deletes a record at the page cursor. The cursor is moved to the next
-record after the deleted one. */
-
-void
-page_cur_delete_rec(
-/*================*/
- page_cur_t* cursor, /* in: a page cursor */
- dict_index_t* index, /* in: record descriptor */
- const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr) /* in: mini-transaction handle */
-{
- page_dir_slot_t* cur_dir_slot;
- page_dir_slot_t* prev_slot;
- page_t* page;
- rec_t* current_rec;
- rec_t* prev_rec = NULL;
- rec_t* next_rec;
- ulint cur_slot_no;
- ulint cur_n_owned;
- rec_t* rec;
-
- ut_ad(cursor && mtr);
-
- page = page_cur_get_page(cursor);
- current_rec = cursor->rec;
- ut_ad(rec_offs_validate(current_rec, index, offsets));
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- /* The record must not be the supremum or infimum record. */
- ut_ad(current_rec != page_get_supremum_rec(page));
- ut_ad(current_rec != page_get_infimum_rec(page));
-
- /* Save to local variables some data associated with current_rec */
- cur_slot_no = page_dir_find_owner_slot(current_rec);
- cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
- cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
-
- /* 0. Write the log record */
- page_cur_delete_rec_write_log(current_rec, index, mtr);
-
- /* 1. Reset the last insert info in the page header and increment
- the modify clock for the frame */
-
- page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
-
- /* The page gets invalid for optimistic searches: increment the
- frame modify clock */
-
- buf_frame_modify_clock_inc(page);
-
- /* 2. Find the next and the previous record. Note that the cursor is
- left at the next record. */
-
- ut_ad(cur_slot_no > 0);
- prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1);
-
- rec = page_dir_slot_get_rec(prev_slot);
-
- /* rec now points to the record of the previous directory slot. Look
- for the immediate predecessor of current_rec in a loop. */
-
- while(current_rec != rec) {
- prev_rec = rec;
- rec = page_rec_get_next(rec);
- }
-
- page_cur_move_to_next(cursor);
- next_rec = cursor->rec;
-
- /* 3. Remove the record from the linked list of records */
-
- page_rec_set_next(prev_rec, next_rec);
- page_header_set_field(page, PAGE_N_RECS,
- (ulint)(page_get_n_recs(page) - 1));
-
- /* 4. If the deleted record is pointed to by a dir slot, update the
- record pointer in slot. In the following if-clause we assume that
- prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED
- >= 2. */
-
-#if PAGE_DIR_SLOT_MIN_N_OWNED < 2
-# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2"
-#endif
- ut_ad(cur_n_owned > 1);
-
- if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
- page_dir_slot_set_rec(cur_dir_slot, prev_rec);
- }
-
- /* 5. Update the number of owned records of the slot */
-
- page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1);
-
- /* 6. Free the memory occupied by the record */
- page_mem_free(page, current_rec, offsets);
-
- /* 7. Now we have decremented the number of owned records of the slot.
- If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
- slots. */
-
- if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
- page_dir_balance_slot(page, cur_slot_no);
- }
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-/***********************************************************************
-Print the first n numbers, generated by page_cur_lcg_prng() to make sure
-(visually) that it works properly. */
-void
-test_page_cur_lcg_prng(
-/*===================*/
- int n) /* in: print first n numbers */
-{
- int i;
- unsigned long long rnd;
-
- for (i = 0; i < n; i++) {
- rnd = page_cur_lcg_prng();
- printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n",
- rnd,
- rnd % 2,
- rnd % 3,
- rnd % 5,
- rnd % 7,
- rnd % 11);
- }
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
deleted file mode 100644
index 543cf9e34eb..00000000000
--- a/storage/innobase/page/page0page.c
+++ /dev/null
@@ -1,2038 +0,0 @@
-/******************************************************
-Index page routines
-
-(c) 1994-1996 Innobase Oy
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#define THIS_MODULE
-#include "page0page.h"
-#ifdef UNIV_NONINL
-#include "page0page.ic"
-#endif
-#undef THIS_MODULE
-
-#include "page0cur.h"
-#include "lock0lock.h"
-#include "fut0lst.h"
-#include "btr0sea.h"
-#include "buf0buf.h"
-#include "srv0srv.h"
-#include "btr0btr.h"
-
-/* THE INDEX PAGE
- ==============
-
-The index page consists of a page header which contains the page's
-id and other information. On top of it are the index records
-in a heap linked into a one way linear list according to alphabetic order.
-
-Just below page end is an array of pointers which we call page directory,
-to about every sixth record in the list. The pointers are placed in
-the directory in the alphabetical order of the records pointed to,
-enabling us to make binary search using the array. Each slot n:o I
-in the directory points to a record, where a 4-bit field contains a count
-of those records which are in the linear list between pointer I and
-the pointer I - 1 in the directory, including the record
-pointed to by pointer I and not including the record pointed to by I - 1.
-We say that the record pointed to by slot I, or that slot I, owns
-these records. The count is always kept in the range 4 to 8, with
-the exception that it is 1 for the first slot, and 1--8 for the second slot.
-
-An essentially binary search can be performed in the list of index
-records, like we could do if we had pointer to every record in the
-page directory. The data structure is, however, more efficient when
-we are doing inserts, because most inserts are just pushed on a heap.
-Only every 8th insert requires block move in the directory pointer
-table, which itself is quite small. A record is deleted from the page
-by just taking it off the linear list and updating the number of owned
-records-field of the record which owns it, and updating the page directory,
-if necessary. A special case is the one when the record owns itself.
-Because the overhead of inserts is so small, we may also increase the
-page size from the projected default of 8 kB to 64 kB without too
-much loss of efficiency in inserts. A bigger page becomes relevant
-when the disk transfer rate rises relative to the seek and latency time.
-On the present system, the page size is set so that the page transfer
-time (3 ms) is 20 % of the disk random access time (15 ms).
-
-When the page is split, merged, or becomes full but contains deleted
-records, we have to reorganize the page.
-
-Assuming a page size of 8 kB, a typical index page of a secondary
-index contains 300 index entries, and the size of the page directory
-is 50 x 4 bytes = 200 bytes. */
-
-/*******************************************************************
-Looks for the directory slot which owns the given record.
-
-Starting from rec, the record list is followed through the next-record
-offsets until a record with a nonzero n_owned field is reached. The page
-directory is then scanned, beginning at slot number n_slots - 1 and
-advancing PAGE_DIR_SLOT_SIZE bytes at a time, for the slot whose stored
-2-byte value equals the encoded page offset of that record. If slot 0 is
-reached without a match, diagnostics are written to stderr,
-buf_page_print() is called for the page, and ut_error is invoked. */
-
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
- /* out: the directory slot number */
- rec_t* rec) /* in: the physical record */
-{
- page_t* page;
- register uint16 rec_offs_bytes;
- register page_dir_slot_t* slot;
- register const page_dir_slot_t* first_slot;
- register rec_t* r = rec;
-
- ut_ad(page_rec_check(rec));
-
- page = buf_frame_align(rec);
- first_slot = page_dir_get_nth_slot(page, 0);
- slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
-
- if (page_is_comp(page)) {
- while (rec_get_n_owned(r, TRUE) == 0) { /* r is not yet the owner */
- r = page + rec_get_next_offs(r, TRUE);
- ut_ad(r >= page + PAGE_NEW_SUPREMUM);
- ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
- }
- } else {
- while (rec_get_n_owned(r, FALSE) == 0) { /* same walk, old format */
- r = page + rec_get_next_offs(r, FALSE);
- ut_ad(r >= page + PAGE_OLD_SUPREMUM);
- ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
- }
- }
-
- rec_offs_bytes = mach_encode_2(r - page); /* value compared with the raw slot contents below */
-
- while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
-
- if (UNIV_UNLIKELY(slot == first_slot)) { /* slot 0 did not match either */
- fprintf(stderr,
- "InnoDB: Probable data corruption on"
- " page %lu\n"
- "InnoDB: Original record ",
- (ulong) buf_frame_get_page_no(page));
-
- if (page_is_comp(page)) {
- fputs("(compact record)", stderr);
- } else {
- rec_print_old(stderr, rec);
- }
-
- fputs("\n"
- "InnoDB: on that page.\n"
- "InnoDB: Cannot find the dir slot for record ",
- stderr);
- if (page_is_comp(page)) {
- fputs("(compact record)", stderr);
- } else {
- rec_print_old(stderr, page
- + mach_decode_2(rec_offs_bytes));
- }
- fputs("\n"
- "InnoDB: on that page!\n", stderr);
-
- buf_page_print(page);
-
- ut_error;
- }
-
- slot += PAGE_DIR_SLOT_SIZE;
- }
-
- return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE); /* distance from slot 0 in slot units */
-}
-
-/******************************************************************
-Used to check the consistency of a directory slot. Every check is made
-with ut_a, so a violated condition is not reported through the return
-value. The slot must lie between slot number n_slots - 1 and slot 0 of
-its page, the record it points to must pass page_rec_check, and the
-n_owned field of that record must be 1 for slot 0, in the range
-1..PAGE_DIR_SLOT_MAX_N_OWNED for slot number n_slots - 1, and in the range
-PAGE_DIR_SLOT_MIN_N_OWNED..PAGE_DIR_SLOT_MAX_N_OWNED for any other slot. */
-static
-ibool
-page_dir_slot_check(
-/*================*/
- /* out: TRUE if succeed */
- page_dir_slot_t* slot) /* in: slot */
-{
- page_t* page;
- ulint n_slots;
- ulint n_owned;
-
- ut_a(slot);
-
- page = buf_frame_align(slot);
-
- n_slots = page_dir_get_n_slots(page);
-
- ut_a(slot <= page_dir_get_nth_slot(page, 0)); /* address bound: slot 0 */
- ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1)); /* address bound: last slot */
-
- ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
-
- n_owned = rec_get_n_owned(page_dir_slot_get_rec(slot),
- page_is_comp(page));
-
- if (slot == page_dir_get_nth_slot(page, 0)) {
- ut_a(n_owned == 1);
- } else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
- ut_a(n_owned >= 1);
- ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
- } else {
- ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
- ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
- }
-
- return(TRUE);
-}
-
-/*****************************************************************
-Sets the max trx id field value. The id is stored with mach_write_to_8
-at page + PAGE_HEADER + PAGE_MAX_TRX_ID. When the buffer block of the page
-has is_hashed set, btr_search_latch is x-locked before the write and
-released after it; is_hashed is read separately for the lock and for the
-unlock. */
-
-void
-page_set_max_trx_id(
-/*================*/
- page_t* page, /* in: page */
- dulint trx_id) /* in: transaction id */
-{
- buf_block_t* block;
-
- ut_ad(page);
-
- block = buf_block_align(page);
-
- if (block->is_hashed) {
- rw_lock_x_lock(&btr_search_latch);
- }
-
- /* It is not necessary to write this change to the redo log, as
- during a database recovery we assume that the max trx id of every
- page is the maximum trx id assigned before the crash. */
-
- mach_write_to_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID, trx_id);
-
- if (block->is_hashed) {
- rw_lock_x_unlock(&btr_search_latch);
- }
-}
-
-/*****************************************************************
-Out-of-line wrapper which forwards to page_get_free_space_of_empty()
-and hands back its result unchanged. */
-
-ulint
-page_get_free_space_of_empty_noninline(
-/*===================================*/
-			/* out: free space */
-	ulint	comp)	/* in: nonzero=compact page format */
-{
-	ulint	free_space;
-
-	free_space = page_get_free_space_of_empty(comp);
-
-	return(free_space);
-}
-
-/****************************************************************
-Allocates a block of memory from an index page. Two sources are tried in
-order. First, only the head record of the PAGE_FREE list is examined: if
-its size is at least need, it is unlinked from the list, PAGE_GARBAGE is
-decreased by need, and the start of that record is returned together with
-the heap number stored in the record. Otherwise, if
-page_get_max_insert_size(page, 1) is at least need, the block is taken at
-PAGE_HEAP_TOP, which is advanced by need bytes, and the heap number is the
-current n_heap value, which is then incremented. If neither source
-suffices, NULL is returned and *heap_no is left untouched. */
-
-byte*
-page_mem_alloc(
-/*===========*/
- /* out: pointer to start of allocated
- buffer, or NULL if allocation fails */
- page_t* page, /* in: index page */
- ulint need, /* in: number of bytes needed */
- dict_index_t* index, /* in: record descriptor */
- ulint* heap_no)/* out: this contains the heap number
- of the allocated record
- if allocation succeeds */
-{
- rec_t* rec;
- byte* block;
- ulint avl_space;
- ulint garbage;
-
- ut_ad(page && heap_no);
-
- /* If there are records in the free list, look if the first is
- big enough */
-
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- if (rec) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 holds the element count of the array */
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (rec_offs_size(offsets) >= need) {
- page_header_set_ptr(page, PAGE_FREE,
- page_rec_get_next(rec)); /* unlink the head of the free list */
-
- garbage = page_header_get_field(page, PAGE_GARBAGE);
- ut_ad(garbage >= need);
-
- page_header_set_field(page, PAGE_GARBAGE,
- garbage - need); /* reduced by need, not by the size of the reused record */
-
- *heap_no = rec_get_heap_no(rec, page_is_comp(page));
-
- block = rec_get_start(rec, offsets);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(block);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- /* Could not find space from the free list, try top of heap */
-
- avl_space = page_get_max_insert_size(page, 1);
-
- if (avl_space >= need) {
- block = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
- page_header_set_ptr(page, PAGE_HEAP_TOP, block + need);
- *heap_no = page_dir_get_n_heap(page);
-
- page_dir_set_n_heap(page, 1 + *heap_no);
-
- return(block);
- }
-
- return(NULL);
-}
-
-/**************************************************************
-Writes the initial log record of a page creation. The record type is
-MLOG_COMP_PAGE_CREATE when comp is nonzero and MLOG_PAGE_CREATE
-otherwise. */
-UNIV_INLINE
-void
-page_create_write_log(
-/*==================*/
-	buf_frame_t*	frame,	/* in: a buffer frame where the page is
-				created */
-	mtr_t*		mtr,	/* in: mini-transaction handle */
-	ulint		comp)	/* in: nonzero=compact page format */
-{
-	byte	type;
-
-	if (comp) {
-		type = MLOG_COMP_PAGE_CREATE;
-	} else {
-		type = MLOG_PAGE_CREATE;
-	}
-
-	mlog_write_initial_log_record(frame, type, mtr);
-}
-
-/***************************************************************
-Parses a redo log record of creating a page. Nothing is consumed from
-the buffer: ptr is returned unchanged. When page is non-NULL the page is
-(re)created by calling page_create(page, mtr, comp). */
-
-byte*
-page_parse_create(
-/*==============*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)), /* in: buffer end */
- ulint comp, /* in: nonzero=compact page format */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr); /* the only use of end_ptr in this function */
-
- /* The record is empty, except for the record initial part */
-
- if (page) {
- page_create(page, mtr, comp);
- }
-
- return(ptr);
-}
-
-/**************************************************************
-The index page creation function. The steps, in order: the modify clock
-of the frame is incremented; a page-creation log record is written; the
-page type is set to FIL_PAGE_INDEX; the infimum and supremum records are
-built from one-field data tuples and placed consecutively starting at
-page + PAGE_DATA; the page header fields are initialized; the area from
-the end of the supremum record up to
-page + UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START is zero-filled; directory
-slots 0 and 1 are pointed at infimum and supremum; and the next-record
-offset of infimum is set to supremum while that of supremum is set to 0.
-The dummy index used for building the records is srv_sys->dummy_ind2 for
-the compact format and srv_sys->dummy_ind1 otherwise. */
-
-page_t*
-page_create(
-/*========*/
- /* out: pointer to the page */
- buf_frame_t* frame, /* in: a buffer frame where the page is
- created */
- mtr_t* mtr, /* in: mini-transaction handle */
- ulint comp) /* in: nonzero=compact page format */
-{
- page_dir_slot_t* slot;
- mem_heap_t* heap;
- dtuple_t* tuple;
- dfield_t* field;
- byte* heap_top;
- rec_t* infimum_rec;
- rec_t* supremum_rec;
- page_t* page;
- dict_index_t* index;
- ulint* offsets;
-
- index = comp ? srv_sys->dummy_ind2 : srv_sys->dummy_ind1;
-
- ut_ad(frame && mtr);
-#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
-#endif
-#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
-#endif
-
- /* 1. INCREMENT MODIFY CLOCK */
- buf_frame_modify_clock_inc(frame);
-
- /* 2. WRITE LOG INFORMATION */
- page_create_write_log(frame, mtr, comp);
-
- page = frame;
-
- fil_page_set_type(page, FIL_PAGE_INDEX);
-
- heap = mem_heap_create(200);
-
- /* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
-
- /* Create first a data tuple for infimum record */
- tuple = dtuple_create(heap, 1);
- dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
- field = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(field, "infimum", 8); /* 7 characters plus the terminating NUL */
- dtype_set(dfield_get_type(field),
- DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
- /* Set the corresponding physical record to its place in the page
- record heap */
-
- heap_top = page + PAGE_DATA;
-
- infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);
-
- ut_a(infimum_rec == page
- + (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
-
- rec_set_n_owned(infimum_rec, comp, 1);
- rec_set_heap_no(infimum_rec, comp, 0);
- offsets = rec_get_offsets(infimum_rec, index, NULL,
- ULINT_UNDEFINED, &heap);
-
- heap_top = rec_get_end(infimum_rec, offsets); /* supremum is built right after infimum */
-
- /* Create then a tuple for supremum */
-
- tuple = dtuple_create(heap, 1);
- dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
- field = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(field, "supremum", comp ? 8 : 9); /* 8 characters; the NUL is included only in the old format */
- dtype_set(dfield_get_type(field),
- DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
-
- supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);
-
- ut_a(supremum_rec == page
- + (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM));
-
- rec_set_n_owned(supremum_rec, comp, 1);
- rec_set_heap_no(supremum_rec, comp, 1);
-
- offsets = rec_get_offsets(supremum_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- heap_top = rec_get_end(supremum_rec, offsets);
-
- ut_ad(heap_top == page
- + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
-
- mem_heap_free(heap);
-
- /* 4. INITIALIZE THE PAGE */
-
- page_header_set_field(page, PAGE_N_DIR_SLOTS, 2);
- page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top);
- page_header_set_field(page, PAGE_N_HEAP, comp ? 0x8002 : 2); /* NOTE(review): the 0x8000 bit presumably flags the compact format - confirm against page_is_comp() */
- page_header_set_ptr(page, PAGE_FREE, NULL);
- page_header_set_field(page, PAGE_GARBAGE, 0);
- page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, PAGE_N_DIRECTION, 0);
- page_header_set_field(page, PAGE_N_RECS, 0);
- page_set_max_trx_id(page, ut_dulint_zero);
- memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
- - (heap_top - page)); /* zero from heap_top up to page + UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START */
-
- /* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
-
- /* Set the slots to point to infimum and supremum. */
-
- slot = page_dir_get_nth_slot(page, 0);
- page_dir_slot_set_rec(slot, infimum_rec);
-
- slot = page_dir_get_nth_slot(page, 1);
- page_dir_slot_set_rec(slot, supremum_rec);
-
- /* Set the next pointers in infimum and supremum */
-
- rec_set_next_offs(infimum_rec, comp, (ulint)(supremum_rec - page));
- rec_set_next_offs(supremum_rec, comp, 0);
-
- return(page);
-}
-
-/*****************************************************************
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page.
-
-A cursor is positioned on rec (and moved one step forward if that
-position is before the first record), and a second cursor is set before
-the first record of new_page. Each record from the first cursor up to,
-but not including, the supremum of page is inserted at the second cursor
-with page_cur_rec_insert, after which both cursors advance. If an insert
-fails, both pages are printed, the offsets of rec and of both cursors are
-written to stderr, and ut_error is invoked. */
-
-void
-page_copy_rec_list_end_no_locks(
-/*============================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- page_cur_t cur1;
- page_cur_t cur2;
- rec_t* sup;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 holds the element count of the array */
-
- page_cur_position(rec, &cur1);
-
- if (page_cur_is_before_first(&cur1)) {
-
- page_cur_move_to_next(&cur1);
- }
-
- ut_a((ibool)!!page_is_comp(new_page)
- == dict_table_is_comp(index->table));
- ut_a(page_is_comp(new_page) == page_is_comp(page));
- ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
- (page_is_comp(new_page)
- ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); /* NOTE(review): the 2 bytes at page end - 10 are presumably directory slot 0 - confirm */
-
- page_cur_set_before_first(new_page, &cur2);
-
- /* Copy records from the original page to the new page */
-
- sup = page_get_supremum_rec(page);
-
- for (;;) {
- rec_t* cur1_rec = page_cur_get_rec(&cur1);
- if (cur1_rec == sup) {
- break;
- }
- offsets = rec_get_offsets(cur1_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- if (UNIV_UNLIKELY(!page_cur_rec_insert(&cur2, cur1_rec, index,
- offsets, mtr))) {
- /* Track an assertion failure reported on the mailing
- list on June 18th, 2003 */
-
- buf_page_print(new_page);
- buf_page_print(page);
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- "InnoDB: rec offset %lu, cur1 offset %lu,"
- " cur2 offset %lu\n",
- (ulong)(rec - page),
- (ulong)(page_cur_get_rec(&cur1) - page),
- (ulong)(page_cur_get_rec(&cur2) - new_page));
-
- ut_error;
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/*****************************************************************
-Copies the records of page, beginning with rec and including it, to
-new_page. Infimum and supremum records are not copied. The records are
-copied to the start of the record list on new_page. When the heap of
-new_page holds exactly 2 records the copy is delegated to
-page_copy_rec_list_end_to_created_page, otherwise to
-page_copy_rec_list_end_no_locks. Afterwards the lock table, the max trx
-id of new_page and the hash index entries are brought up to date. */
-
-void
-page_copy_rec_list_end(
-/*===================*/
-	page_t*		new_page,	/* in: index page to copy to */
-	page_t*		page,		/* in: index page */
-	rec_t*		rec,		/* in: record on page */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	if (page_dir_get_n_heap(new_page) != 2) {
-		/* new_page has more than the two initial heap records */
-		page_copy_rec_list_end_no_locks(new_page, page, rec,
-						index, mtr);
-	} else {
-		page_copy_rec_list_end_to_created_page(new_page, page, rec,
-						       index, mtr);
-	}
-
-	/* Move the record locks first */
-	lock_move_rec_list_end(new_page, page, rec);
-
-	/* Then propagate the max trx id of the source page */
-	page_update_max_trx_id(new_page, page_get_max_trx_id(page));
-
-	/* Finally take care of the hash index entries */
-	btr_search_move_or_delete_hash_entries(new_page, page, index);
-}
-
-/*****************************************************************
-Copies records from page to new_page, up to the given record,
-NOT including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page.
-
-If rec is the record a before-first cursor on page points to, the
-function returns at once without touching new_page, the lock table, the
-max trx id or the hash index. Otherwise each copied record is inserted
-with page_cur_rec_insert (a failed insert trips ut_a), starting after the
-record that was last on new_page before the call; that record is passed
-to lock_move_rec_list_start as old_end. */
-
-void
-page_copy_rec_list_start(
-/*=====================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- page_cur_t cur1;
- page_cur_t cur2;
- rec_t* old_end;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 holds the element count of the array */
-
- page_cur_set_before_first(page, &cur1);
-
- if (rec == page_cur_get_rec(&cur1)) {
-
- return; /* heap is still NULL here, nothing to free */
- }
-
- page_cur_move_to_next(&cur1);
-
- page_cur_set_after_last(new_page, &cur2);
- page_cur_move_to_prev(&cur2);
- old_end = page_cur_get_rec(&cur2);
-
- /* Copy records from the original page to the new page */
-
- while (page_cur_get_rec(&cur1) != rec) {
- rec_t* ins_rec;
- rec_t* cur1_rec = page_cur_get_rec(&cur1);
- offsets = rec_get_offsets(cur1_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- ins_rec = page_cur_rec_insert(&cur2, cur1_rec, index,
- offsets, mtr);
- ut_a(ins_rec);
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
-
- /* Update the lock table, MAX_TRX_ID, and possible hash index */
-
- lock_move_rec_list_start(new_page, page, rec, old_end);
-
- page_update_max_trx_id(new_page, page_get_max_trx_id(page));
-
- btr_search_move_or_delete_hash_entries(new_page, page, index);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/**************************************************************
-Writes the log record of deleting the start or the end of a record list.
-The body of the log record is the page offset of rec, stored in 2 bytes.
-Nothing more is written when mlog_open_and_write_index() returns NULL. */
-UNIV_INLINE
-void
-page_delete_rec_list_write_log(
-/*===========================*/
-	rec_t*		rec,	/* in: record on page */
-	dict_index_t*	index,	/* in: record descriptor */
-	byte		type,	/* in: operation type:
-				MLOG_LIST_END_DELETE, ... */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	byte*	log_ptr;
-
-	ut_ad(type == MLOG_LIST_END_DELETE
-	      || type == MLOG_LIST_START_DELETE
-	      || type == MLOG_COMP_LIST_END_DELETE
-	      || type == MLOG_COMP_LIST_START_DELETE);
-
-	log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
-
-	if (!log_ptr) {
-
-		return;
-	}
-
-	/* The only parameter is the record offset, 2 bytes long */
-	mach_write_to_2(log_ptr, page_offset(rec));
-	mlog_close(mtr, log_ptr + 2);
-}
-
-/**************************************************************
-Parses the log record of deleting the start or the end of a record list
-and, when a page is given, applies it. The log record body is a 2-byte
-record offset. NULL is returned if fewer than 2 bytes are available in
-the buffer; otherwise the pointer just past the offset is returned. */
-
-byte*
-page_parse_delete_rec_list(
-/*=======================*/
-				/* out: end of log record or NULL */
-	byte		type,	/* in: MLOG_LIST_END_DELETE,
-				MLOG_LIST_START_DELETE,
-				MLOG_COMP_LIST_END_DELETE or
-				MLOG_COMP_LIST_START_DELETE */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	dict_index_t*	index,	/* in: record descriptor */
-	page_t*		page,	/* in: page or NULL */
-	mtr_t*		mtr)	/* in: mtr or NULL */
-{
-	ulint	offset;
-
-	ut_ad(type == MLOG_LIST_END_DELETE
-	      || type == MLOG_LIST_START_DELETE
-	      || type == MLOG_COMP_LIST_END_DELETE
-	      || type == MLOG_COMP_LIST_START_DELETE);
-
-	if (end_ptr < ptr + 2) {
-		/* The 2-byte offset is not completely in the buffer */
-
-		return(NULL);
-	}
-
-	offset = mach_read_from_2(ptr);
-	ptr += 2;
-
-	if (page) {
-		ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-		if (type != MLOG_LIST_END_DELETE
-		    && type != MLOG_COMP_LIST_END_DELETE) {
-			page_delete_rec_list_start(page, page + offset,
-						   index, mtr);
-		} else {
-			/* The record count and size are recomputed by
-			the callee */
-			page_delete_rec_list_end(page, page + offset, index,
-						 ULINT_UNDEFINED,
-						 ULINT_UNDEFINED, mtr);
-		}
-	}
-
-	return(ptr);
-}
-
-/*****************************************************************
-Deletes records from a page from a given record onward, including that record.
-The infimum and supremum records are not deleted.
-
-If rec is the infimum, deletion starts from the record following it. The
-log record is written before the check for rec being the supremum, so it
-is written even when there is nothing to delete. When either n_recs or
-size is ULINT_UNDEFINED, both are recomputed by walking the list from rec
-to the supremum. The slot owning rec is then made to point at the
-supremum with a reduced n_owned, the directory is truncated to end at that
-slot, the predecessor of rec is linked to the supremum, and the removed
-chain is put in front of the PAGE_FREE list. PAGE_GARBAGE grows by size
-and PAGE_N_RECS shrinks by n_recs. */
-
-void
-page_delete_rec_list_end(
-/*=====================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- ulint n_recs, /* in: number of records to delete,
- or ULINT_UNDEFINED if not known */
- ulint size, /* in: the sum of the sizes of the
- records in the end of the chain to
- delete, or ULINT_UNDEFINED if not known */
- mtr_t* mtr) /* in: mtr */
-{
- page_dir_slot_t* slot;
- ulint slot_index;
- rec_t* last_rec;
- rec_t* prev_rec;
- rec_t* free;
- rec_t* rec2;
- ulint count;
- ulint n_owned;
- rec_t* sup;
- ulint comp;
-
- /* Reset the last insert info in the page header and increment
- the modify clock for the frame */
-
- ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
- page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
-
- /* The page gets invalid for optimistic searches: increment the
- frame modify clock */
-
- buf_frame_modify_clock_inc(page);
-
- sup = page_get_supremum_rec(page);
-
- comp = page_is_comp(page);
- if (page_rec_is_infimum_low(rec - page)) {
- rec = page_rec_get_next(rec);
- }
-
- page_delete_rec_list_write_log(rec, index, comp
- ? MLOG_COMP_LIST_END_DELETE
- : MLOG_LIST_END_DELETE, mtr);
-
- if (rec == sup) {
-
- return;
- }
-
- prev_rec = page_rec_get_prev(rec); /* last record that stays on the list */
-
- last_rec = page_rec_get_prev(sup); /* last record that is removed */
-
- if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 holds the element count of the array */
- /* Calculate the sum of sizes and the number of records */
- size = 0;
- n_recs = 0;
- rec2 = rec;
-
- while (rec2 != sup) {
- ulint s;
- offsets = rec_get_offsets(rec2, index, offsets,
- ULINT_UNDEFINED, &heap);
- s = rec_offs_size(offsets);
- ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
- < UNIV_PAGE_SIZE);
- ut_ad(size + s < UNIV_PAGE_SIZE);
- size += s;
- n_recs++;
-
- rec2 = page_rec_get_next(rec2);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- ut_ad(size < UNIV_PAGE_SIZE);
-
- /* Update the page directory; there is no need to balance the number
- of the records owned by the supremum record, as it is allowed to be
- less than PAGE_DIR_SLOT_MIN_N_OWNED */
-
- rec2 = rec;
- count = 0;
-
- while (rec_get_n_owned(rec2, comp) == 0) { /* count the records from rec up to, not including, its owner */
- count++;
-
- rec2 = page_rec_get_next(rec2);
- }
-
- ut_ad(rec_get_n_owned(rec2, comp) - count > 0);
-
- n_owned = rec_get_n_owned(rec2, comp) - count;
-
- slot_index = page_dir_find_owner_slot(rec2);
- slot = page_dir_get_nth_slot(page, slot_index);
-
- page_dir_slot_set_rec(slot, sup);
- page_dir_slot_set_n_owned(slot, n_owned);
-
- page_dir_set_n_slots(page, slot_index + 1); /* this slot becomes the last one */
-
- /* Remove the record chain segment from the record chain */
- page_rec_set_next(prev_rec, page_get_supremum_rec(page));
-
- /* Catenate the deleted chain segment to the page free list */
-
- free = page_header_get_ptr(page, PAGE_FREE);
-
- page_rec_set_next(last_rec, free);
- page_header_set_ptr(page, PAGE_FREE, rec);
-
- page_header_set_field(page, PAGE_GARBAGE, size
- + page_header_get_field(page, PAGE_GARBAGE));
-
- page_header_set_field(page, PAGE_N_RECS,
- (ulint)(page_get_n_recs(page) - n_recs));
-}
-
-/*****************************************************************
-Deletes records from page, up to the given record, NOT including
-that record. Infimum and supremum records are not deleted.
-
-A single list-start-delete log record (compact or old-style type,
-depending on the page format) is written first, also in the case where
-rec is the record a before-first cursor points to and the function then
-returns without deleting anything. The records are deleted one at a time
-with page_cur_delete_rec while the log mode of mtr is MTR_LOG_NONE; the
-previous log mode is restored before returning. */
-
-void
-page_delete_rec_list_start(
-/*=======================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- page_cur_t cur1;
- ulint log_mode;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- mem_heap_t* heap = NULL;
- byte type;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* element 0 holds the element count of the array */
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- if (page_is_comp(page)) {
- type = MLOG_COMP_LIST_START_DELETE;
- } else {
- type = MLOG_LIST_START_DELETE;
- }
-
- page_delete_rec_list_write_log(rec, index, type, mtr);
-
- page_cur_set_before_first(page, &cur1);
-
- if (rec == page_cur_get_rec(&cur1)) {
-
- return;
- }
-
- page_cur_move_to_next(&cur1);
-
- /* Individual deletes are not logged */
-
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
- while (page_cur_get_rec(&cur1) != rec) { /* NOTE(review): no explicit cursor advance here; presumably page_cur_delete_rec moves cur1 to the next record - confirm */
- offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
- offsets, ULINT_UNDEFINED, &heap);
- page_cur_delete_rec(&cur1, index, offsets, mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- /* Restore log mode */
-
- mtr_set_log_mode(mtr, log_mode);
-}
-
-/*****************************************************************
-Moves the end of the record list, starting from split_rec and including
-it, to another page. The records are first copied to new_page; the
-growth of the data size and of the record count of new_page caused by
-the copy is then passed to page_delete_rec_list_end() as the size and
-the number of the records to delete from page. */
-
-void
-page_move_rec_list_end(
-/*===================*/
-	page_t*		new_page,	/* in: index page where to move */
-	page_t*		page,		/* in: index page */
-	rec_t*		split_rec,	/* in: first record to move */
-	dict_index_t*	index,		/* in: record descriptor */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	ulint	old_data_size = page_get_data_size(new_page);
-	ulint	old_n_recs = page_get_n_recs(new_page);
-	ulint	new_data_size;
-	ulint	n_recs_moved;
-
-	page_copy_rec_list_end(new_page, page, split_rec, index, mtr);
-
-	/* Measure new_page again to learn what the copy added */
-	new_data_size = page_get_data_size(new_page);
-	n_recs_moved = page_get_n_recs(new_page) - old_n_recs;
-
-	ut_ad(new_data_size >= old_data_size);
-
-	page_delete_rec_list_end(page, split_rec, index, n_recs_moved,
-				 new_data_size - old_data_size, mtr);
-}
-
-/*****************************************************************
-Moves record list start to another page. Moved records do not include
-split_rec. The move is done in two steps: the records are first copied
-with page_copy_rec_list_start and then removed from page with
-page_delete_rec_list_start. */
-
-void
-page_move_rec_list_start(
-/*=====================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record not to move */
- dict_index_t* index, /* in: record descriptor */
- mtr_t* mtr) /* in: mtr */
-{
- page_copy_rec_list_start(new_page, page, split_rec, index, mtr);
-
- page_delete_rec_list_start(page, split_rec, index, mtr);
-}
-
-/***************************************************************************
-This is a low-level operation which is used in a database index creation
-to update the page number of a created B-tree to a data dictionary record.
-The field is located with rec_get_nth_field_old, its length is asserted
-(in debug builds) to be 4 bytes, and the value is written with
-mlog_write_ulint using MLOG_4BYTES. */
-
-void
-page_rec_write_index_page_no(
-/*=========================*/
- rec_t* rec, /* in: record to update */
- ulint i, /* in: index of the field to update */
- ulint page_no,/* in: value to write */
- mtr_t* mtr) /* in: mtr */
-{
- byte* data;
- ulint len;
-
- data = rec_get_nth_field_old(rec, i, &len);
-
- ut_ad(len == 4);
-
- mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr);
-}
-
-/******************************************************************
-Used to delete n slots from the directory. This function updates
-also n_owned fields in the records, so that the first slot after
-the deleted ones inherits the records of the deleted slots.
-
-The n_owned values of slots start..start + n - 1 are summed and reset to
-0, the sum is added to the n_owned of slot start + n, the record pointers
-of slots start + n..n_slots - 1 are each copied n slots down, and
-PAGE_N_DIR_SLOTS is decreased by n. The debug assertions require n == 1,
-start > 0 and start + n < n_slots. */
-UNIV_INLINE
-void
-page_dir_delete_slots(
-/*==================*/
- page_t* page, /* in: the index page */
- ulint start, /* in: first slot to be deleted */
- ulint n) /* in: number of slots to delete (currently
- only n == 1 allowed) */
-{
- page_dir_slot_t* slot;
- ulint i;
- ulint sum_owned = 0;
- ulint n_slots;
- rec_t* rec;
-
- ut_ad(n == 1);
- ut_ad(start > 0);
- ut_ad(start + n < page_dir_get_n_slots(page));
-
- n_slots = page_dir_get_n_slots(page);
-
- /* 1. Reset the n_owned fields of the slots to be
- deleted */
- for (i = start; i < start + n; i++) {
- slot = page_dir_get_nth_slot(page, i);
- sum_owned += page_dir_slot_get_n_owned(slot);
- page_dir_slot_set_n_owned(slot, 0);
- }
-
- /* 2. Update the n_owned value of the first non-deleted slot */
-
- slot = page_dir_get_nth_slot(page, start + n);
- page_dir_slot_set_n_owned(slot,
- sum_owned + page_dir_slot_get_n_owned(slot));
-
- /* 3. Destroy start and other slots by copying slots */
- for (i = start + n; i < n_slots; i++) {
- slot = page_dir_get_nth_slot(page, i);
- rec = page_dir_slot_get_rec(slot);
-
- slot = page_dir_get_nth_slot(page, i - n);
- page_dir_slot_set_rec(slot, rec);
- }
-
- /* 4. Update the page header */
- page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots - n);
-}
-
-/******************************************************************
-Used to add n slots to the directory. Does not set the record pointers
-in the added slots or update n_owned values: this is the responsibility
-of the caller.
-
-The slot count in the page header is increased by n first. Then, going
-from slot n_slots - 1 down to slot start + 1, the record pointer of each
-slot is copied to the slot n positions higher. The debug assertions
-require n == 1 and start < n_slots - 1. */
-UNIV_INLINE
-void
-page_dir_add_slots(
-/*===============*/
- page_t* page, /* in: the index page */
- ulint start, /* in: the slot above which the new slots are added */
- ulint n) /* in: number of slots to add (currently only n == 1
- allowed) */
-{
- page_dir_slot_t* slot;
- ulint n_slots;
- ulint i;
- rec_t* rec;
-
- ut_ad(n == 1);
-
- n_slots = page_dir_get_n_slots(page);
-
- ut_ad(start < n_slots - 1);
-
- /* Update the page header */
- page_dir_set_n_slots(page, n_slots + n);
-
- /* Move slots up */
-
- for (i = n_slots - 1; i > start; i--) { /* highest slot first, so no pointer is overwritten before it is copied */
-
- slot = page_dir_get_nth_slot(page, i);
- rec = page_dir_slot_get_rec(slot);
-
- slot = page_dir_get_nth_slot(page, i + n);
- page_dir_slot_set_rec(slot, rec);
- }
-}
-
-/********************************************************************
-Splits a directory slot which owns too many records. The slot is
-expected (debug assertion) to own exactly PAGE_DIR_SLOT_MAX_N_OWNED + 1
-records. Starting from the record of the preceding slot, the list is
-followed n_owned / 2 steps; a new slot is inserted at position slot_no,
-pointing to the record reached and owning n_owned / 2 records. The
-original slot, now at position slot_no + 1, keeps the remaining
-n_owned - n_owned / 2 records. */
-
-void
-page_dir_split_slot(
-/*================*/
- page_t* page, /* in: the index page in question */
- ulint slot_no) /* in: the directory slot */
-{
- rec_t* rec;
- page_dir_slot_t* new_slot;
- page_dir_slot_t* prev_slot;
- page_dir_slot_t* slot;
- ulint i;
- ulint n_owned;
-
- ut_ad(page);
- ut_ad(slot_no > 0);
-
- slot = page_dir_get_nth_slot(page, slot_no);
-
- n_owned = page_dir_slot_get_n_owned(slot);
- ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
-
- /* 1. We loop to find a record approximately in the middle of the
- records owned by the slot. */
-
- prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
- rec = page_dir_slot_get_rec(prev_slot);
-
- for (i = 0; i < n_owned / 2; i++) {
- rec = page_rec_get_next(rec);
- }
-
- ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED);
-
- /* 2. We add one directory slot immediately below the slot to be
- split. */
-
- page_dir_add_slots(page, slot_no - 1, 1);
-
- /* The added slot is now number slot_no, and the old slot is
- now number slot_no + 1 */
-
- new_slot = page_dir_get_nth_slot(page, slot_no);
- slot = page_dir_get_nth_slot(page, slot_no + 1);
-
- /* 3. We store the appropriate values to the new slot. */
-
- page_dir_slot_set_rec(new_slot, rec);
- page_dir_slot_set_n_owned(new_slot, n_owned / 2);
-
- /* 4. Finally, we update the number of records field of the
- original slot */
-
- page_dir_slot_set_n_owned(slot, n_owned - (n_owned / 2));
-}
-
-/*****************************************************************
-Tries to balance the given directory slot with too few records with the upper
-neighbor, so that there are at least the minimum number of records owned by
-the slot; this may result in the merging of two slots.
-
-Nothing is done when slot_no is the last slot of the directory. If the
-upper neighbor owns more than PAGE_DIR_SLOT_MIN_N_OWNED records, the
-record following the current slot record becomes the new slot record,
-so that one record moves from the upper slot to this slot. Otherwise
-slot slot_no is deleted with page_dir_delete_slots, which hands its
-records over to the upper slot. */
-
-void
-page_dir_balance_slot(
-/*==================*/
- page_t* page, /* in: index page */
- ulint slot_no) /* in: the directory slot */
-{
- page_dir_slot_t* slot;
- page_dir_slot_t* up_slot;
- ulint n_owned;
- ulint up_n_owned;
- rec_t* old_rec;
- rec_t* new_rec;
-
- ut_ad(page);
- ut_ad(slot_no > 0);
-
- slot = page_dir_get_nth_slot(page, slot_no);
-
- /* The last directory slot cannot be balanced with the upper
- neighbor, as there is none. */
-
- if (slot_no == page_dir_get_n_slots(page) - 1) {
-
- return;
- }
-
- up_slot = page_dir_get_nth_slot(page, slot_no + 1);
-
- n_owned = page_dir_slot_get_n_owned(slot);
- up_n_owned = page_dir_slot_get_n_owned(up_slot);
-
- ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
-
- /* If the upper slot has the minimum value of n_owned, we will merge
- the two slots, therefore we assert: */
- ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
-
- if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {
-
- /* In this case we can just transfer one record owned
- by the upper slot to the property of the lower slot */
- old_rec = page_dir_slot_get_rec(slot);
- new_rec = page_rec_get_next(old_rec);
-
- rec_set_n_owned(old_rec, page_is_comp(page), 0); /* old_rec no longer owns anything */
- rec_set_n_owned(new_rec, page_is_comp(page), n_owned + 1);
-
- page_dir_slot_set_rec(slot, new_rec);
-
- page_dir_slot_set_n_owned(up_slot, up_n_owned -1);
- } else {
- /* In this case we may merge the two slots */
- page_dir_delete_slots(page, slot_no, 1);
- }
-}
-
-/****************************************************************
-Returns the middle record of the record list. If there are an even number
-of records in the list, returns the first record of the upper half-list. */
-
-rec_t*
-page_get_middle_rec(
-/*================*/
- /* out: middle record */
- page_t* page) /* in: page */
-{
- page_dir_slot_t* slot;
- ulint middle;
- ulint i;
- ulint n_owned;
- ulint count;
- rec_t* rec;
-
- /* This many records we must leave behind */
- middle = (page_get_n_recs(page) + 2) / 2;
-
- count = 0;
-
- for (i = 0;; i++) {
-
- slot = page_dir_get_nth_slot(page, i);
- n_owned = page_dir_slot_get_n_owned(slot);
-
- if (count + n_owned > middle) {
- break;
- } else {
- count += n_owned;
- }
- }
-
- ut_ad(i > 0);
- slot = page_dir_get_nth_slot(page, i - 1);
- rec = page_dir_slot_get_rec(slot);
- rec = page_rec_get_next(rec);
-
- /* There are now count records behind rec */
-
- for (i = 0; i < middle - count; i++) {
- rec = page_rec_get_next(rec);
- }
-
- return(rec);
-}
-
-/*******************************************************************
-Returns the number of records before the given record in chain.
-The number includes infimum and supremum records. */
-
-ulint
-page_rec_get_n_recs_before(
-/*=======================*/
- /* out: number of records */
- rec_t* rec) /* in: the physical record */
-{
- page_dir_slot_t* slot;
- rec_t* slot_rec;
- page_t* page;
- ulint i;
- ulint comp;
- lint n = 0;
-
- ut_ad(page_rec_check(rec));
-
- page = buf_frame_align(rec);
- comp = page_is_comp(page);
-
- while (rec_get_n_owned(rec, comp) == 0) {
-
- rec = page_rec_get_next(rec);
- n--;
- }
-
- for (i = 0; ; i++) {
- slot = page_dir_get_nth_slot(page, i);
- slot_rec = page_dir_slot_get_rec(slot);
-
- n += rec_get_n_owned(slot_rec, comp);
-
- if (rec == slot_rec) {
-
- break;
- }
- }
-
- n--;
-
- ut_ad(n >= 0);
-
- return((ulint) n);
-}
-
-/****************************************************************
-Prints record contents including the data relevant only in
-the index page context. */
-
-void
-page_rec_print(
-/*===========*/
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: record descriptor */
-{
- ulint comp = page_is_comp(buf_frame_align(rec));
-
- ut_a(!comp == !rec_offs_comp(offsets));
- rec_print_new(stderr, rec, offsets);
- fprintf(stderr,
- " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
- (ulong) rec_get_n_owned(rec, comp),
- (ulong) rec_get_heap_no(rec, comp),
- (ulong) rec_get_next_offs(rec, comp));
-
- page_rec_check(rec);
- rec_validate(rec, offsets);
-}
-
-/*******************************************************************
-This is used to print the contents of the directory for
-debugging purposes. */
-
-void
-page_dir_print(
-/*===========*/
- page_t* page, /* in: index page */
- ulint pr_n) /* in: print n first and n last entries */
-{
- ulint n;
- ulint i;
- page_dir_slot_t* slot;
-
- n = page_dir_get_n_slots(page);
-
- fprintf(stderr, "--------------------------------\n"
- "PAGE DIRECTORY\n"
- "Page address %p\n"
- "Directory stack top at offs: %lu; number of slots: %lu\n",
- page, (ulong)(page_dir_get_nth_slot(page, n - 1) - page),
- (ulong) n);
- for (i = 0; i < n; i++) {
- slot = page_dir_get_nth_slot(page, i);
- if ((i == pr_n) && (i < n - pr_n)) {
- fputs(" ... \n", stderr);
- }
- if ((i < pr_n) || (i >= n - pr_n)) {
- fprintf(stderr,
- "Contents of slot: %lu: n_owned: %lu,"
- " rec offs: %lu\n",
- (ulong) i,
- (ulong) page_dir_slot_get_n_owned(slot),
- (ulong)(page_dir_slot_get_rec(slot) - page));
- }
- }
- fprintf(stderr, "Total of %lu records\n"
- "--------------------------------\n",
- (ulong) (2 + page_get_n_recs(page)));
-}
-
-/*******************************************************************
-This is used to print the contents of the page record list for
-debugging purposes. */
-
-void
-page_print_list(
-/*============*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: dictionary index of the page */
- ulint pr_n) /* in: print n first and n last entries */
-{
- page_cur_t cur;
- ulint count;
- ulint n_recs;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- fprintf(stderr,
- "--------------------------------\n"
- "PAGE RECORD LIST\n"
- "Page address %p\n", page);
-
- n_recs = page_get_n_recs(page);
-
- page_cur_set_before_first(page, &cur);
- count = 0;
- for (;;) {
- offsets = rec_get_offsets(cur.rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- page_rec_print(cur.rec, offsets);
-
- if (count == pr_n) {
- break;
- }
- if (page_cur_is_after_last(&cur)) {
- break;
- }
- page_cur_move_to_next(&cur);
- count++;
- }
-
- if (n_recs > 2 * pr_n) {
- fputs(" ... \n", stderr);
- }
-
- while (!page_cur_is_after_last(&cur)) {
- page_cur_move_to_next(&cur);
-
- if (count + pr_n >= n_recs) {
- offsets = rec_get_offsets(cur.rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- page_rec_print(cur.rec, offsets);
- }
- count++;
- }
-
- fprintf(stderr,
- "Total of %lu records \n"
- "--------------------------------\n",
- (ulong) (count + 1));
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/*******************************************************************
-Prints the info in a page header. */
-
-void
-page_header_print(
-/*==============*/
- page_t* page)
-{
- fprintf(stderr,
- "--------------------------------\n"
- "PAGE HEADER INFO\n"
- "Page address %p, n records %lu (%s)\n"
- "n dir slots %lu, heap top %lu\n"
- "Page n heap %lu, free %lu, garbage %lu\n"
- "Page last insert %lu, direction %lu, n direction %lu\n",
- page, (ulong) page_header_get_field(page, PAGE_N_RECS),
- page_is_comp(page) ? "compact format" : "original format",
- (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
- (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
- (ulong) page_dir_get_n_heap(page),
- (ulong) page_header_get_field(page, PAGE_FREE),
- (ulong) page_header_get_field(page, PAGE_GARBAGE),
- (ulong) page_header_get_field(page, PAGE_LAST_INSERT),
- (ulong) page_header_get_field(page, PAGE_DIRECTION),
- (ulong) page_header_get_field(page, PAGE_N_DIRECTION));
-}
-
-/*******************************************************************
-This is used to print the contents of the page for
-debugging purposes. */
-
-void
-page_print(
-/*=======*/
- page_t* page, /* in: index page */
- dict_index_t* index, /* in: dictionary index of the page */
- ulint dn, /* in: print dn first and last entries
- in directory */
- ulint rn) /* in: print rn first and last records
- in directory */
-{
- page_header_print(page);
- page_dir_print(page, dn);
- page_print_list(page, index, rn);
-}
-
-/*******************************************************************
-The following is used to validate a record on a page. This function
-differs from rec_validate as it can also check the n_owned field and
-the heap_no field. */
-
-ibool
-page_rec_validate(
-/*==============*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- ulint n_owned;
- ulint heap_no;
- page_t* page;
- ulint comp;
-
- page = buf_frame_align(rec);
- comp = page_is_comp(page);
- ut_a(!comp == !rec_offs_comp(offsets));
-
- page_rec_check(rec);
- rec_validate(rec, offsets);
-
- n_owned = rec_get_n_owned(rec, comp);
- heap_no = rec_get_heap_no(rec, comp);
-
- if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
- fprintf(stderr,
- "InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
- (ulong)(rec - page), (ulong) n_owned);
- return(FALSE);
- }
-
- if (!(heap_no < page_dir_get_n_heap(page))) {
- fprintf(stderr,
- "InnoDB: Heap no of rec %lu too big %lu %lu\n",
- (ulong)(rec - page), (ulong) heap_no,
- (ulong) page_dir_get_n_heap(page));
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*******************************************************************
-Checks that the first directory slot points to the infimum record and
-the last to the supremum. This function is intended to track if the
-bug fixed in 4.0.14 has caused corruption to users' databases. */
-
-void
-page_check_dir(
-/*===========*/
- page_t* page) /* in: index page */
-{
- ulint n_slots;
-
- n_slots = page_dir_get_n_slots(page);
-
- if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0))
- != page_get_infimum_rec(page)) {
-
- fprintf(stderr,
- "InnoDB: Page directory corruption:"
- " infimum not pointed to\n");
- buf_page_print(page);
- }
-
- if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, n_slots - 1))
- != page_get_supremum_rec(page)) {
-
- fprintf(stderr,
- "InnoDB: Page directory corruption:"
- " supremum not pointed to\n");
- buf_page_print(page);
- }
-}
-
-/*******************************************************************
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage. */
-
-ibool
-page_simple_validate(
-/*=================*/
- /* out: TRUE if ok */
- page_t* page) /* in: index page */
-{
- page_cur_t cur;
- page_dir_slot_t* slot;
- ulint slot_no;
- ulint n_slots;
- rec_t* rec;
- byte* rec_heap_top;
- ulint count;
- ulint own_count;
- ibool ret = FALSE;
- ulint comp = page_is_comp(page);
-
- /* Check first that the record heap and the directory do not
- overlap. */
-
- n_slots = page_dir_get_n_slots(page);
-
- if (n_slots > UNIV_PAGE_SIZE / 4) {
- fprintf(stderr,
- "InnoDB: Nonsensical number %lu of page dir slots\n",
- (ulong) n_slots);
-
- goto func_exit;
- }
-
- rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
- if (rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1)) {
-
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap on a page,"
- " heap top %lu, dir %lu\n",
- (ulong)
- (page_header_get_ptr(page, PAGE_HEAP_TOP) - page),
- (ulong)
- (page_dir_get_nth_slot(page, n_slots - 1) - page));
-
- goto func_exit;
- }
-
- /* Validate the record list in a loop checking also that it is
- consistent with the page record directory. */
-
- count = 0;
- own_count = 1;
- slot_no = 0;
- slot = page_dir_get_nth_slot(page, slot_no);
-
- page_cur_set_before_first(page, &cur);
-
- for (;;) {
- rec = (&cur)->rec;
-
- if (rec > rec_heap_top) {
- fprintf(stderr,
- "InnoDB: Record %lu is above"
- " rec heap top %lu\n",
- (ulong)(rec - page),
- (ulong)(rec_heap_top - page));
-
- goto func_exit;
- }
-
- if (rec_get_n_owned(rec, comp) != 0) {
- /* This is a record pointed to by a dir slot */
- if (rec_get_n_owned(rec, comp) != own_count) {
-
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu,"
- " rec %lu\n",
- (ulong) rec_get_n_owned(rec, comp),
- (ulong) own_count,
- (ulong)(rec - page));
-
- goto func_exit;
- }
-
- if (page_dir_slot_get_rec(slot) != rec) {
- fprintf(stderr,
- "InnoDB: Dir slot does not point"
- " to right rec %lu\n",
- (ulong)(rec - page));
-
- goto func_exit;
- }
-
- own_count = 0;
-
- if (!page_cur_is_after_last(&cur)) {
- slot_no++;
- slot = page_dir_get_nth_slot(page, slot_no);
- }
- }
-
- if (page_cur_is_after_last(&cur)) {
-
- break;
- }
-
- if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Next record offset"
- " nonsensical %lu for rec %lu\n",
- (ulong) rec_get_next_offs(rec, comp),
- (ulong)(rec - page));
-
- goto func_exit;
- }
-
- count++;
-
- if (count > UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Page record list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- page_cur_move_to_next(&cur);
- own_count++;
- }
-
- if (rec_get_n_owned(rec, comp) == 0) {
- fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
-
- goto func_exit;
- }
-
- if (slot_no != n_slots - 1) {
- fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
- goto func_exit;
- }
-
- if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- /* Check then the free list */
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- while (rec != NULL) {
- if (rec < page + FIL_PAGE_DATA
- || rec >= page + UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Free list record has"
- " a nonsensical offset %lu\n",
- (ulong) (rec - page));
-
- goto func_exit;
- }
-
- if (rec > rec_heap_top) {
- fprintf(stderr,
- "InnoDB: Free list record %lu"
- " is above rec heap top %lu\n",
- (ulong) (rec - page),
- (ulong) (rec_heap_top - page));
-
- goto func_exit;
- }
-
- count++;
-
- if (count > UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Page free list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- rec = page_rec_get_next(rec);
- }
-
- if (page_dir_get_n_heap(page) != count + 1) {
-
- fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- ret = TRUE;
-
-func_exit:
- return(ret);
-}
-
-/*******************************************************************
-This function checks the consistency of an index page. */
-
-ibool
-page_validate(
-/*==========*/
- /* out: TRUE if ok */
- page_t* page, /* in: index page */
- dict_index_t* index) /* in: data dictionary index containing
- the page record type definition */
-{
- page_dir_slot_t* slot;
- mem_heap_t* heap;
- page_cur_t cur;
- byte* buf;
- ulint count;
- ulint own_count;
- ulint slot_no;
- ulint data_size;
- rec_t* rec;
- rec_t* old_rec = NULL;
- ulint offs;
- ulint n_slots;
- ibool ret = FALSE;
- ulint i;
- ulint comp = page_is_comp(page);
- ulint* offsets = NULL;
- ulint* old_offsets = NULL;
-
- if ((ibool)!!comp != dict_table_is_comp(index->table)) {
- fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
- goto func_exit2;
- }
- if (!page_simple_validate(page)) {
- goto func_exit2;
- }
-
- heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
-
- /* The following buffer is used to check that the
- records in the page record heap do not overlap */
-
- buf = mem_heap_alloc(heap, UNIV_PAGE_SIZE);
- memset(buf, 0, UNIV_PAGE_SIZE);
-
- /* Check first that the record heap and the directory do not
- overlap. */
-
- n_slots = page_dir_get_n_slots(page);
-
- if (!(page_header_get_ptr(page, PAGE_HEAP_TOP)
- <= page_dir_get_nth_slot(page, n_slots - 1))) {
-
- fputs("InnoDB: Record heap and dir overlap on a page ",
- stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, ", %p, %p\n",
- page_header_get_ptr(page, PAGE_HEAP_TOP),
- page_dir_get_nth_slot(page, n_slots - 1));
-
- goto func_exit;
- }
-
- /* Validate the record list in a loop checking also that
- it is consistent with the directory. */
- count = 0;
- data_size = 0;
- own_count = 1;
- slot_no = 0;
- slot = page_dir_get_nth_slot(page, slot_no);
-
- page_cur_set_before_first(page, &cur);
-
- for (;;) {
- rec = cur.rec;
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (comp && page_rec_is_user_rec(rec)
- && rec_get_node_ptr_flag(rec)
- != (ibool)
- (btr_page_get_level_low(page) != 0)) {
- fputs("InnoDB: node_ptr flag mismatch\n", stderr);
- goto func_exit;
- }
-
- if (!page_rec_validate(rec, offsets)) {
- goto func_exit;
- }
-
- /* Check that the records are in the ascending order */
- if ((count >= 2) && (!page_cur_is_after_last(&cur))) {
- if (!(1 == cmp_rec_rec(rec, old_rec,
- offsets, old_offsets, index))) {
- fprintf(stderr,
- "InnoDB: Records in wrong order"
- " on page %lu ",
- (ulong) buf_frame_get_page_no(page));
- dict_index_name_print(stderr, NULL, index);
- fputs("\nInnoDB: previous record ", stderr);
- rec_print_new(stderr, old_rec, old_offsets);
- fputs("\nInnoDB: record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
-
- goto func_exit;
- }
- }
-
- if (page_rec_is_user_rec(rec)) {
-
- data_size += rec_offs_size(offsets);
- }
-
- offs = rec_get_start(rec, offsets) - page;
-
- for (i = 0; i < rec_offs_size(offsets); i++) {
- if (!buf[offs + i] == 0) {
- /* No other record may overlap this */
-
- fputs("InnoDB: Record overlaps another\n",
- stderr);
- goto func_exit;
- }
-
- buf[offs + i] = 1;
- }
-
- if (rec_get_n_owned(rec, comp) != 0) {
- /* This is a record pointed to by a dir slot */
- if (rec_get_n_owned(rec, comp) != own_count) {
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu\n",
- (ulong) rec_get_n_owned(rec, comp),
- (ulong) own_count);
- goto func_exit;
- }
-
- if (page_dir_slot_get_rec(slot) != rec) {
- fputs("InnoDB: Dir slot does not"
- " point to right rec\n",
- stderr);
- goto func_exit;
- }
-
- page_dir_slot_check(slot);
-
- own_count = 0;
- if (!page_cur_is_after_last(&cur)) {
- slot_no++;
- slot = page_dir_get_nth_slot(page, slot_no);
- }
- }
-
- if (page_cur_is_after_last(&cur)) {
- break;
- }
-
- if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Next record offset wrong %lu\n",
- (ulong) rec_get_next_offs(rec, comp));
- goto func_exit;
- }
-
- count++;
- page_cur_move_to_next(&cur);
- own_count++;
- old_rec = rec;
- /* set old_offsets to offsets; recycle offsets */
- {
- ulint* offs = old_offsets;
- old_offsets = offsets;
- offsets = offs;
- }
- }
-
- if (rec_get_n_owned(rec, comp) == 0) {
- fputs("InnoDB: n owned is zero\n", stderr);
- goto func_exit;
- }
-
- if (slot_no != n_slots - 1) {
- fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
- goto func_exit;
- }
-
- if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
- (ulong) (count + 1));
- goto func_exit;
- }
-
- if (data_size != page_get_data_size(page)) {
- fprintf(stderr,
- "InnoDB: Summed data size %lu, returned by func %lu\n",
- (ulong) data_size, (ulong) page_get_data_size(page));
- goto func_exit;
- }
-
- /* Check then the free list */
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- while (rec != NULL) {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- if (!page_rec_validate(rec, offsets)) {
-
- goto func_exit;
- }
-
- count++;
- offs = rec_get_start(rec, offsets) - page;
-
- for (i = 0; i < rec_offs_size(offsets); i++) {
-
- if (buf[offs + i] != 0) {
- fputs("InnoDB: Record overlaps another"
- " in free list\n", stderr);
- goto func_exit;
- }
-
- buf[offs + i] = 1;
- }
-
- rec = page_rec_get_next(rec);
- }
-
- if (page_dir_get_n_heap(page) != count + 1) {
- fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) count + 1);
- goto func_exit;
- }
-
- ret = TRUE;
-
-func_exit:
- mem_heap_free(heap);
-
- if (ret == FALSE) {
-func_exit2:
- fprintf(stderr, "InnoDB: Apparent corruption in page %lu in ",
- (ulong) buf_frame_get_page_no(page));
- dict_index_name_print(stderr, NULL, index);
- putc('\n', stderr);
- buf_page_print(page);
- }
-
- return(ret);
-}
-
-/*******************************************************************
-Looks in the page record list for a record with the given heap number. */
-
-rec_t*
-page_find_rec_with_heap_no(
-/*=======================*/
- /* out: record, NULL if not found */
- page_t* page, /* in: index page */
- ulint heap_no)/* in: heap number */
-{
- page_cur_t cur;
-
- page_cur_set_before_first(page, &cur);
-
- for (;;) {
- if (rec_get_heap_no(cur.rec, page_is_comp(page)) == heap_no) {
-
- return(cur.rec);
- }
-
- if (page_cur_is_after_last(&cur)) {
-
- return(NULL);
- }
-
- page_cur_move_to_next(&cur);
- }
-}
diff --git a/storage/innobase/pars/lexyy.c b/storage/innobase/pars/lexyy.c
deleted file mode 100644
index b65de138573..00000000000
--- a/storage/innobase/pars/lexyy.c
+++ /dev/null
@@ -1,2762 +0,0 @@
-#include "univ.i"
-#line 2 "_flex_tmp.c"
-
-#line 4 "_flex_tmp.c"
-
-#define YY_INT_ALIGNED short int
-
-/* A lexical scanner generated by flex */
-
-#define FLEX_SCANNER
-#define YY_FLEX_MAJOR_VERSION 2
-#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 31
-#if YY_FLEX_SUBMINOR_VERSION > 0
-#define FLEX_BETA
-#endif
-
-/* First, we deal with platform-specific or compiler-specific issues. */
-
-/* begin standard C headers. */
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <stdlib.h>
-
-/* end standard C headers. */
-
-/* flex integer type definitions */
-
-#ifndef FLEXINT_H
-#define FLEXINT_H
-
-/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
-
-#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L
-#include <inttypes.h>
-typedef int8_t flex_int8_t;
-typedef uint8_t flex_uint8_t;
-typedef int16_t flex_int16_t;
-typedef uint16_t flex_uint16_t;
-typedef int32_t flex_int32_t;
-typedef uint32_t flex_uint32_t;
-#else
-typedef signed char flex_int8_t;
-typedef short int flex_int16_t;
-typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
-typedef unsigned short int flex_uint16_t;
-typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
-
-/* Limits of integral types. */
-#ifndef INT8_MIN
-#define INT8_MIN (-128)
-#endif
-#ifndef INT16_MIN
-#define INT16_MIN (-32767-1)
-#endif
-#ifndef INT32_MIN
-#define INT32_MIN (-2147483647-1)
-#endif
-#ifndef INT8_MAX
-#define INT8_MAX (127)
-#endif
-#ifndef INT16_MAX
-#define INT16_MAX (32767)
-#endif
-#ifndef INT32_MAX
-#define INT32_MAX (2147483647)
-#endif
-#ifndef UINT8_MAX
-#define UINT8_MAX (255U)
-#endif
-#ifndef UINT16_MAX
-#define UINT16_MAX (65535U)
-#endif
-#ifndef UINT32_MAX
-#define UINT32_MAX (4294967295U)
-#endif
-
-#endif /* ! FLEXINT_H */
-
-#ifdef __cplusplus
-
-/* The "const" storage-class-modifier is valid. */
-#define YY_USE_CONST
-
-#else /* ! __cplusplus */
-
-#if __STDC__
-
-#define YY_USE_CONST
-
-#endif /* __STDC__ */
-#endif /* ! __cplusplus */
-
-#ifdef YY_USE_CONST
-#define yyconst const
-#else
-#define yyconst
-#endif
-
-/* Returned upon end-of-file. */
-#define YY_NULL 0
-
-/* Promotes a possibly negative, possibly signed char to an unsigned
- * integer for use as an array index. If the signed char is negative,
- * we want to instead treat it as an 8-bit unsigned char, hence the
- * double cast.
- */
-#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
-
-/* Enter a start condition. This macro really ought to take a parameter,
- * but we do it the disgusting crufty way forced on us by the ()-less
- * definition of BEGIN.
- */
-#define BEGIN (yy_start) = 1 + 2 *
-
-/* Translate the current start state into a value that can be later handed
- * to BEGIN to return to the state. The YYSTATE alias is for lex
- * compatibility.
- */
-#define YY_START (((yy_start) - 1) / 2)
-#define YYSTATE YY_START
-
-/* Action number for EOF rule of a given start state. */
-#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
-
-/* Special action meaning "start processing a new file". */
-#define YY_NEW_FILE yyrestart(yyin )
-
-#define YY_END_OF_BUFFER_CHAR 0
-
-/* Size of default input buffer. */
-#ifndef YY_BUF_SIZE
-#define YY_BUF_SIZE 16384
-#endif
-
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
-#define YY_TYPEDEF_YY_BUFFER_STATE
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
-#endif
-
-extern int yyleng;
-
-extern FILE *yyin, *yyout;
-
-#define EOB_ACT_CONTINUE_SCAN 0
-#define EOB_ACT_END_OF_FILE 1
-#define EOB_ACT_LAST_MATCH 2
-
- #define YY_LESS_LINENO(n)
-
-/* Return all but the first "n" matched characters back to the input stream. */
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- *yy_cp = (yy_hold_char); \
- YY_RESTORE_YY_MORE_OFFSET \
- (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
- YY_DO_BEFORE_ACTION; /* set up yytext again */ \
- } \
- while ( 0 )
-
-#define unput(c) yyunput( c, (yytext_ptr) )
-
-/* The following is because we cannot portably get our hands on size_t
- * (without autoconf's help, which isn't available because we want
- * flex-generated scanners to compile on their own).
- */
-
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef unsigned int yy_size_t;
-#endif
-
-#ifndef YY_STRUCT_YY_BUFFER_STATE
-#define YY_STRUCT_YY_BUFFER_STATE
-struct yy_buffer_state
- {
- FILE *yy_input_file;
-
- char *yy_ch_buf; /* input buffer */
- char *yy_buf_pos; /* current position in input buffer */
-
- /* Size of input buffer in bytes, not including room for EOB
- * characters.
- */
- yy_size_t yy_buf_size;
-
- /* Number of characters read into yy_ch_buf, not including EOB
- * characters.
- */
- int yy_n_chars;
-
- /* Whether we "own" the buffer - i.e., we know we created it,
- * and can realloc() it to grow it, and should free() it to
- * delete it.
- */
- int yy_is_our_buffer;
-
- /* Whether this is an "interactive" input source; if so, and
- * if we're using stdio for input, then we want to use getc()
- * instead of fread(), to make sure we stop fetching input after
- * each newline.
- */
- int yy_is_interactive;
-
- /* Whether we're considered to be at the beginning of a line.
- * If so, '^' rules will be active on the next match, otherwise
- * not.
- */
- int yy_at_bol;
-
- int yy_bs_lineno; /**< The line count. */
- int yy_bs_column; /**< The column count. */
-
- /* Whether to try to fill the input buffer when we reach the
- * end of it.
- */
- int yy_fill_buffer;
-
- int yy_buffer_status;
-
-#define YY_BUFFER_NEW 0
-#define YY_BUFFER_NORMAL 1
- /* When an EOF's been seen but there's still some text to process
- * then we mark the buffer as YY_EOF_PENDING, to indicate that we
- * shouldn't try reading from the input source any more. We might
- * still have a bunch of tokens to match, though, because of
- * possible backing-up.
- *
- * When we actually see the EOF, we change the status to "new"
- * (via yyrestart()), so that the user can continue scanning by
- * just pointing yyin at a new input file.
- */
-#define YY_BUFFER_EOF_PENDING 2
-
- };
-#endif /* !YY_STRUCT_YY_BUFFER_STATE */
-
-/* Stack of input buffers. */
-static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
-static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
-static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
-
-/* We provide macros for accessing buffer states in case in the
- * future we want to put the buffer states in a more general
- * "scanner state".
- *
- * Returns the top of the stack, or NULL.
- */
-#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
- ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
- : NULL)
-
-/* Same as previous macro, but useful when we know that the buffer stack is not
- * NULL or when we need an lvalue. For internal use only.
- */
-#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
-
-/* yy_hold_char holds the character lost when yytext is formed. */
-static char yy_hold_char;
-static int yy_n_chars; /* number of characters read into yy_ch_buf */
-int yyleng;
-
-/* Points to current character in buffer. */
-static char *yy_c_buf_p = (char *) 0;
-static int yy_init = 1; /* whether we need to initialize */
-static int yy_start = 0; /* start state number */
-
-/* Flag which is used to allow yywrap()'s to do buffer switches
- * instead of setting up a fresh yyin. A bit of a hack ...
- */
-static int yy_did_buffer_switch_on_eof;
-
-void yyrestart (FILE *input_file );
-void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer );
-YY_BUFFER_STATE yy_create_buffer (FILE *file,int size );
-void yy_delete_buffer (YY_BUFFER_STATE b );
-void yy_flush_buffer (YY_BUFFER_STATE b );
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer );
-void yypop_buffer_state (void );
-
-static void yyensure_buffer_stack (void );
-static void yy_load_buffer_state (void );
-static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file );
-
-#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER )
-
-YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size );
-YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str );
-YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len );
-
-void *yyalloc (yy_size_t );
-void *yyrealloc (void *,yy_size_t );
-void yyfree (void * );
-
-#define yy_new_buffer yy_create_buffer
-
-#define yy_set_interactive(is_interactive) \
- { \
- if ( ! YY_CURRENT_BUFFER ){ \
- yyensure_buffer_stack (); \
- YY_CURRENT_BUFFER_LVALUE = \
- yy_create_buffer(yyin,YY_BUF_SIZE ); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
- }
-
-#define yy_set_bol(at_bol) \
- { \
- if ( ! YY_CURRENT_BUFFER ){\
- yyensure_buffer_stack (); \
- YY_CURRENT_BUFFER_LVALUE = \
- yy_create_buffer(yyin,YY_BUF_SIZE ); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
- }
-
-#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
-
-/* Begin user sect3 */
-
-#define yywrap(n) 1
-#define YY_SKIP_YYWRAP
-
-typedef unsigned char YY_CHAR;
-
-FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
-
-typedef int yy_state_type;
-
-extern int yylineno;
-
-int yylineno = 1;
-
-extern char *yytext;
-#define yytext_ptr yytext
-
-static yy_state_type yy_get_previous_state (void );
-static yy_state_type yy_try_NUL_trans (yy_state_type current_state );
-static int yy_get_next_buffer (void );
-static void yy_fatal_error (yyconst char msg[] );
-
-/* Done after the current pattern has been matched and before the
- * corresponding action - sets up yytext.
- */
-#define YY_DO_BEFORE_ACTION \
- (yytext_ptr) = yy_bp; \
- yyleng = (size_t) (yy_cp - yy_bp); \
- (yy_hold_char) = *yy_cp; \
- *yy_cp = '\0'; \
- (yy_c_buf_p) = yy_cp;
-
-#define YY_NUM_RULES 119
-#define YY_END_OF_BUFFER 120
-/* This struct is not used in this scanner,
- but its presence is necessary. */
-struct yy_trans_info
- {
- flex_int32_t yy_verify;
- flex_int32_t yy_nxt;
- };
-static yyconst flex_int16_t yy_accept[399] =
- { 0,
- 0, 0, 114, 114, 0, 0, 0, 0, 120, 118,
- 117, 117, 8, 118, 109, 5, 98, 104, 107, 105,
- 102, 106, 118, 108, 1, 118, 103, 101, 99, 100,
- 112, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 110, 111, 114, 115, 6, 7, 9, 10, 117, 4,
- 93, 113, 2, 1, 3, 94, 95, 97, 96, 92,
- 92, 92, 92, 92, 92, 44, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 92, 92, 28, 17, 25, 92, 92, 92, 92, 92,
-
- 54, 61, 92, 14, 92, 92, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 92, 92, 114, 115, 115, 116, 6, 7, 9, 10,
- 2, 13, 45, 92, 92, 92, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 92, 27, 92, 92, 92, 41, 92, 92, 92, 92,
- 21, 92, 92, 92, 92, 15, 92, 92, 92, 18,
- 92, 92, 92, 92, 92, 80, 92, 92, 92, 51,
- 92, 12, 92, 36, 92, 92, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 20, 24,
-
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 46, 92, 92, 30, 92, 87, 92, 92, 39, 92,
- 92, 92, 92, 92, 48, 92, 89, 32, 91, 92,
- 11, 64, 92, 92, 92, 42, 92, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 29, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 85, 92, 26, 92,
- 66, 92, 92, 92, 37, 92, 92, 92, 92, 92,
- 92, 92, 31, 65, 23, 92, 57, 92, 75, 92,
- 92, 92, 43, 92, 92, 92, 92, 92, 92, 92,
- 92, 90, 92, 92, 56, 92, 92, 92, 92, 92,
-
- 92, 92, 40, 33, 79, 19, 92, 83, 74, 55,
- 92, 63, 92, 52, 92, 92, 92, 47, 92, 76,
- 92, 78, 92, 92, 34, 92, 92, 92, 35, 72,
- 92, 92, 92, 92, 58, 92, 50, 49, 92, 92,
- 53, 62, 92, 92, 92, 22, 92, 92, 73, 81,
- 92, 92, 77, 92, 68, 92, 92, 92, 92, 38,
- 92, 88, 67, 92, 84, 92, 92, 92, 86, 92,
- 59, 92, 16, 92, 70, 69, 92, 92, 82, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 71,
- 92, 92, 92, 92, 92, 92, 60, 0
-
- } ;
-
-static yyconst flex_int32_t yy_ec[256] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 1, 4, 1, 5, 6, 1, 7, 8,
- 9, 10, 11, 12, 13, 14, 15, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 17, 18, 19,
- 20, 21, 22, 1, 23, 24, 25, 26, 27, 28,
- 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
- 39, 40, 41, 42, 43, 44, 45, 46, 47, 32,
- 1, 1, 1, 1, 48, 1, 32, 32, 32, 32,
-
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 49, 1, 50, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1
- } ;
-
-static yyconst flex_int32_t yy_meta[51] =
- { 0,
- 1, 1, 1, 2, 1, 1, 3, 1, 1, 4,
- 1, 1, 1, 1, 1, 5, 1, 1, 1, 6,
- 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 1, 1
- } ;
-
-static yyconst flex_int16_t yy_base[409] =
- { 0,
- 0, 0, 437, 436, 438, 437, 439, 438, 441, 448,
- 49, 51, 448, 0, 448, 448, 448, 448, 448, 448,
- 448, 448, 426, 429, 41, 418, 448, 38, 448, 417,
- 448, 20, 33, 32, 46, 40, 44, 0, 54, 52,
- 399, 48, 60, 395, 65, 67, 81, 27, 411, 75,
- 448, 448, 0, 98, 0, 426, 0, 428, 113, 0,
- 448, 448, 415, 54, 410, 448, 448, 448, 448, 0,
- 403, 68, 399, 391, 389, 0, 402, 80, 84, 397,
- 383, 96, 381, 394, 379, 393, 387, 375, 379, 375,
- 377, 377, 0, 98, 0, 376, 97, 385, 368, 375,
-
- 0, 0, 381, 381, 364, 94, 103, 379, 98, 65,
- 381, 369, 109, 361, 377, 373, 351, 97, 372, 363,
- 115, 356, 0, 137, 138, 448, 0, 388, 0, 390,
- 377, 0, 0, 365, 360, 367, 365, 348, 346, 345,
- 350, 359, 347, 359, 95, 347, 353, 354, 336, 336,
- 123, 0, 334, 350, 351, 0, 338, 347, 344, 122,
- 124, 341, 336, 330, 340, 338, 331, 328, 336, 0,
- 326, 336, 334, 325, 315, 309, 322, 307, 327, 0,
- 313, 0, 311, 0, 325, 316, 313, 131, 309, 316,
- 323, 302, 304, 309, 309, 301, 304, 299, 0, 0,
-
- 311, 295, 305, 312, 292, 291, 305, 294, 307, 287,
- 0, 297, 279, 0, 298, 0, 295, 282, 0, 281,
- 276, 281, 280, 290, 0, 276, 0, 0, 0, 280,
- 0, 0, 276, 273, 287, 0, 272, 272, 270, 286,
- 271, 283, 280, 264, 282, 277, 0, 272, 272, 258,
- 257, 270, 256, 270, 269, 268, 0, 252, 0, 246,
- 0, 265, 249, 248, 0, 262, 252, 247, 246, 258,
- 248, 247, 0, 0, 0, 251, 0, 239, 0, 253,
- 249, 235, 0, 249, 250, 233, 238, 231, 249, 231,
- 228, 0, 229, 226, 0, 231, 243, 230, 237, 227,
-
- 235, 220, 0, 0, 0, 212, 219, 0, 0, 0,
- 216, 0, 230, 0, 231, 218, 217, 0, 213, 0,
- 216, 0, 208, 210, 0, 209, 223, 216, 0, 0,
- 219, 222, 204, 219, 0, 215, 0, 0, 199, 213,
- 0, 0, 197, 196, 201, 0, 210, 195, 0, 0,
- 201, 197, 0, 192, 0, 204, 204, 192, 202, 0,
- 179, 0, 0, 199, 0, 183, 177, 183, 0, 174,
- 0, 193, 0, 192, 0, 0, 183, 187, 0, 174,
- 174, 180, 166, 189, 181, 180, 166, 151, 118, 0,
- 130, 136, 127, 123, 119, 111, 0, 448, 167, 173,
-
- 179, 152, 181, 124, 187, 193, 199, 205
- } ;
-
-static yyconst flex_int16_t yy_def[409] =
- { 0,
- 398, 1, 399, 399, 400, 400, 401, 401, 398, 398,
- 398, 398, 398, 402, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 403, 398, 398, 398, 398,
- 398, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 398, 398, 405, 406, 407, 398, 408, 398, 398, 402,
- 398, 398, 398, 398, 403, 398, 398, 398, 398, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
-
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 405, 406, 406, 398, 407, 398, 408, 398,
- 398, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
-
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
-
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 404, 404, 404,
- 404, 404, 404, 404, 404, 404, 404, 0, 398, 398,
-
- 398, 398, 398, 398, 398, 398, 398, 398
- } ;
-
-static yyconst flex_int16_t yy_nxt[499] =
- { 0,
- 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 38,
- 39, 38, 38, 40, 41, 42, 43, 44, 38, 45,
- 46, 47, 48, 49, 50, 38, 38, 38, 51, 52,
- 59, 59, 59, 59, 63, 71, 64, 67, 68, 73,
- 72, 77, 118, 74, 119, 78, 75, 63, 79, 64,
- 88, 80, 82, 85, 81, 86, 83, 89, 96, 76,
- 90, 93, 84, 91, 99, 87, 92, 101, 97, 94,
- 100, 107, 133, 110, 95, 102, 111, 103, 179, 104,
-
- 108, 109, 105, 115, 121, 112, 180, 125, 134, 113,
- 116, 122, 126, 114, 59, 59, 139, 117, 141, 142,
- 146, 163, 140, 159, 171, 173, 143, 189, 70, 147,
- 172, 177, 183, 164, 207, 208, 148, 190, 160, 161,
- 174, 193, 178, 184, 175, 194, 398, 125, 222, 214,
- 224, 398, 126, 215, 248, 249, 60, 397, 396, 395,
- 225, 394, 393, 223, 392, 391, 250, 53, 53, 53,
- 53, 53, 53, 55, 55, 55, 55, 55, 55, 57,
- 57, 57, 57, 57, 57, 65, 65, 123, 123, 123,
- 390, 123, 123, 124, 124, 124, 124, 124, 124, 127,
-
- 127, 389, 127, 127, 127, 129, 388, 129, 129, 129,
- 129, 387, 386, 385, 384, 383, 382, 381, 380, 379,
- 378, 377, 376, 375, 374, 373, 372, 371, 370, 369,
- 368, 367, 366, 365, 364, 363, 362, 361, 360, 359,
- 358, 357, 356, 355, 354, 353, 352, 351, 350, 349,
- 348, 347, 346, 345, 344, 343, 342, 341, 340, 339,
- 338, 337, 336, 335, 334, 333, 332, 331, 330, 329,
- 328, 327, 326, 325, 324, 323, 322, 321, 320, 319,
- 318, 317, 316, 315, 314, 313, 312, 311, 310, 309,
- 308, 307, 306, 305, 304, 303, 302, 301, 300, 299,
-
- 298, 297, 296, 295, 294, 293, 292, 291, 290, 289,
- 288, 287, 286, 285, 284, 283, 282, 281, 280, 279,
- 278, 277, 276, 275, 274, 273, 272, 271, 270, 269,
- 268, 267, 266, 265, 264, 263, 262, 261, 260, 259,
- 258, 257, 256, 255, 254, 253, 252, 251, 247, 246,
- 245, 244, 243, 242, 241, 240, 239, 238, 237, 236,
- 235, 234, 233, 232, 231, 230, 229, 228, 227, 226,
- 221, 220, 219, 218, 217, 216, 213, 212, 211, 210,
- 209, 206, 205, 204, 203, 202, 201, 200, 199, 198,
- 197, 196, 131, 130, 128, 195, 192, 191, 188, 187,
-
- 186, 185, 182, 181, 176, 170, 169, 168, 167, 166,
- 165, 162, 158, 157, 156, 155, 154, 153, 152, 151,
- 150, 149, 145, 144, 138, 137, 136, 135, 132, 398,
- 131, 130, 128, 120, 106, 98, 69, 66, 62, 61,
- 398, 58, 58, 56, 56, 54, 54, 9, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398
-
- } ;
-
-static yyconst flex_int16_t yy_chk[499] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 11, 11, 12, 12, 25, 32, 25, 28, 28, 33,
- 32, 34, 48, 33, 48, 34, 33, 64, 34, 64,
- 37, 34, 35, 36, 34, 36, 35, 37, 40, 33,
- 37, 39, 35, 37, 42, 36, 37, 43, 40, 39,
- 42, 45, 72, 46, 39, 43, 46, 43, 110, 43,
-
- 45, 45, 43, 47, 50, 46, 110, 54, 72, 46,
- 47, 50, 54, 46, 59, 59, 78, 47, 79, 79,
- 82, 97, 78, 94, 106, 107, 79, 118, 404, 82,
- 106, 109, 113, 97, 145, 145, 82, 118, 94, 94,
- 107, 121, 109, 113, 107, 121, 124, 125, 160, 151,
- 161, 124, 125, 151, 188, 188, 402, 396, 395, 394,
- 161, 393, 392, 160, 391, 389, 188, 399, 399, 399,
- 399, 399, 399, 400, 400, 400, 400, 400, 400, 401,
- 401, 401, 401, 401, 401, 403, 403, 405, 405, 405,
- 388, 405, 405, 406, 406, 406, 406, 406, 406, 407,
-
- 407, 387, 407, 407, 407, 408, 386, 408, 408, 408,
- 408, 385, 384, 383, 382, 381, 380, 378, 377, 374,
- 372, 370, 368, 367, 366, 364, 361, 359, 358, 357,
- 356, 354, 352, 351, 348, 347, 345, 344, 343, 340,
- 339, 336, 334, 333, 332, 331, 328, 327, 326, 324,
- 323, 321, 319, 317, 316, 315, 313, 311, 307, 306,
- 302, 301, 300, 299, 298, 297, 296, 294, 293, 291,
- 290, 289, 288, 287, 286, 285, 284, 282, 281, 280,
- 278, 276, 272, 271, 270, 269, 268, 267, 266, 264,
- 263, 262, 260, 258, 256, 255, 254, 253, 252, 251,
-
- 250, 249, 248, 246, 245, 244, 243, 242, 241, 240,
- 239, 238, 237, 235, 234, 233, 230, 226, 224, 223,
- 222, 221, 220, 218, 217, 215, 213, 212, 210, 209,
- 208, 207, 206, 205, 204, 203, 202, 201, 198, 197,
- 196, 195, 194, 193, 192, 191, 190, 189, 187, 186,
- 185, 183, 181, 179, 178, 177, 176, 175, 174, 173,
- 172, 171, 169, 168, 167, 166, 165, 164, 163, 162,
- 159, 158, 157, 155, 154, 153, 150, 149, 148, 147,
- 146, 144, 143, 142, 141, 140, 139, 138, 137, 136,
- 135, 134, 131, 130, 128, 122, 120, 119, 117, 116,
-
- 115, 114, 112, 111, 108, 105, 104, 103, 100, 99,
- 98, 96, 92, 91, 90, 89, 88, 87, 86, 85,
- 84, 83, 81, 80, 77, 75, 74, 73, 71, 65,
- 63, 58, 56, 49, 44, 41, 30, 26, 24, 23,
- 9, 8, 7, 6, 5, 4, 3, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398, 398, 398,
- 398, 398, 398, 398, 398, 398, 398, 398
-
- } ;
-
-static yy_state_type yy_last_accepting_state;
-static char *yy_last_accepting_cpos;
-
-extern int yy_flex_debug;
-int yy_flex_debug = 0;
-
-/* The intent behind this definition is that it'll catch
- * any uses of REJECT which flex missed.
- */
-#define REJECT reject_used_but_not_detected
-#define yymore() yymore_used_but_not_detected
-#define YY_MORE_ADJ 0
-#define YY_RESTORE_YY_MORE_OFFSET
-char *yytext;
-#line 1 "pars0lex.l"
-/******************************************************
-SQL parser lexical analyzer: input file for the GNU Flex lexer generator
-
-(c) 1997 Innobase Oy
-
-Created 12/14/1997 Heikki Tuuri
-Published under the GPL version 2
-
-The InnoDB parser is frozen because MySQL takes care of SQL parsing.
-Therefore we normally keep the InnoDB parser C files as they are, and do
-not automatically generate them from pars0grm.y and pars0lex.l.
-
-How to make the InnoDB parser and lexer C files:
-
-1. Run ./make_flex.sh to generate lexer files.
-
-2. Run ./make_bison.sh to generate parser files.
-
-These instructions seem to work at least with bison-1.875d and flex-2.5.31 on
-Linux.
-*******************************************************/
-#define YY_NO_INPUT 1
-#define YY_NO_UNISTD_H 1
-#line 38 "pars0lex.l"
-#define YYSTYPE que_node_t*
-
-#include "univ.i"
-#include "pars0pars.h"
-#include "pars0grm.h"
-#include "pars0sym.h"
-#include "mem0mem.h"
-#include "os0proc.h"
-
-#define malloc(A) ut_malloc(A)
-#define free(A) ut_free(A)
-#define realloc(P, A) ut_realloc(P, A)
-#define exit(A) ut_error
-
-#define YY_INPUT(buf, result, max_size) pars_get_lex_chars(buf, &result, max_size)
-
-/* String buffer for removing quotes */
-static ulint stringbuf_len_alloc = 0; /* Allocated length */
-static ulint stringbuf_len = 0; /* Current length */
-static char* stringbuf; /* Start of buffer */
-/* Appends a string to the buffer. */
-static
-void
-string_append(
-/*==========*/
- const char* str, /* in: string to be appended */
- ulint len) /* in: length of the string */
-{
- if (stringbuf == NULL) {
- stringbuf = malloc(1);
- stringbuf_len_alloc = 1;
- }
-
- if (stringbuf_len + len > stringbuf_len_alloc) {
- while (stringbuf_len + len > stringbuf_len_alloc) {
- stringbuf_len_alloc <<= 1;
- }
- stringbuf = realloc(stringbuf, stringbuf_len_alloc);
- }
-
- memcpy(stringbuf + stringbuf_len, str, len);
- stringbuf_len += len;
-}
-
-
-
-
-#line 759 "_flex_tmp.c"
-
-#define INITIAL 0
-#define comment 1
-#define quoted 2
-#define id 3
-
-#ifndef YY_NO_UNISTD_H
-/* Special case for "unistd.h", since it is non-ANSI. We include it way
- * down here because we want the user's section 1 to have been scanned first.
- * The user has a chance to override it with an option.
- */
-#include <unistd.h>
-#endif
-
-#ifndef YY_EXTRA_TYPE
-#define YY_EXTRA_TYPE void *
-#endif
-
-/* Macros after this point can all be overridden by user definitions in
- * section 1.
- */
-
-#ifndef YY_SKIP_YYWRAP
-#ifdef __cplusplus
-extern "C" int yywrap (void );
-#else
-extern int yywrap (void );
-#endif
-#endif
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int );
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * );
-#endif
-
-#ifndef YY_NO_INPUT
-
-#ifdef __cplusplus
-static int yyinput (void );
-#else
-static int input (void );
-#endif
-
-#endif
-
-/* Amount of stuff to slurp up with each read. */
-#ifndef YY_READ_BUF_SIZE
-#define YY_READ_BUF_SIZE 8192
-#endif
-
-/* Copy whatever the last rule matched to the standard output. */
-#ifndef ECHO
-/* This used to be an fputs(), but since the string might contain NUL's,
- * we now use fwrite().
- */
-#define ECHO (void) fwrite( yytext, yyleng, 1, yyout )
-#endif
-
-/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
- * is returned in "result".
- */
-#ifndef YY_INPUT
-#define YY_INPUT(buf,result,max_size) \
- if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
- { \
- int c = '*'; \
- size_t n; \
- for ( n = 0; n < max_size && \
- (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
- buf[n] = (char) c; \
- if ( c == '\n' ) \
- buf[n++] = (char) c; \
- if ( c == EOF && ferror( yyin ) ) \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- result = n; \
- } \
- else \
- { \
- errno=0; \
- while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
- { \
- if( errno != EINTR) \
- { \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- break; \
- } \
- errno=0; \
- clearerr(yyin); \
- } \
- }\
-\
-
-#endif
-
-/* No semi-colon after return; correct usage is to write "yyterminate();" -
- * we don't want an extra ';' after the "return" because that will cause
- * some compilers to complain about unreachable statements.
- */
-#ifndef yyterminate
-#define yyterminate() return YY_NULL
-#endif
-
-/* Number of entries by which start-condition stack grows. */
-#ifndef YY_START_STACK_INCR
-#define YY_START_STACK_INCR 25
-#endif
-
-/* Report a fatal error. */
-#ifndef YY_FATAL_ERROR
-#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
-#endif
-
-/* end tables serialization structures and prototypes */
-
-/* Default declaration of generated scanner - a define so the user can
- * easily add parameters.
- */
-#ifndef YY_DECL
-#define YY_DECL_IS_OURS 1
-
-extern int yylex (void);
-
-#define YY_DECL int yylex (void)
-#endif /* !YY_DECL */
-
-/* Code executed at the beginning of each rule, after yytext and yyleng
- * have been set up.
- */
-#ifndef YY_USER_ACTION
-#define YY_USER_ACTION
-#endif
-
-/* Code executed at the end of each rule. */
-#ifndef YY_BREAK
-#define YY_BREAK break;
-#endif
-
-#define YY_RULE_SETUP \
- YY_USER_ACTION
-
-/** The main scanner function which does all the work.
- */
-YY_DECL
-{
- register yy_state_type yy_current_state;
- register char *yy_cp, *yy_bp;
- register int yy_act;
-
-#line 92 "pars0lex.l"
-
-
-#line 914 "_flex_tmp.c"
-
- if ( (yy_init) )
- {
- (yy_init) = 0;
-
-#ifdef YY_USER_INIT
- YY_USER_INIT;
-#endif
-
- if ( ! (yy_start) )
- (yy_start) = 1; /* first start state */
-
- if ( ! yyin )
- yyin = stdin;
-
- if ( ! yyout )
- yyout = stdout;
-
- if ( ! YY_CURRENT_BUFFER ) {
- yyensure_buffer_stack ();
- YY_CURRENT_BUFFER_LVALUE =
- yy_create_buffer(yyin,YY_BUF_SIZE );
- }
-
- yy_load_buffer_state( );
- }
-
- while ( 1 ) /* loops until end-of-file is reached */
- {
- yy_cp = (yy_c_buf_p);
-
- /* Support of yytext. */
- *yy_cp = (yy_hold_char);
-
- /* yy_bp points to the position in yy_ch_buf of the start of
- * the current run.
- */
- yy_bp = yy_cp;
-
- yy_current_state = (yy_start);
-yy_match:
- do
- {
- register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 399 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- ++yy_cp;
- }
- while ( yy_current_state != 398 );
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
-
-yy_find_action:
- yy_act = yy_accept[yy_current_state];
-
- YY_DO_BEFORE_ACTION;
-
-do_action: /* This label is used only to access EOF actions. */
-
- switch ( yy_act )
- { /* beginning of action switch */
- case 0: /* must back up */
- /* undo the effects of YY_DO_BEFORE_ACTION */
- *yy_cp = (yy_hold_char);
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
- goto yy_find_action;
-
-case 1:
-YY_RULE_SETUP
-#line 94 "pars0lex.l"
-{
- yylval = sym_tab_add_int_lit(pars_sym_tab_global,
- atoi(yytext));
- return(PARS_INT_LIT);
-}
- YY_BREAK
-case 2:
-YY_RULE_SETUP
-#line 100 "pars0lex.l"
-{
- ut_error; /* not implemented */
-
- return(PARS_FLOAT_LIT);
-}
- YY_BREAK
-case 3:
-YY_RULE_SETUP
-#line 106 "pars0lex.l"
-{
- ulint type;
-
- yylval = sym_tab_add_bound_lit(pars_sym_tab_global,
- yytext + 1, &type);
-
- return((int) type);
-}
- YY_BREAK
-case 4:
-YY_RULE_SETUP
-#line 115 "pars0lex.l"
-{
- yylval = sym_tab_add_bound_id(pars_sym_tab_global,
- yytext + 1);
-
- return(PARS_ID_TOKEN);
-}
- YY_BREAK
-case 5:
-YY_RULE_SETUP
-#line 122 "pars0lex.l"
-{
-/* Quoted character string literals are handled in an explicit
-start state 'quoted'. This state is entered and the buffer for
-the scanned string is emptied upon encountering a starting quote.
-
-In the state 'quoted', only two actions are possible (defined below). */
- BEGIN(quoted);
- stringbuf_len = 0;
-}
- YY_BREAK
-case 6:
-/* rule 6 can match eol */
-YY_RULE_SETUP
-#line 131 "pars0lex.l"
-{
- /* Got a sequence of characters other than "'":
- append to string buffer */
- string_append(yytext, yyleng);
-}
- YY_BREAK
-case 7:
-YY_RULE_SETUP
-#line 136 "pars0lex.l"
-{
- /* Got a sequence of "'" characters:
- append half of them to string buffer,
- as "''" represents a single "'".
- We apply truncating division,
- so that "'''" will result in "'". */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- string literal. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_str_lit(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
- return(PARS_STR_LIT);
- }
-}
- YY_BREAK
-case 8:
-YY_RULE_SETUP
-#line 160 "pars0lex.l"
-{
-/* Quoted identifiers are handled in an explicit start state 'id'.
-This state is entered and the buffer for the scanned string is emptied
-upon encountering a starting quote.
-
-In the state 'id', only two actions are possible (defined below). */
- BEGIN(id);
- stringbuf_len = 0;
-}
- YY_BREAK
-case 9:
-/* rule 9 can match eol */
-YY_RULE_SETUP
-#line 169 "pars0lex.l"
-{
- /* Got a sequence of characters other than '"':
- append to string buffer */
- string_append(yytext, yyleng);
-}
- YY_BREAK
-case 10:
-YY_RULE_SETUP
-#line 174 "pars0lex.l"
-{
- /* Got a sequence of '"' characters:
- append half of them to string buffer,
- as '""' represents a single '"'.
- We apply truncating division,
- so that '"""' will result in '"'. */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- identifier. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_id(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
-
- return(PARS_ID_TOKEN);
- }
-}
- YY_BREAK
-case 11:
-YY_RULE_SETUP
-#line 199 "pars0lex.l"
-{
- yylval = sym_tab_add_null_lit(pars_sym_tab_global);
-
- return(PARS_NULL_LIT);
-}
- YY_BREAK
-case 12:
-YY_RULE_SETUP
-#line 205 "pars0lex.l"
-{
- /* Implicit cursor name */
- yylval = sym_tab_add_str_lit(pars_sym_tab_global,
- (byte*) yytext, yyleng);
- return(PARS_SQL_TOKEN);
-}
- YY_BREAK
-case 13:
-YY_RULE_SETUP
-#line 212 "pars0lex.l"
-{
- return(PARS_AND_TOKEN);
-}
- YY_BREAK
-case 14:
-YY_RULE_SETUP
-#line 216 "pars0lex.l"
-{
- return(PARS_OR_TOKEN);
-}
- YY_BREAK
-case 15:
-YY_RULE_SETUP
-#line 220 "pars0lex.l"
-{
- return(PARS_NOT_TOKEN);
-}
- YY_BREAK
-case 16:
-YY_RULE_SETUP
-#line 224 "pars0lex.l"
-{
- return(PARS_PROCEDURE_TOKEN);
-}
- YY_BREAK
-case 17:
-YY_RULE_SETUP
-#line 228 "pars0lex.l"
-{
- return(PARS_IN_TOKEN);
-}
- YY_BREAK
-case 18:
-YY_RULE_SETUP
-#line 232 "pars0lex.l"
-{
- return(PARS_OUT_TOKEN);
-}
- YY_BREAK
-case 19:
-YY_RULE_SETUP
-#line 236 "pars0lex.l"
-{
- return(PARS_BINARY_TOKEN);
-}
- YY_BREAK
-case 20:
-YY_RULE_SETUP
-#line 240 "pars0lex.l"
-{
- return(PARS_BLOB_TOKEN);
-}
- YY_BREAK
-case 21:
-YY_RULE_SETUP
-#line 244 "pars0lex.l"
-{
- return(PARS_INT_TOKEN);
-}
- YY_BREAK
-case 22:
-YY_RULE_SETUP
-#line 248 "pars0lex.l"
-{
- return(PARS_INT_TOKEN);
-}
- YY_BREAK
-case 23:
-YY_RULE_SETUP
-#line 252 "pars0lex.l"
-{
- return(PARS_FLOAT_TOKEN);
-}
- YY_BREAK
-case 24:
-YY_RULE_SETUP
-#line 256 "pars0lex.l"
-{
- return(PARS_CHAR_TOKEN);
-}
- YY_BREAK
-case 25:
-YY_RULE_SETUP
-#line 260 "pars0lex.l"
-{
- return(PARS_IS_TOKEN);
-}
- YY_BREAK
-case 26:
-YY_RULE_SETUP
-#line 264 "pars0lex.l"
-{
- return(PARS_BEGIN_TOKEN);
-}
- YY_BREAK
-case 27:
-YY_RULE_SETUP
-#line 268 "pars0lex.l"
-{
- return(PARS_END_TOKEN);
-}
- YY_BREAK
-case 28:
-YY_RULE_SETUP
-#line 272 "pars0lex.l"
-{
- return(PARS_IF_TOKEN);
-}
- YY_BREAK
-case 29:
-YY_RULE_SETUP
-#line 276 "pars0lex.l"
-{
- return(PARS_THEN_TOKEN);
-}
- YY_BREAK
-case 30:
-YY_RULE_SETUP
-#line 280 "pars0lex.l"
-{
- return(PARS_ELSE_TOKEN);
-}
- YY_BREAK
-case 31:
-YY_RULE_SETUP
-#line 284 "pars0lex.l"
-{
- return(PARS_ELSIF_TOKEN);
-}
- YY_BREAK
-case 32:
-YY_RULE_SETUP
-#line 288 "pars0lex.l"
-{
- return(PARS_LOOP_TOKEN);
-}
- YY_BREAK
-case 33:
-YY_RULE_SETUP
-#line 292 "pars0lex.l"
-{
- return(PARS_WHILE_TOKEN);
-}
- YY_BREAK
-case 34:
-YY_RULE_SETUP
-#line 296 "pars0lex.l"
-{
- return(PARS_RETURN_TOKEN);
-}
- YY_BREAK
-case 35:
-YY_RULE_SETUP
-#line 300 "pars0lex.l"
-{
- return(PARS_SELECT_TOKEN);
-}
- YY_BREAK
-case 36:
-YY_RULE_SETUP
-#line 304 "pars0lex.l"
-{
- return(PARS_SUM_TOKEN);
-}
- YY_BREAK
-case 37:
-YY_RULE_SETUP
-#line 308 "pars0lex.l"
-{
- return(PARS_COUNT_TOKEN);
-}
- YY_BREAK
-case 38:
-YY_RULE_SETUP
-#line 312 "pars0lex.l"
-{
- return(PARS_DISTINCT_TOKEN);
-}
- YY_BREAK
-case 39:
-YY_RULE_SETUP
-#line 316 "pars0lex.l"
-{
- return(PARS_FROM_TOKEN);
-}
- YY_BREAK
-case 40:
-YY_RULE_SETUP
-#line 320 "pars0lex.l"
-{
- return(PARS_WHERE_TOKEN);
-}
- YY_BREAK
-case 41:
-YY_RULE_SETUP
-#line 324 "pars0lex.l"
-{
- return(PARS_FOR_TOKEN);
-}
- YY_BREAK
-case 42:
-YY_RULE_SETUP
-#line 328 "pars0lex.l"
-{
- return(PARS_READ_TOKEN);
-}
- YY_BREAK
-case 43:
-YY_RULE_SETUP
-#line 332 "pars0lex.l"
-{
- return(PARS_ORDER_TOKEN);
-}
- YY_BREAK
-case 44:
-YY_RULE_SETUP
-#line 336 "pars0lex.l"
-{
- return(PARS_BY_TOKEN);
-}
- YY_BREAK
-case 45:
-YY_RULE_SETUP
-#line 340 "pars0lex.l"
-{
- return(PARS_ASC_TOKEN);
-}
- YY_BREAK
-case 46:
-YY_RULE_SETUP
-#line 344 "pars0lex.l"
-{
- return(PARS_DESC_TOKEN);
-}
- YY_BREAK
-case 47:
-YY_RULE_SETUP
-#line 348 "pars0lex.l"
-{
- return(PARS_INSERT_TOKEN);
-}
- YY_BREAK
-case 48:
-YY_RULE_SETUP
-#line 352 "pars0lex.l"
-{
- return(PARS_INTO_TOKEN);
-}
- YY_BREAK
-case 49:
-YY_RULE_SETUP
-#line 356 "pars0lex.l"
-{
- return(PARS_VALUES_TOKEN);
-}
- YY_BREAK
-case 50:
-YY_RULE_SETUP
-#line 360 "pars0lex.l"
-{
- return(PARS_UPDATE_TOKEN);
-}
- YY_BREAK
-case 51:
-YY_RULE_SETUP
-#line 364 "pars0lex.l"
-{
- return(PARS_SET_TOKEN);
-}
- YY_BREAK
-case 52:
-YY_RULE_SETUP
-#line 368 "pars0lex.l"
-{
- return(PARS_DELETE_TOKEN);
-}
- YY_BREAK
-case 53:
-YY_RULE_SETUP
-#line 372 "pars0lex.l"
-{
- return(PARS_CURRENT_TOKEN);
-}
- YY_BREAK
-case 54:
-YY_RULE_SETUP
-#line 376 "pars0lex.l"
-{
- return(PARS_OF_TOKEN);
-}
- YY_BREAK
-case 55:
-YY_RULE_SETUP
-#line 380 "pars0lex.l"
-{
- return(PARS_CREATE_TOKEN);
-}
- YY_BREAK
-case 56:
-YY_RULE_SETUP
-#line 384 "pars0lex.l"
-{
- return(PARS_TABLE_TOKEN);
-}
- YY_BREAK
-case 57:
-YY_RULE_SETUP
-#line 388 "pars0lex.l"
-{
- return(PARS_INDEX_TOKEN);
-}
- YY_BREAK
-case 58:
-YY_RULE_SETUP
-#line 392 "pars0lex.l"
-{
- return(PARS_UNIQUE_TOKEN);
-}
- YY_BREAK
-case 59:
-YY_RULE_SETUP
-#line 396 "pars0lex.l"
-{
- return(PARS_CLUSTERED_TOKEN);
-}
- YY_BREAK
-case 60:
-YY_RULE_SETUP
-#line 400 "pars0lex.l"
-{
- return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
-}
- YY_BREAK
-case 61:
-YY_RULE_SETUP
-#line 404 "pars0lex.l"
-{
- return(PARS_ON_TOKEN);
-}
- YY_BREAK
-case 62:
-YY_RULE_SETUP
-#line 408 "pars0lex.l"
-{
- return(PARS_DECLARE_TOKEN);
-}
- YY_BREAK
-case 63:
-YY_RULE_SETUP
-#line 412 "pars0lex.l"
-{
- return(PARS_CURSOR_TOKEN);
-}
- YY_BREAK
-case 64:
-YY_RULE_SETUP
-#line 416 "pars0lex.l"
-{
- return(PARS_OPEN_TOKEN);
-}
- YY_BREAK
-case 65:
-YY_RULE_SETUP
-#line 420 "pars0lex.l"
-{
- return(PARS_FETCH_TOKEN);
-}
- YY_BREAK
-case 66:
-YY_RULE_SETUP
-#line 424 "pars0lex.l"
-{
- return(PARS_CLOSE_TOKEN);
-}
- YY_BREAK
-case 67:
-YY_RULE_SETUP
-#line 428 "pars0lex.l"
-{
- return(PARS_NOTFOUND_TOKEN);
-}
- YY_BREAK
-case 68:
-YY_RULE_SETUP
-#line 432 "pars0lex.l"
-{
- return(PARS_TO_CHAR_TOKEN);
-}
- YY_BREAK
-case 69:
-YY_RULE_SETUP
-#line 436 "pars0lex.l"
-{
- return(PARS_TO_NUMBER_TOKEN);
-}
- YY_BREAK
-case 70:
-YY_RULE_SETUP
-#line 440 "pars0lex.l"
-{
- return(PARS_TO_BINARY_TOKEN);
-}
- YY_BREAK
-case 71:
-YY_RULE_SETUP
-#line 444 "pars0lex.l"
-{
- return(PARS_BINARY_TO_NUMBER_TOKEN);
-}
- YY_BREAK
-case 72:
-YY_RULE_SETUP
-#line 448 "pars0lex.l"
-{
- return(PARS_SUBSTR_TOKEN);
-}
- YY_BREAK
-case 73:
-YY_RULE_SETUP
-#line 452 "pars0lex.l"
-{
- return(PARS_REPLSTR_TOKEN);
-}
- YY_BREAK
-case 74:
-YY_RULE_SETUP
-#line 456 "pars0lex.l"
-{
- return(PARS_CONCAT_TOKEN);
-}
- YY_BREAK
-case 75:
-YY_RULE_SETUP
-#line 460 "pars0lex.l"
-{
- return(PARS_INSTR_TOKEN);
-}
- YY_BREAK
-case 76:
-YY_RULE_SETUP
-#line 464 "pars0lex.l"
-{
- return(PARS_LENGTH_TOKEN);
-}
- YY_BREAK
-case 77:
-YY_RULE_SETUP
-#line 468 "pars0lex.l"
-{
- return(PARS_SYSDATE_TOKEN);
-}
- YY_BREAK
-case 78:
-YY_RULE_SETUP
-#line 472 "pars0lex.l"
-{
- return(PARS_PRINTF_TOKEN);
-}
- YY_BREAK
-case 79:
-YY_RULE_SETUP
-#line 476 "pars0lex.l"
-{
- return(PARS_ASSERT_TOKEN);
-}
- YY_BREAK
-case 80:
-YY_RULE_SETUP
-#line 480 "pars0lex.l"
-{
- return(PARS_RND_TOKEN);
-}
- YY_BREAK
-case 81:
-YY_RULE_SETUP
-#line 484 "pars0lex.l"
-{
- return(PARS_RND_STR_TOKEN);
-}
- YY_BREAK
-case 82:
-YY_RULE_SETUP
-#line 488 "pars0lex.l"
-{
- return(PARS_ROW_PRINTF_TOKEN);
-}
- YY_BREAK
-case 83:
-YY_RULE_SETUP
-#line 492 "pars0lex.l"
-{
- return(PARS_COMMIT_TOKEN);
-}
- YY_BREAK
-case 84:
-YY_RULE_SETUP
-#line 496 "pars0lex.l"
-{
- return(PARS_ROLLBACK_TOKEN);
-}
- YY_BREAK
-case 85:
-YY_RULE_SETUP
-#line 500 "pars0lex.l"
-{
- return(PARS_WORK_TOKEN);
-}
- YY_BREAK
-case 86:
-YY_RULE_SETUP
-#line 504 "pars0lex.l"
-{
- return(PARS_UNSIGNED_TOKEN);
-}
- YY_BREAK
-case 87:
-YY_RULE_SETUP
-#line 508 "pars0lex.l"
-{
- return(PARS_EXIT_TOKEN);
-}
- YY_BREAK
-case 88:
-YY_RULE_SETUP
-#line 512 "pars0lex.l"
-{
- return(PARS_FUNCTION_TOKEN);
-}
- YY_BREAK
-case 89:
-YY_RULE_SETUP
-#line 516 "pars0lex.l"
-{
- return(PARS_LOCK_TOKEN);
-}
- YY_BREAK
-case 90:
-YY_RULE_SETUP
-#line 520 "pars0lex.l"
-{
- return(PARS_SHARE_TOKEN);
-}
- YY_BREAK
-case 91:
-YY_RULE_SETUP
-#line 524 "pars0lex.l"
-{
- return(PARS_MODE_TOKEN);
-}
- YY_BREAK
-case 92:
-YY_RULE_SETUP
-#line 528 "pars0lex.l"
-{
- yylval = sym_tab_add_id(pars_sym_tab_global,
- (byte*)yytext,
- ut_strlen(yytext));
- return(PARS_ID_TOKEN);
-}
- YY_BREAK
-case 93:
-YY_RULE_SETUP
-#line 535 "pars0lex.l"
-{
- return(PARS_DDOT_TOKEN);
-}
- YY_BREAK
-case 94:
-YY_RULE_SETUP
-#line 539 "pars0lex.l"
-{
- return(PARS_ASSIGN_TOKEN);
-}
- YY_BREAK
-case 95:
-YY_RULE_SETUP
-#line 543 "pars0lex.l"
-{
- return(PARS_LE_TOKEN);
-}
- YY_BREAK
-case 96:
-YY_RULE_SETUP
-#line 547 "pars0lex.l"
-{
- return(PARS_GE_TOKEN);
-}
- YY_BREAK
-case 97:
-YY_RULE_SETUP
-#line 551 "pars0lex.l"
-{
- return(PARS_NE_TOKEN);
-}
- YY_BREAK
-case 98:
-YY_RULE_SETUP
-#line 555 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 99:
-YY_RULE_SETUP
-#line 560 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 100:
-YY_RULE_SETUP
-#line 565 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 101:
-YY_RULE_SETUP
-#line 570 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 102:
-YY_RULE_SETUP
-#line 575 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 103:
-YY_RULE_SETUP
-#line 580 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 104:
-YY_RULE_SETUP
-#line 585 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 105:
-YY_RULE_SETUP
-#line 590 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 106:
-YY_RULE_SETUP
-#line 595 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 107:
-YY_RULE_SETUP
-#line 600 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 108:
-YY_RULE_SETUP
-#line 605 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 109:
-YY_RULE_SETUP
-#line 610 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 110:
-YY_RULE_SETUP
-#line 615 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 111:
-YY_RULE_SETUP
-#line 620 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 112:
-YY_RULE_SETUP
-#line 625 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 113:
-YY_RULE_SETUP
-#line 630 "pars0lex.l"
-BEGIN(comment); /* eat up comment */
- YY_BREAK
-case 114:
-/* rule 114 can match eol */
-YY_RULE_SETUP
-#line 632 "pars0lex.l"
-
- YY_BREAK
-case 115:
-/* rule 115 can match eol */
-YY_RULE_SETUP
-#line 633 "pars0lex.l"
-
- YY_BREAK
-case 116:
-YY_RULE_SETUP
-#line 634 "pars0lex.l"
-BEGIN(INITIAL);
- YY_BREAK
-case 117:
-/* rule 117 can match eol */
-YY_RULE_SETUP
-#line 636 "pars0lex.l"
-/* eat up whitespace */
- YY_BREAK
-case 118:
-YY_RULE_SETUP
-#line 639 "pars0lex.l"
-{
- fprintf(stderr,"Unrecognized character: %02x\n",
- *yytext);
-
- ut_error;
-
- return(0);
-}
- YY_BREAK
-case 119:
-YY_RULE_SETUP
-#line 648 "pars0lex.l"
-YY_FATAL_ERROR( "flex scanner jammed" );
- YY_BREAK
-#line 1916 "_flex_tmp.c"
-case YY_STATE_EOF(INITIAL):
-case YY_STATE_EOF(comment):
-case YY_STATE_EOF(quoted):
-case YY_STATE_EOF(id):
- yyterminate();
-
- case YY_END_OF_BUFFER:
- {
- /* Amount of text matched not including the EOB char. */
- int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
-
- /* Undo the effects of YY_DO_BEFORE_ACTION. */
- *yy_cp = (yy_hold_char);
- YY_RESTORE_YY_MORE_OFFSET
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
- {
- /* We're scanning a new file or input source. It's
- * possible that this happened because the user
- * just pointed yyin at a new source and called
- * yylex(). If so, then we have to assure
- * consistency between YY_CURRENT_BUFFER and our
- * globals. Here is the right place to do so, because
- * this is the first action (other than possibly a
- * back-up) that will match for the new input source.
- */
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
- }
-
- /* Note that here we test for yy_c_buf_p "<=" to the position
- * of the first EOB in the buffer, since yy_c_buf_p will
- * already have been incremented past the NUL character
- * (since all states make transitions on EOB to the
- * end-of-buffer state). Contrast this with the test
- * in input().
- */
- if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
- { /* This was really a NUL. */
- yy_state_type yy_next_state;
-
- (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( );
-
- /* Okay, we're now positioned to make the NUL
- * transition. We couldn't have
- * yy_get_previous_state() go ahead and do it
- * for us because it doesn't know how to deal
- * with the possibility of jamming (and we don't
- * want to build jamming into it because then it
- * will run more slowly).
- */
-
- yy_next_state = yy_try_NUL_trans( yy_current_state );
-
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
-
- if ( yy_next_state )
- {
- /* Consume the NUL. */
- yy_cp = ++(yy_c_buf_p);
- yy_current_state = yy_next_state;
- goto yy_match;
- }
-
- else
- {
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
- goto yy_find_action;
- }
- }
-
- else switch ( yy_get_next_buffer( ) )
- {
- case EOB_ACT_END_OF_FILE:
- {
- (yy_did_buffer_switch_on_eof) = 0;
-
- if ( yywrap( ) )
- {
- /* Note: because we've taken care in
- * yy_get_next_buffer() to have set up
- * yytext, we can now set up
- * yy_c_buf_p so that if some total
- * hoser (like flex itself) wants to
- * call the scanner after we return the
- * YY_NULL, it'll still work - another
- * YY_NULL will get returned.
- */
- (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
-
- yy_act = YY_STATE_EOF(YY_START);
- goto do_action;
- }
-
- else
- {
- if ( ! (yy_did_buffer_switch_on_eof) )
- YY_NEW_FILE;
- }
- break;
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- (yy_c_buf_p) =
- (yytext_ptr) + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( );
-
- yy_cp = (yy_c_buf_p);
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
- goto yy_match;
-
- case EOB_ACT_LAST_MATCH:
- (yy_c_buf_p) =
- &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
-
- yy_current_state = yy_get_previous_state( );
-
- yy_cp = (yy_c_buf_p);
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
- goto yy_find_action;
- }
- break;
- }
-
- default:
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--no action found" );
- } /* end of action switch */
- } /* end of scanning one token */
-} /* end of yylex */
-
-/* yy_get_next_buffer - try to read in a new buffer
- *
- * Returns a code representing an action:
- * EOB_ACT_LAST_MATCH -
- * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
- * EOB_ACT_END_OF_FILE - end of file
- */
-static int yy_get_next_buffer (void)
-{
- register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
- register char *source = (yytext_ptr);
- register int number_to_move, i;
- int ret_val;
-
- if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--end of buffer missed" );
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
- { /* Don't try to fill the buffer, so this is an EOF. */
- if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
- {
- /* We matched a single character, the EOB, so
- * treat this as a final EOF.
- */
- return EOB_ACT_END_OF_FILE;
- }
-
- else
- {
- /* We matched some text prior to the EOB, first
- * process it.
- */
- return EOB_ACT_LAST_MATCH;
- }
- }
-
- /* Try to read more data. */
-
- /* First move last chars to start of buffer. */
- number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
-
- for ( i = 0; i < number_to_move; ++i )
- *(dest++) = *(source++);
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
- /* don't do the read, it's not guaranteed to return an EOF,
- * just force an EOF
- */
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
-
- else
- {
- size_t num_to_read =
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
-
- while ( num_to_read <= 0 )
- { /* Not enough room in the buffer - grow it. */
-
- /* just a shorter name for the current buffer */
- YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
-
- int yy_c_buf_p_offset =
- (int) ((yy_c_buf_p) - b->yy_ch_buf);
-
- if ( b->yy_is_our_buffer )
- {
- int new_size = b->yy_buf_size * 2;
-
- if ( new_size <= 0 )
- b->yy_buf_size += b->yy_buf_size / 8;
- else
- b->yy_buf_size *= 2;
-
- b->yy_ch_buf = (char *)
- /* Include room in for 2 EOB chars. */
- yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
- }
- else
- /* Can't grow it, we don't own it. */
- b->yy_ch_buf = 0;
-
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR(
- "fatal error - scanner input buffer overflow" );
-
- (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
-
- num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
- number_to_move - 1;
-
- }
-
- if ( num_to_read > YY_READ_BUF_SIZE )
- num_to_read = YY_READ_BUF_SIZE;
-
- /* Read in more data. */
- YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- (yy_n_chars), num_to_read );
-
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- if ( (yy_n_chars) == 0 )
- {
- if ( number_to_move == YY_MORE_ADJ )
- {
- ret_val = EOB_ACT_END_OF_FILE;
- yyrestart(yyin );
- }
-
- else
- {
- ret_val = EOB_ACT_LAST_MATCH;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
- YY_BUFFER_EOF_PENDING;
- }
- }
-
- else
- ret_val = EOB_ACT_CONTINUE_SCAN;
-
- (yy_n_chars) += number_to_move;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
-
- (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
-
- return ret_val;
-}
-
-/* yy_get_previous_state - get the state just before the EOB char was reached */
-
- static yy_state_type yy_get_previous_state (void)
-{
- register yy_state_type yy_current_state;
- register char *yy_cp;
-
- yy_current_state = (yy_start);
-
- for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
- {
- register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 399 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- }
-
- return yy_current_state;
-}
-
-/* yy_try_NUL_trans - try to make a transition on the NUL character
- *
- * synopsis
- * next_state = yy_try_NUL_trans( current_state );
- */
- static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
-{
- register int yy_is_jam;
- register char *yy_cp = (yy_c_buf_p);
-
- register YY_CHAR yy_c = 1;
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 399 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 398);
-
- return yy_is_jam ? 0 : yy_current_state;
-}
-
-#ifndef YY_NO_INPUT
-#ifdef __cplusplus
- static int yyinput (void)
-#else
- static int input (void)
-#endif
-
-{
- int c;
-
- *(yy_c_buf_p) = (yy_hold_char);
-
- if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
- {
- /* yy_c_buf_p now points to the character we want to return.
- * If this occurs *before* the EOB characters, then it's a
- * valid NUL; if not, then we've hit the end of the buffer.
- */
- if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
- /* This was really a NUL. */
- *(yy_c_buf_p) = '\0';
-
- else
- { /* need more input */
- int offset = (int)((yy_c_buf_p) - (yytext_ptr));
- ++(yy_c_buf_p);
-
- switch ( yy_get_next_buffer( ) )
- {
- case EOB_ACT_LAST_MATCH:
- /* This happens because yy_g_n_b()
- * sees that we've accumulated a
- * token and flags that we need to
- * try matching the token before
- * proceeding. But for input(),
- * there's no matching to consider.
- * So convert the EOB_ACT_LAST_MATCH
- * to EOB_ACT_END_OF_FILE.
- */
-
- /* Reset buffer status. */
- yyrestart(yyin );
-
- /*FALLTHROUGH*/
-
- case EOB_ACT_END_OF_FILE:
- {
- if ( yywrap( ) )
- return EOF;
-
- if ( ! (yy_did_buffer_switch_on_eof) )
- YY_NEW_FILE;
-#ifdef __cplusplus
- return yyinput();
-#else
- return input();
-#endif
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- (yy_c_buf_p) = (yytext_ptr) + offset;
- break;
- }
- }
- }
-
- c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
- *(yy_c_buf_p) = '\0'; /* preserve yytext */
- (yy_hold_char) = *++(yy_c_buf_p);
-
- return c;
-}
-#endif /* ifndef YY_NO_INPUT */
-
-/** Immediately switch to a different input stream.
- * @param input_file A readable stream.
- *
- * @note This function does not reset the start condition to @c INITIAL .
- */
- void yyrestart (FILE * input_file )
-{
-
- if ( ! YY_CURRENT_BUFFER ){
- yyensure_buffer_stack ();
- YY_CURRENT_BUFFER_LVALUE =
- yy_create_buffer(yyin,YY_BUF_SIZE );
- }
-
- yy_init_buffer(YY_CURRENT_BUFFER,input_file );
- yy_load_buffer_state( );
-}
-
-/** Switch to a different input buffer.
- * @param new_buffer The new input buffer.
- *
- */
- void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
-{
-
- /* TODO. We should be able to replace this entire function body
- * with
- * yypop_buffer_state();
- * yypush_buffer_state(new_buffer);
- */
- yyensure_buffer_stack ();
- if ( YY_CURRENT_BUFFER == new_buffer )
- return;
-
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *(yy_c_buf_p) = (yy_hold_char);
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
- yy_load_buffer_state( );
-
- /* We don't actually know whether we did this switch during
- * EOF (yywrap()) processing, but the only time this flag
- * is looked at is after yywrap() is called, so it's safe
- * to go ahead and always set it.
- */
- (yy_did_buffer_switch_on_eof) = 1;
-}
-
-static void yy_load_buffer_state (void)
-{
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
- yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
- (yy_hold_char) = *(yy_c_buf_p);
-}
-
-/** Allocate and initialize an input buffer state.
- * @param file A readable stream.
- * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- *
- * @return the allocated buffer state.
- */
- YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
-{
- YY_BUFFER_STATE b;
-
- b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) );
- if ( ! b )
- YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
-
- b->yy_buf_size = size;
-
- /* yy_ch_buf has to be 2 characters longer than the size given because
- * we need to put in 2 end-of-buffer characters.
- */
- b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 );
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
-
- b->yy_is_our_buffer = 1;
-
- yy_init_buffer(b,file );
-
- return b;
-}
-
-/** Destroy the buffer.
- * @param b a buffer created with yy_create_buffer()
- *
- */
- void yy_delete_buffer (YY_BUFFER_STATE b )
-{
-
- if ( ! b )
- return;
-
- if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
- YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
-
- if ( b->yy_is_our_buffer )
- yyfree((void *) b->yy_ch_buf );
-
- yyfree((void *) b );
-}
-
-/* Initializes or reinitializes a buffer.
- * This function is sometimes called more than once on the same buffer,
- * such as during a yyrestart() or at EOF.
- */
- static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
-
-{
- int oerrno = errno;
-
- yy_flush_buffer(b );
-
- b->yy_input_file = file;
- b->yy_fill_buffer = 1;
-
- /* If b is the current buffer, then yy_init_buffer was _probably_
- * called from yyrestart() or through yy_get_next_buffer.
- * In that case, we don't want to reset the lineno or column.
- */
- if (b != YY_CURRENT_BUFFER){
- b->yy_bs_lineno = 1;
- b->yy_bs_column = 0;
- }
-
- b->yy_is_interactive = 0;
-
- errno = oerrno;
-}
-
-/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
- * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- *
- */
- void yy_flush_buffer (YY_BUFFER_STATE b )
-{
- if ( ! b )
- return;
-
- b->yy_n_chars = 0;
-
- /* We always need two end-of-buffer characters. The first causes
- * a transition to the end-of-buffer state. The second causes
- * a jam in that state.
- */
- b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
- b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
-
- b->yy_buf_pos = &b->yy_ch_buf[0];
-
- b->yy_at_bol = 1;
- b->yy_buffer_status = YY_BUFFER_NEW;
-
- if ( b == YY_CURRENT_BUFFER )
- yy_load_buffer_state( );
-}
-
-/** Pushes the new state onto the stack. The new state becomes
- * the current state. This function will allocate the stack
- * if necessary.
- * @param new_buffer The new state.
- *
- */
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
-{
- if (new_buffer == NULL)
- return;
-
- yyensure_buffer_stack();
-
- /* This block is copied from yy_switch_to_buffer. */
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *(yy_c_buf_p) = (yy_hold_char);
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- /* Only push if top exists. Otherwise, replace top. */
- if (YY_CURRENT_BUFFER)
- (yy_buffer_stack_top)++;
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
-
- /* copied from yy_switch_to_buffer. */
- yy_load_buffer_state( );
- (yy_did_buffer_switch_on_eof) = 1;
-}
-
-/** Removes and deletes the top of the stack, if present.
- * The next element becomes the new top.
- *
- */
-void yypop_buffer_state (void)
-{
- if (!YY_CURRENT_BUFFER)
- return;
-
- yy_delete_buffer(YY_CURRENT_BUFFER );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- if ((yy_buffer_stack_top) > 0)
- --(yy_buffer_stack_top);
-
- if (YY_CURRENT_BUFFER) {
- yy_load_buffer_state( );
- (yy_did_buffer_switch_on_eof) = 1;
- }
-}
-
-/* Allocates the stack if it does not exist.
- * Guarantees space for at least one push.
- */
-static void yyensure_buffer_stack (void)
-{
- int num_to_alloc;
-
- if (!(yy_buffer_stack)) {
-
- /* First allocation is just for 2 elements, since we don't know if this
- * scanner will even need a stack. We use 2 instead of 1 to avoid an
- * immediate realloc on the next call.
- */
- num_to_alloc = 1;
- (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
- (num_to_alloc * sizeof(struct yy_buffer_state*)
- );
-
- memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
- (yy_buffer_stack_max) = num_to_alloc;
- (yy_buffer_stack_top) = 0;
- return;
- }
-
- if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
-
- /* Increase the buffer to prepare for a possible push. */
- int grow_size = 8 /* arbitrary grow size */;
-
- num_to_alloc = (yy_buffer_stack_max) + grow_size;
- (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
- ((yy_buffer_stack),
- num_to_alloc * sizeof(struct yy_buffer_state*)
- );
-
- /* zero only the new slots.*/
- memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
- (yy_buffer_stack_max) = num_to_alloc;
- }
-}
-
-#ifndef YY_EXIT_FAILURE
-#define YY_EXIT_FAILURE 2
-#endif
-
-static void yy_fatal_error (yyconst char* msg )
-{
- (void) fprintf( stderr, "%s\n", msg );
- exit( YY_EXIT_FAILURE );
-}
-
-/* Redefine yyless() so it works in section 3 code. */
-
-#undef yyless
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- yytext[yyleng] = (yy_hold_char); \
- (yy_c_buf_p) = yytext + yyless_macro_arg; \
- (yy_hold_char) = *(yy_c_buf_p); \
- *(yy_c_buf_p) = '\0'; \
- yyleng = yyless_macro_arg; \
- } \
- while ( 0 )
-
-/* Accessor methods (get/set functions) to struct members. */
-
-/** Get the current line number.
- *
- */
-int yyget_lineno (void)
-{
-
- return yylineno;
-}
-
-/** Get the input stream.
- *
- */
-FILE *yyget_in (void)
-{
- return yyin;
-}
-
-/** Get the output stream.
- *
- */
-FILE *yyget_out (void)
-{
- return yyout;
-}
-
-/** Get the length of the current token.
- *
- */
-int yyget_leng (void)
-{
- return yyleng;
-}
-
-/** Get the current token.
- *
- */
-
-char *yyget_text (void)
-{
- return yytext;
-}
-
-/** Set the current line number.
- * @param line_number
- *
- */
-void yyset_lineno (int line_number )
-{
-
- yylineno = line_number;
-}
-
-/** Set the input stream. This does not discard the current
- * input buffer.
- * @param in_str A readable stream.
- *
- * @see yy_switch_to_buffer
- */
-void yyset_in (FILE * in_str )
-{
- yyin = in_str ;
-}
-
-void yyset_out (FILE * out_str )
-{
- yyout = out_str ;
-}
-
-int yyget_debug (void)
-{
- return yy_flex_debug;
-}
-
-void yyset_debug (int bdebug )
-{
- yy_flex_debug = bdebug ;
-}
-
-/* yylex_destroy is for both reentrant and non-reentrant scanners. */
-int yylex_destroy (void)
-{
-
- /* Pop the buffer stack, destroying each element. */
- while(YY_CURRENT_BUFFER){
- yy_delete_buffer(YY_CURRENT_BUFFER );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- yypop_buffer_state();
- }
-
- /* Destroy the stack itself. */
- yyfree((yy_buffer_stack) );
- (yy_buffer_stack) = NULL;
-
- return 0;
-}
-
-/*
- * Internal utility routines.
- */
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
-{
- register int i;
- for ( i = 0; i < n; ++i )
- s1[i] = s2[i];
-}
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * s )
-{
- register int n;
- for ( n = 0; s[n]; ++n )
- ;
-
- return n;
-}
-#endif
-
-void *yyalloc (yy_size_t size )
-{
- return (void *) malloc( size );
-}
-
-void *yyrealloc (void * ptr, yy_size_t size )
-{
- /* The cast to (char *) in the following accommodates both
- * implementations that use char* generic pointers, and those
- * that use void* generic pointers. It works with the latter
- * because both ANSI C and C++ allow castless assignment from
- * any pointer type to void*, and deal with argument conversions
- * as though doing an assignment.
- */
- return (void *) realloc( (char *) ptr, size );
-}
-
-void yyfree (void * ptr )
-{
- free( (char *) ptr ); /* see yyrealloc() for (char *) cast */
-}
-
-#define YYTABLES_NAME "yytables"
-
-#undef YY_NEW_FILE
-#undef YY_FLUSH_BUFFER
-#undef yy_set_bol
-#undef yy_new_buffer
-#undef yy_set_interactive
-#undef yytext_ptr
-#undef YY_DO_BEFORE_ACTION
-
-#ifdef YY_DECL_IS_OURS
-#undef YY_DECL_IS_OURS
-#undef YY_DECL
-#endif
-#line 648 "pars0lex.l"
-
-
-
diff --git a/storage/innobase/pars/make_bison.sh b/storage/innobase/pars/make_bison.sh
deleted file mode 100755
index c11456230c4..00000000000
--- a/storage/innobase/pars/make_bison.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-#
-# generate parser files from bison input files.
-
-set -eu
-
-bison -d pars0grm.y
-mv pars0grm.tab.c pars0grm.c
-mv pars0grm.tab.h pars0grm.h
-cp pars0grm.h ../include
diff --git a/storage/innobase/pars/make_flex.sh b/storage/innobase/pars/make_flex.sh
deleted file mode 100755
index c015327bf8c..00000000000
--- a/storage/innobase/pars/make_flex.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# generate lexer files from flex input files.
-
-set -eu
-
-TMPFILE=_flex_tmp.c
-OUTFILE=lexyy.c
-
-flex -o $TMPFILE pars0lex.l
-
-# AIX needs its includes done in a certain order, so include "univ.i" first
-# to be sure we get it right.
-echo '#include "univ.i"' > $OUTFILE
-
-# flex assigns a pointer to an int in one place without a cast, resulting in
-# a warning on Win64. this adds the cast.
-sed -e 's/int offset = (yy_c_buf_p) - (yytext_ptr);/int offset = (int)((yy_c_buf_p) - (yytext_ptr));/;' < $TMPFILE >> $OUTFILE
-
-rm $TMPFILE
diff --git a/storage/innobase/pars/pars0grm.c b/storage/innobase/pars/pars0grm.c
deleted file mode 100644
index 2e39b05bada..00000000000
--- a/storage/innobase/pars/pars0grm.c
+++ /dev/null
@@ -1,2571 +0,0 @@
-/* A Bison parser, made by GNU Bison 1.875d. */
-
-/* Skeleton parser for Yacc-like parsing with Bison,
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* As a special exception, when this file is copied by Bison into a
- Bison output file, you may use that output file without restriction.
- This special exception was added by the Free Software Foundation
- in version 1.24 of Bison. */
-
-/* Written by Richard Stallman by simplifying the original so called
- ``semantic'' parser. */
-
-/* All symbols defined below should begin with yy or YY, to avoid
- infringing on user name space. This should be done even for local
- variables, as they might otherwise be expanded by user macros.
- There are some unavoidable exceptions within include files to
- define necessary library symbols; they are noted "INFRINGES ON
- USER NAME SPACE" below. */
-
-/* Identify Bison output. */
-#define YYBISON 1
-
-/* Skeleton name. */
-#define YYSKELETON_NAME "yacc.c"
-
-/* Pure parsers. */
-#define YYPURE 0
-
-/* Using locations. */
-#define YYLSP_NEEDED 0
-
-
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- NEG = 350
- };
-#endif
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define NEG 350
-
-
-
-
-/* Copy the first part of user declarations. */
-#line 13 "pars0grm.y"
-
-/* The value of the semantic attribute is a pointer to a query tree node
-que_node_t */
-
-#include "univ.i"
-#include <math.h> /* Can't be before univ.i */
-#include "pars0pars.h"
-#include "mem0mem.h"
-#include "que0types.h"
-#include "que0que.h"
-#include "row0sel.h"
-
-#define YYSTYPE que_node_t*
-
-/* #define __STDC__ */
-
-int
-yylex(void);
-
-
-/* Enabling traces. */
-#ifndef YYDEBUG
-# define YYDEBUG 0
-#endif
-
-/* Enabling verbose error messages. */
-#ifdef YYERROR_VERBOSE
-# undef YYERROR_VERBOSE
-# define YYERROR_VERBOSE 1
-#else
-# define YYERROR_VERBOSE 0
-#endif
-
-#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-
-
-/* Copy the second part of user declarations. */
-
-
-/* Line 214 of yacc.c. */
-#line 297 "pars0grm.tab.c"
-
-#if ! defined (yyoverflow) || YYERROR_VERBOSE
-
-# ifndef YYFREE
-# define YYFREE free
-# endif
-# ifndef YYMALLOC
-# define YYMALLOC malloc
-# endif
-
-/* The parser invokes alloca or malloc; define the necessary symbols. */
-
-# ifdef YYSTACK_USE_ALLOCA
-# if YYSTACK_USE_ALLOCA
-# define YYSTACK_ALLOC alloca
-# endif
-# else
-# if defined (alloca) || defined (_ALLOCA_H)
-# define YYSTACK_ALLOC alloca
-# else
-# ifdef __GNUC__
-# define YYSTACK_ALLOC __builtin_alloca
-# endif
-# endif
-# endif
-
-# ifdef YYSTACK_ALLOC
- /* Pacify GCC's `empty if-body' warning. */
-# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0)
-# else
-# if defined (__STDC__) || defined (__cplusplus)
-# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# define YYSIZE_T size_t
-# endif
-# define YYSTACK_ALLOC YYMALLOC
-# define YYSTACK_FREE YYFREE
-# endif
-#endif /* ! defined (yyoverflow) || YYERROR_VERBOSE */
-
-
-#if (! defined (yyoverflow) \
- && (! defined (__cplusplus) \
- || (defined (YYSTYPE_IS_TRIVIAL) && YYSTYPE_IS_TRIVIAL)))
-
-/* A type that is properly aligned for any stack member. */
-union yyalloc
-{
- short int yyss;
- YYSTYPE yyvs;
- };
-
-/* The size of the maximum gap between one aligned stack and the next. */
-# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
-
-/* The size of an array large to enough to hold all stacks, each with
- N elements. */
-# define YYSTACK_BYTES(N) \
- ((N) * (sizeof (short int) + sizeof (YYSTYPE)) \
- + YYSTACK_GAP_MAXIMUM)
-
-/* Copy COUNT objects from FROM to TO. The source and destination do
- not overlap. */
-# ifndef YYCOPY
-# if defined (__GNUC__) && 1 < __GNUC__
-# define YYCOPY(To, From, Count) \
- __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
-# else
-# define YYCOPY(To, From, Count) \
- do \
- { \
- register YYSIZE_T yyi; \
- for (yyi = 0; yyi < (Count); yyi++) \
- (To)[yyi] = (From)[yyi]; \
- } \
- while (0)
-# endif
-# endif
-
-/* Relocate STACK from its old location to the new one. The
- local variables YYSIZE and YYSTACKSIZE give the old and new number of
- elements in the stack, and YYPTR gives the new location of the
- stack. Advance YYPTR to a properly aligned location for the next
- stack. */
-# define YYSTACK_RELOCATE(Stack) \
- do \
- { \
- YYSIZE_T yynewbytes; \
- YYCOPY (&yyptr->Stack, Stack, yysize); \
- Stack = &yyptr->Stack; \
- yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
- yyptr += yynewbytes / sizeof (*yyptr); \
- } \
- while (0)
-
-#endif
-
-#if defined (__STDC__) || defined (__cplusplus)
- typedef signed char yysigned_char;
-#else
- typedef short int yysigned_char;
-#endif
-
-/* YYFINAL -- State number of the termination state. */
-#define YYFINAL 5
-/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 752
-
-/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS 111
-/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS 70
-/* YYNRULES -- Number of rules. */
-#define YYNRULES 175
-/* YYNRULES -- Number of states. */
-#define YYNSTATES 339
-
-/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
-#define YYUNDEFTOK 2
-#define YYMAXUTOK 350
-
-#define YYTRANSLATE(YYX) \
- ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
-
-/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
-static const unsigned char yytranslate[] =
-{
- 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 103, 2, 2,
- 105, 106, 100, 99, 108, 98, 2, 101, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 104,
- 96, 95, 97, 107, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 109, 2, 110, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
- 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
- 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
- 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
- 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
- 102
-};
-
-#if YYDEBUG
-/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
- YYRHS. */
-static const unsigned short int yyprhs[] =
-{
- 0, 0, 3, 6, 8, 11, 14, 17, 20, 23,
- 26, 29, 32, 35, 38, 41, 44, 47, 50, 53,
- 56, 59, 62, 65, 68, 71, 73, 76, 78, 83,
- 85, 87, 89, 91, 93, 95, 97, 101, 105, 109,
- 113, 116, 120, 124, 128, 132, 136, 140, 144, 148,
- 152, 155, 159, 163, 165, 167, 169, 171, 173, 175,
- 177, 179, 181, 183, 185, 186, 188, 192, 199, 204,
- 206, 208, 210, 214, 216, 220, 221, 223, 227, 228,
- 230, 234, 236, 241, 247, 252, 253, 255, 259, 261,
- 265, 267, 268, 271, 272, 275, 276, 281, 282, 284,
- 286, 287, 292, 301, 305, 311, 314, 318, 320, 324,
- 329, 334, 337, 340, 344, 347, 350, 353, 357, 362,
- 364, 367, 368, 371, 373, 381, 388, 399, 401, 403,
- 406, 409, 414, 419, 425, 427, 431, 432, 436, 437,
- 439, 440, 443, 444, 446, 454, 456, 460, 461, 463,
- 464, 466, 477, 480, 483, 485, 487, 489, 491, 493,
- 497, 501, 502, 504, 508, 512, 513, 515, 518, 525,
- 530, 532, 534, 535, 537, 540
-};
-
-/* YYRHS -- A `-1'-separated list of the rules' RHS. */
-static const short int yyrhs[] =
-{
- 112, 0, -1, 180, 104, -1, 118, -1, 119, 104,
- -1, 151, 104, -1, 152, 104, -1, 153, 104, -1,
- 150, 104, -1, 154, 104, -1, 146, 104, -1, 133,
- 104, -1, 135, 104, -1, 145, 104, -1, 143, 104,
- -1, 144, 104, -1, 140, 104, -1, 141, 104, -1,
- 155, 104, -1, 157, 104, -1, 156, 104, -1, 169,
- 104, -1, 170, 104, -1, 164, 104, -1, 168, 104,
- -1, 113, -1, 114, 113, -1, 9, -1, 116, 105,
- 124, 106, -1, 3, -1, 4, -1, 5, -1, 6,
- -1, 7, -1, 8, -1, 66, -1, 115, 99, 115,
- -1, 115, 98, 115, -1, 115, 100, 115, -1, 115,
- 101, 115, -1, 98, 115, -1, 105, 115, 106, -1,
- 115, 95, 115, -1, 115, 96, 115, -1, 115, 97,
- 115, -1, 115, 13, 115, -1, 115, 14, 115, -1,
- 115, 15, 115, -1, 115, 10, 115, -1, 115, 11,
- 115, -1, 12, 115, -1, 9, 103, 70, -1, 66,
- 103, 70, -1, 71, -1, 72, -1, 73, -1, 74,
- -1, 75, -1, 77, -1, 78, -1, 79, -1, 80,
- -1, 83, -1, 84, -1, -1, 107, -1, 117, 108,
- 107, -1, 109, 9, 105, 117, 106, 110, -1, 120,
- 105, 124, 106, -1, 76, -1, 81, -1, 82, -1,
- 9, 105, 106, -1, 9, -1, 122, 108, 9, -1,
- -1, 9, -1, 123, 108, 9, -1, -1, 115, -1,
- 124, 108, 115, -1, 115, -1, 37, 105, 100, 106,
- -1, 37, 105, 38, 9, 106, -1, 36, 105, 115,
- 106, -1, -1, 125, -1, 126, 108, 125, -1, 100,
- -1, 126, 49, 123, -1, 126, -1, -1, 40, 115,
- -1, -1, 41, 51, -1, -1, 92, 17, 93, 94,
- -1, -1, 46, -1, 47, -1, -1, 44, 45, 9,
- 131, -1, 35, 127, 39, 122, 128, 129, 130, 132,
- -1, 48, 49, 9, -1, 134, 50, 105, 124, 106,
- -1, 134, 133, -1, 9, 95, 115, -1, 136, -1,
- 137, 108, 136, -1, 40, 54, 55, 9, -1, 51,
- 9, 52, 137, -1, 139, 128, -1, 139, 138, -1,
- 53, 39, 9, -1, 142, 128, -1, 142, 138, -1,
- 85, 133, -1, 9, 63, 115, -1, 31, 115, 29,
- 114, -1, 147, -1, 148, 147, -1, -1, 30, 114,
- -1, 148, -1, 28, 115, 29, 114, 149, 27, 28,
- -1, 33, 115, 32, 114, 27, 32, -1, 41, 9,
- 17, 115, 42, 115, 32, 114, 27, 32, -1, 90,
- -1, 34, -1, 67, 9, -1, 69, 9, -1, 68,
- 9, 49, 123, -1, 68, 9, 49, 121, -1, 9,
- 171, 160, 161, 162, -1, 158, -1, 159, 108, 158,
- -1, -1, 105, 3, 106, -1, -1, 89, -1, -1,
- 12, 8, -1, -1, 61, -1, 56, 57, 9, 105,
- 159, 106, 163, -1, 9, -1, 165, 108, 9, -1,
- -1, 59, -1, -1, 60, -1, 56, 166, 167, 58,
- 9, 62, 9, 105, 165, 106, -1, 86, 88, -1,
- 87, 88, -1, 21, -1, 22, -1, 24, -1, 19,
- -1, 20, -1, 9, 17, 171, -1, 9, 18, 171,
- -1, -1, 172, -1, 173, 108, 172, -1, 9, 171,
- 104, -1, -1, 174, -1, 175, 174, -1, 64, 65,
- 9, 25, 133, 104, -1, 64, 91, 9, 104, -1,
- 176, -1, 177, -1, -1, 178, -1, 179, 178, -1,
- 16, 9, 105, 173, 106, 25, 175, 179, 26, 114,
- 27, -1
-};
-
-/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
-static const unsigned short int yyrline[] =
-{
- 0, 138, 138, 141, 142, 143, 144, 145, 146, 147,
- 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
- 158, 159, 160, 161, 162, 166, 167, 172, 173, 175,
- 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
- 186, 187, 188, 189, 190, 191, 192, 193, 194, 195,
- 196, 197, 199, 204, 205, 206, 207, 209, 210, 211,
- 212, 213, 214, 215, 218, 220, 221, 225, 230, 235,
- 236, 237, 241, 245, 246, 251, 252, 253, 258, 259,
- 260, 264, 265, 270, 276, 283, 284, 285, 290, 292,
- 294, 298, 299, 303, 304, 309, 310, 315, 316, 317,
- 321, 322, 327, 337, 342, 344, 349, 353, 354, 359,
- 365, 372, 377, 382, 388, 393, 398, 403, 408, 414,
- 415, 420, 421, 423, 427, 434, 440, 448, 452, 456,
- 462, 468, 470, 475, 480, 481, 486, 487, 492, 493,
- 499, 500, 506, 507, 513, 519, 520, 525, 526, 530,
- 531, 535, 543, 548, 553, 554, 555, 556, 557, 561,
- 564, 570, 571, 572, 577, 581, 583, 584, 588, 594,
- 599, 600, 603, 605, 606, 610
-};
-#endif
-
-#if YYDEBUG || YYERROR_VERBOSE
-/* YYTNME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
- First, the terminals, then, starting at YYNTOKENS, nonterminals. */
-static const char *const yytname[] =
-{
- "$end", "error", "$undefined", "PARS_INT_LIT", "PARS_FLOAT_LIT",
- "PARS_STR_LIT", "PARS_FIXBINARY_LIT", "PARS_BLOB_LIT", "PARS_NULL_LIT",
- "PARS_ID_TOKEN", "PARS_AND_TOKEN", "PARS_OR_TOKEN", "PARS_NOT_TOKEN",
- "PARS_GE_TOKEN", "PARS_LE_TOKEN", "PARS_NE_TOKEN",
- "PARS_PROCEDURE_TOKEN", "PARS_IN_TOKEN", "PARS_OUT_TOKEN",
- "PARS_BINARY_TOKEN", "PARS_BLOB_TOKEN", "PARS_INT_TOKEN",
- "PARS_INTEGER_TOKEN", "PARS_FLOAT_TOKEN", "PARS_CHAR_TOKEN",
- "PARS_IS_TOKEN", "PARS_BEGIN_TOKEN", "PARS_END_TOKEN", "PARS_IF_TOKEN",
- "PARS_THEN_TOKEN", "PARS_ELSE_TOKEN", "PARS_ELSIF_TOKEN",
- "PARS_LOOP_TOKEN", "PARS_WHILE_TOKEN", "PARS_RETURN_TOKEN",
- "PARS_SELECT_TOKEN", "PARS_SUM_TOKEN", "PARS_COUNT_TOKEN",
- "PARS_DISTINCT_TOKEN", "PARS_FROM_TOKEN", "PARS_WHERE_TOKEN",
- "PARS_FOR_TOKEN", "PARS_DDOT_TOKEN", "PARS_READ_TOKEN",
- "PARS_ORDER_TOKEN", "PARS_BY_TOKEN", "PARS_ASC_TOKEN", "PARS_DESC_TOKEN",
- "PARS_INSERT_TOKEN", "PARS_INTO_TOKEN", "PARS_VALUES_TOKEN",
- "PARS_UPDATE_TOKEN", "PARS_SET_TOKEN", "PARS_DELETE_TOKEN",
- "PARS_CURRENT_TOKEN", "PARS_OF_TOKEN", "PARS_CREATE_TOKEN",
- "PARS_TABLE_TOKEN", "PARS_INDEX_TOKEN", "PARS_UNIQUE_TOKEN",
- "PARS_CLUSTERED_TOKEN", "PARS_DOES_NOT_FIT_IN_MEM_TOKEN",
- "PARS_ON_TOKEN", "PARS_ASSIGN_TOKEN", "PARS_DECLARE_TOKEN",
- "PARS_CURSOR_TOKEN", "PARS_SQL_TOKEN", "PARS_OPEN_TOKEN",
- "PARS_FETCH_TOKEN", "PARS_CLOSE_TOKEN", "PARS_NOTFOUND_TOKEN",
- "PARS_TO_CHAR_TOKEN", "PARS_TO_NUMBER_TOKEN", "PARS_TO_BINARY_TOKEN",
- "PARS_BINARY_TO_NUMBER_TOKEN", "PARS_SUBSTR_TOKEN", "PARS_REPLSTR_TOKEN",
- "PARS_CONCAT_TOKEN", "PARS_INSTR_TOKEN", "PARS_LENGTH_TOKEN",
- "PARS_SYSDATE_TOKEN", "PARS_PRINTF_TOKEN", "PARS_ASSERT_TOKEN",
- "PARS_RND_TOKEN", "PARS_RND_STR_TOKEN", "PARS_ROW_PRINTF_TOKEN",
- "PARS_COMMIT_TOKEN", "PARS_ROLLBACK_TOKEN", "PARS_WORK_TOKEN",
- "PARS_UNSIGNED_TOKEN", "PARS_EXIT_TOKEN", "PARS_FUNCTION_TOKEN",
- "PARS_LOCK_TOKEN", "PARS_SHARE_TOKEN", "PARS_MODE_TOKEN", "'='", "'<'",
- "'>'", "'-'", "'+'", "'*'", "'/'", "NEG", "'%'", "';'", "'('", "')'",
- "'?'", "','", "'{'", "'}'", "$accept", "top_statement", "statement",
- "statement_list", "exp", "function_name", "question_mark_list",
- "stored_procedure_call", "predefined_procedure_call",
- "predefined_procedure_name", "user_function_call", "table_list",
- "variable_list", "exp_list", "select_item", "select_item_list",
- "select_list", "search_condition", "for_update_clause",
- "lock_shared_clause", "order_direction", "order_by_clause",
- "select_statement", "insert_statement_start", "insert_statement",
- "column_assignment", "column_assignment_list", "cursor_positioned",
- "update_statement_start", "update_statement_searched",
- "update_statement_positioned", "delete_statement_start",
- "delete_statement_searched", "delete_statement_positioned",
- "row_printf_statement", "assignment_statement", "elsif_element",
- "elsif_list", "else_part", "if_statement", "while_statement",
- "for_statement", "exit_statement", "return_statement",
- "open_cursor_statement", "close_cursor_statement", "fetch_statement",
- "column_def", "column_def_list", "opt_column_len", "opt_unsigned",
- "opt_not_null", "not_fit_in_memory", "create_table", "column_list",
- "unique_def", "clustered_def", "create_index", "commit_statement",
- "rollback_statement", "type_name", "parameter_declaration",
- "parameter_declaration_list", "variable_declaration",
- "variable_declaration_list", "cursor_declaration",
- "function_declaration", "declaration", "declaration_list",
- "procedure_definition", 0
-};
-#endif
-
-# ifdef YYPRINT
-/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
- token YYLEX-NUM. */
-static const unsigned short int yytoknum[] =
-{
- 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
- 265, 266, 267, 268, 269, 270, 271, 272, 273, 274,
- 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
- 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
- 295, 296, 297, 298, 299, 300, 301, 302, 303, 304,
- 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
- 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
- 325, 326, 327, 328, 329, 330, 331, 332, 333, 334,
- 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
- 345, 346, 347, 348, 349, 61, 60, 62, 45, 43,
- 42, 47, 350, 37, 59, 40, 41, 63, 44, 123,
- 125
-};
-# endif
-
-/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
-static const unsigned char yyr1[] =
-{
- 0, 111, 112, 113, 113, 113, 113, 113, 113, 113,
- 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
- 113, 113, 113, 113, 113, 114, 114, 115, 115, 115,
- 115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
- 115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
- 115, 115, 115, 116, 116, 116, 116, 116, 116, 116,
- 116, 116, 116, 116, 117, 117, 117, 118, 119, 120,
- 120, 120, 121, 122, 122, 123, 123, 123, 124, 124,
- 124, 125, 125, 125, 125, 126, 126, 126, 127, 127,
- 127, 128, 128, 129, 129, 130, 130, 131, 131, 131,
- 132, 132, 133, 134, 135, 135, 136, 137, 137, 138,
- 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
- 148, 149, 149, 149, 150, 151, 152, 153, 154, 155,
- 156, 157, 157, 158, 159, 159, 160, 160, 161, 161,
- 162, 162, 163, 163, 164, 165, 165, 166, 166, 167,
- 167, 168, 169, 170, 171, 171, 171, 171, 171, 172,
- 172, 173, 173, 173, 174, 175, 175, 175, 176, 177,
- 178, 178, 179, 179, 179, 180
-};
-
-/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
-static const unsigned char yyr2[] =
-{
- 0, 2, 2, 1, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 1, 2, 1, 4, 1,
- 1, 1, 1, 1, 1, 1, 3, 3, 3, 3,
- 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 2, 3, 3, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 0, 1, 3, 6, 4, 1,
- 1, 1, 3, 1, 3, 0, 1, 3, 0, 1,
- 3, 1, 4, 5, 4, 0, 1, 3, 1, 3,
- 1, 0, 2, 0, 2, 0, 4, 0, 1, 1,
- 0, 4, 8, 3, 5, 2, 3, 1, 3, 4,
- 4, 2, 2, 3, 2, 2, 2, 3, 4, 1,
- 2, 0, 2, 1, 7, 6, 10, 1, 1, 2,
- 2, 4, 4, 5, 1, 3, 0, 3, 0, 1,
- 0, 2, 0, 1, 7, 1, 3, 0, 1, 0,
- 1, 10, 2, 2, 1, 1, 1, 1, 1, 3,
- 3, 0, 1, 3, 3, 0, 1, 2, 6, 4,
- 1, 1, 0, 1, 2, 11
-};
-
-/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
- STATE-NUM when YYTABLE doesn't specify something else to do. Zero
- means the default is an error. */
-static const unsigned char yydefact[] =
-{
- 0, 0, 0, 0, 0, 1, 2, 161, 0, 162,
- 0, 0, 0, 0, 0, 157, 158, 154, 155, 156,
- 159, 160, 165, 163, 0, 166, 172, 0, 0, 167,
- 170, 171, 173, 0, 164, 0, 0, 0, 174, 0,
- 0, 0, 0, 0, 128, 85, 0, 0, 0, 0,
- 147, 0, 0, 0, 69, 70, 71, 0, 0, 0,
- 127, 0, 25, 0, 3, 0, 0, 0, 0, 0,
- 91, 0, 0, 91, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 169, 0, 29, 30, 31, 32, 33, 34, 27,
- 0, 35, 53, 54, 55, 56, 57, 58, 59, 60,
- 61, 62, 63, 0, 0, 0, 0, 0, 0, 0,
- 88, 81, 86, 90, 0, 0, 0, 0, 0, 0,
- 148, 149, 129, 0, 130, 116, 152, 153, 0, 175,
- 26, 4, 78, 11, 0, 105, 12, 0, 111, 112,
- 16, 17, 114, 115, 14, 15, 13, 10, 8, 5,
- 6, 7, 9, 18, 20, 19, 23, 24, 21, 22,
- 0, 117, 0, 50, 0, 40, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 78, 0, 0, 0, 75, 0, 0, 0, 103, 0,
- 113, 0, 150, 0, 75, 64, 79, 0, 78, 0,
- 92, 168, 51, 52, 41, 48, 49, 45, 46, 47,
- 121, 42, 43, 44, 37, 36, 38, 39, 0, 0,
- 0, 0, 0, 76, 89, 87, 73, 91, 0, 0,
- 107, 110, 0, 0, 76, 132, 131, 65, 0, 68,
- 0, 0, 0, 0, 0, 119, 123, 0, 28, 0,
- 84, 0, 82, 0, 0, 0, 93, 0, 0, 0,
- 0, 134, 0, 0, 0, 0, 0, 80, 104, 109,
- 122, 0, 120, 0, 125, 83, 77, 74, 0, 95,
- 0, 106, 108, 136, 142, 0, 0, 72, 67, 66,
- 0, 124, 94, 0, 100, 0, 0, 138, 143, 144,
- 135, 0, 118, 0, 0, 102, 0, 0, 139, 140,
- 0, 0, 0, 0, 137, 0, 133, 145, 0, 96,
- 97, 126, 141, 151, 0, 98, 99, 101, 146
-};
-
-/* YYDEFGOTO[NTERM-NUM]. */
-static const short int yydefgoto[] =
-{
- -1, 2, 62, 63, 206, 116, 248, 64, 65, 66,
- 245, 237, 234, 207, 122, 123, 124, 148, 289, 304,
- 337, 315, 67, 68, 69, 240, 241, 149, 70, 71,
- 72, 73, 74, 75, 76, 77, 255, 256, 257, 78,
- 79, 80, 81, 82, 83, 84, 85, 271, 272, 307,
- 319, 326, 309, 86, 328, 131, 203, 87, 88, 89,
- 20, 9, 10, 25, 26, 30, 31, 32, 33, 3
-};
-
-/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
- STATE-NUM. */
-#define YYPACT_NINF -177
-static const short int yypact[] =
-{
- 28, 38, 54, -46, -29, -177, -177, 56, 50, -177,
- -75, 8, 8, 46, 56, -177, -177, -177, -177, -177,
- -177, -177, 63, -177, 8, -177, 2, -26, -51, -177,
- -177, -177, -177, -13, -177, 71, 72, 587, -177, 57,
- -21, 26, 272, 272, -177, 13, 91, 55, 96, 67,
- -22, 99, 100, 103, -177, -177, -177, 75, 29, 35,
- -177, 116, -177, 396, -177, 22, 23, 27, -9, 30,
- 87, 31, 32, 87, 47, 49, 52, 58, 59, 60,
- 61, 62, 65, 66, 74, 77, 78, 86, 89, 102,
- 75, -177, 272, -177, -177, -177, -177, -177, -177, 39,
- 272, 51, -177, -177, -177, -177, -177, -177, -177, -177,
- -177, -177, -177, 272, 272, 361, 25, 489, 45, 90,
- -177, 651, -177, -39, 93, 142, 124, 108, 152, 170,
- -177, 131, -177, 143, -177, -177, -177, -177, 98, -177,
- -177, -177, 272, -177, 110, -177, -177, 256, -177, -177,
- -177, -177, -177, -177, -177, -177, -177, -177, -177, -177,
- -177, -177, -177, -177, -177, -177, -177, -177, -177, -177,
- 112, 651, 137, 101, 147, 204, 88, 272, 272, 272,
- 272, 272, 587, 272, 272, 272, 272, 272, 272, 272,
- 272, 587, 272, -30, 211, 168, 212, 272, -177, 213,
- -177, 118, -177, 167, 217, 122, 651, -63, 272, 175,
- 651, -177, -177, -177, -177, 101, 101, 21, 21, 651,
- 332, 21, 21, 21, -6, -6, 204, 204, -60, 460,
- 198, 222, 126, -177, 125, -177, -177, -33, 584, 140,
- -177, 128, 228, 229, 139, -177, 125, -177, -53, -177,
- 272, -49, 240, 587, 272, -177, 224, 226, -177, 225,
- -177, 150, -177, 258, 272, 260, 230, 272, 272, 213,
- 8, -177, -45, 208, 166, 164, 176, 651, -177, -177,
- 587, 631, -177, 254, -177, -177, -177, -177, 234, 194,
- 638, 651, -177, 182, 227, 228, 280, -177, -177, -177,
- 587, -177, -177, 273, 247, 587, 289, 214, -177, -177,
- -177, 195, 587, 209, 261, -177, 524, 199, -177, 295,
- 292, 215, 299, 279, -177, 304, -177, -177, -44, -177,
- -8, -177, -177, -177, 305, -177, -177, -177, -177
-};
-
-/* YYPGOTO[NTERM-NUM]. */
-static const short int yypgoto[] =
-{
- -177, -177, -62, -176, -40, -177, -177, -177, -177, -177,
- -177, -177, 109, -166, 120, -177, -177, -69, -177, -177,
- -177, -177, -34, -177, -177, 48, -177, 243, -177, -177,
- -177, -177, -177, -177, -177, -177, 64, -177, -177, -177,
- -177, -177, -177, -177, -177, -177, -177, 24, -177, -177,
- -177, -177, -177, -177, -177, -177, -177, -177, -177, -177,
- -12, 307, -177, 297, -177, -177, -177, 285, -177, -177
-};
-
-/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
- positive, shift that token. If negative, reduce the rule which
- number is the opposite. If zero, do what YYDEFACT says.
- If YYTABLE_NINF, syntax error. */
-#define YYTABLE_NINF -1
-static const unsigned short int yytable[] =
-{
- 21, 140, 115, 117, 152, 121, 220, 264, 231, 181,
- 194, 24, 27, 37, 35, 229, 93, 94, 95, 96,
- 97, 98, 99, 135, 228, 100, 45, 15, 16, 17,
- 18, 13, 19, 14, 145, 129, 181, 130, 335, 336,
- 36, 144, 251, 249, 1, 250, 258, 4, 250, 118,
- 119, 28, 171, 275, 5, 276, 170, 278, 6, 250,
- 173, 294, 333, 295, 334, 8, 28, 11, 12, 195,
- 232, 22, 24, 175, 176, 265, 7, 280, 34, 101,
- 39, 40, 90, 91, 102, 103, 104, 105, 106, 92,
- 107, 108, 109, 110, 188, 189, 111, 112, 177, 178,
- 125, 179, 180, 181, 126, 127, 128, 210, 132, 133,
- 45, 113, 134, 120, 179, 180, 181, 136, 114, 186,
- 187, 188, 189, 137, 312, 138, 141, 147, 142, 316,
- 190, 143, 196, 198, 146, 150, 151, 215, 216, 217,
- 218, 219, 172, 221, 222, 223, 224, 225, 226, 227,
- 192, 154, 230, 155, 174, 121, 156, 238, 140, 197,
- 199, 200, 157, 158, 159, 160, 161, 140, 266, 162,
- 163, 93, 94, 95, 96, 97, 98, 99, 164, 201,
- 100, 165, 166, 183, 184, 185, 186, 187, 188, 189,
- 167, 202, 204, 168, 214, 193, 183, 184, 185, 186,
- 187, 188, 189, 205, 118, 119, 169, 212, 177, 178,
- 277, 179, 180, 181, 281, 208, 211, 213, 140, 181,
- 233, 236, 239, 242, 210, 243, 244, 290, 291, 247,
- 252, 261, 262, 263, 101, 268, 269, 270, 273, 102,
- 103, 104, 105, 106, 274, 107, 108, 109, 110, 279,
- 140, 111, 112, 283, 140, 254, 285, 284, 293, 93,
- 94, 95, 96, 97, 98, 99, 113, 286, 100, 287,
- 296, 288, 297, 114, 298, 93, 94, 95, 96, 97,
- 98, 99, 301, 299, 100, 302, 303, 306, 308, 311,
- 313, 314, 317, 183, 184, 185, 186, 187, 188, 189,
- 320, 327, 321, 318, 260, 324, 322, 325, 330, 329,
- 209, 331, 332, 246, 338, 235, 153, 292, 38, 310,
- 282, 23, 101, 29, 0, 0, 0, 102, 103, 104,
- 105, 106, 0, 107, 108, 109, 110, 0, 101, 111,
- 112, 41, 0, 102, 103, 104, 105, 106, 0, 107,
- 108, 109, 110, 0, 113, 111, 112, 0, 0, 0,
- 42, 114, 253, 254, 0, 43, 44, 45, 0, 0,
- 113, 177, 178, 46, 179, 180, 181, 114, 0, 0,
- 47, 0, 0, 48, 0, 49, 0, 0, 50, 0,
- 182, 0, 0, 0, 0, 0, 0, 0, 0, 51,
- 52, 53, 0, 0, 0, 41, 0, 0, 54, 0,
- 0, 0, 0, 55, 56, 0, 0, 57, 58, 59,
- 0, 0, 60, 139, 42, 0, 0, 0, 0, 43,
- 44, 45, 0, 0, 0, 0, 0, 46, 0, 0,
- 0, 61, 0, 0, 47, 0, 0, 48, 0, 49,
- 0, 0, 50, 0, 0, 0, 183, 184, 185, 186,
- 187, 188, 189, 51, 52, 53, 0, 0, 0, 41,
- 0, 0, 54, 0, 0, 0, 0, 55, 56, 0,
- 0, 57, 58, 59, 0, 0, 60, 259, 42, 0,
- 0, 0, 0, 43, 44, 45, 0, 0, 0, 177,
- 178, 46, 179, 180, 181, 61, 0, 0, 47, 0,
- 0, 48, 0, 49, 0, 0, 50, 0, 0, 0,
- 0, 191, 0, 0, 0, 0, 0, 51, 52, 53,
- 0, 0, 0, 41, 0, 0, 54, 0, 0, 0,
- 0, 55, 56, 0, 0, 57, 58, 59, 0, 0,
- 60, 323, 42, 0, 0, 0, 0, 43, 44, 45,
- 0, 0, 0, 0, 0, 46, 0, 0, 0, 61,
- 0, 0, 47, 0, 0, 48, 0, 49, 0, 0,
- 50, 0, 0, 0, 183, 184, 185, 186, 187, 188,
- 189, 51, 52, 53, 177, 178, 41, 179, 180, 181,
- 54, 0, 0, 0, 0, 55, 56, 0, 0, 57,
- 58, 59, 0, 0, 60, 42, 0, 0, 0, 0,
- 43, 44, 45, 0, 0, 0, 267, 0, 46, 0,
- 0, 0, 0, 61, 0, 47, 0, 0, 48, 0,
- 49, 177, 178, 50, 179, 180, 181, 0, 177, 178,
- 0, 179, 180, 181, 51, 52, 53, 0, 0, 0,
- 300, 177, 178, 54, 179, 180, 181, 0, 55, 56,
- 305, 0, 57, 58, 59, 0, 0, 60, 0, 183,
- 184, 185, 186, 187, 188, 189, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 61, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 183, 184, 185, 186,
- 187, 188, 189, 183, 184, 185, 186, 187, 188, 189,
- 0, 0, 0, 0, 0, 0, 183, 184, 185, 186,
- 187, 188, 189
-};
-
-static const short int yycheck[] =
-{
- 12, 63, 42, 43, 73, 45, 182, 40, 38, 15,
- 49, 9, 24, 26, 65, 191, 3, 4, 5, 6,
- 7, 8, 9, 57, 190, 12, 35, 19, 20, 21,
- 22, 106, 24, 108, 68, 57, 15, 59, 46, 47,
- 91, 50, 208, 106, 16, 108, 106, 9, 108, 36,
- 37, 64, 92, 106, 0, 108, 90, 106, 104, 108,
- 100, 106, 106, 108, 108, 9, 64, 17, 18, 108,
- 100, 25, 9, 113, 114, 108, 105, 253, 104, 66,
- 9, 9, 25, 104, 71, 72, 73, 74, 75, 63,
- 77, 78, 79, 80, 100, 101, 83, 84, 10, 11,
- 9, 13, 14, 15, 49, 9, 39, 147, 9, 9,
- 35, 98, 9, 100, 13, 14, 15, 88, 105, 98,
- 99, 100, 101, 88, 300, 9, 104, 40, 105, 305,
- 105, 104, 39, 9, 104, 104, 104, 177, 178, 179,
- 180, 181, 103, 183, 184, 185, 186, 187, 188, 189,
- 105, 104, 192, 104, 103, 195, 104, 197, 220, 17,
- 52, 9, 104, 104, 104, 104, 104, 229, 237, 104,
- 104, 3, 4, 5, 6, 7, 8, 9, 104, 9,
- 12, 104, 104, 95, 96, 97, 98, 99, 100, 101,
- 104, 60, 49, 104, 106, 105, 95, 96, 97, 98,
- 99, 100, 101, 105, 36, 37, 104, 70, 10, 11,
- 250, 13, 14, 15, 254, 105, 104, 70, 280, 15,
- 9, 9, 9, 105, 264, 58, 9, 267, 268, 107,
- 55, 9, 106, 108, 66, 95, 108, 9, 9, 71,
- 72, 73, 74, 75, 105, 77, 78, 79, 80, 9,
- 312, 83, 84, 27, 316, 31, 106, 32, 270, 3,
- 4, 5, 6, 7, 8, 9, 98, 9, 12, 9,
- 62, 41, 106, 105, 110, 3, 4, 5, 6, 7,
- 8, 9, 28, 107, 12, 51, 92, 105, 61, 9,
- 17, 44, 3, 95, 96, 97, 98, 99, 100, 101,
- 105, 9, 93, 89, 106, 106, 45, 12, 9, 94,
- 54, 32, 8, 204, 9, 195, 73, 269, 33, 295,
- 256, 14, 66, 26, -1, -1, -1, 71, 72, 73,
- 74, 75, -1, 77, 78, 79, 80, -1, 66, 83,
- 84, 9, -1, 71, 72, 73, 74, 75, -1, 77,
- 78, 79, 80, -1, 98, 83, 84, -1, -1, -1,
- 28, 105, 30, 31, -1, 33, 34, 35, -1, -1,
- 98, 10, 11, 41, 13, 14, 15, 105, -1, -1,
- 48, -1, -1, 51, -1, 53, -1, -1, 56, -1,
- 29, -1, -1, -1, -1, -1, -1, -1, -1, 67,
- 68, 69, -1, -1, -1, 9, -1, -1, 76, -1,
- -1, -1, -1, 81, 82, -1, -1, 85, 86, 87,
- -1, -1, 90, 27, 28, -1, -1, -1, -1, 33,
- 34, 35, -1, -1, -1, -1, -1, 41, -1, -1,
- -1, 109, -1, -1, 48, -1, -1, 51, -1, 53,
- -1, -1, 56, -1, -1, -1, 95, 96, 97, 98,
- 99, 100, 101, 67, 68, 69, -1, -1, -1, 9,
- -1, -1, 76, -1, -1, -1, -1, 81, 82, -1,
- -1, 85, 86, 87, -1, -1, 90, 27, 28, -1,
- -1, -1, -1, 33, 34, 35, -1, -1, -1, 10,
- 11, 41, 13, 14, 15, 109, -1, -1, 48, -1,
- -1, 51, -1, 53, -1, -1, 56, -1, -1, -1,
- -1, 32, -1, -1, -1, -1, -1, 67, 68, 69,
- -1, -1, -1, 9, -1, -1, 76, -1, -1, -1,
- -1, 81, 82, -1, -1, 85, 86, 87, -1, -1,
- 90, 27, 28, -1, -1, -1, -1, 33, 34, 35,
- -1, -1, -1, -1, -1, 41, -1, -1, -1, 109,
- -1, -1, 48, -1, -1, 51, -1, 53, -1, -1,
- 56, -1, -1, -1, 95, 96, 97, 98, 99, 100,
- 101, 67, 68, 69, 10, 11, 9, 13, 14, 15,
- 76, -1, -1, -1, -1, 81, 82, -1, -1, 85,
- 86, 87, -1, -1, 90, 28, -1, -1, -1, -1,
- 33, 34, 35, -1, -1, -1, 42, -1, 41, -1,
- -1, -1, -1, 109, -1, 48, -1, -1, 51, -1,
- 53, 10, 11, 56, 13, 14, 15, -1, 10, 11,
- -1, 13, 14, 15, 67, 68, 69, -1, -1, -1,
- 29, 10, 11, 76, 13, 14, 15, -1, 81, 82,
- 32, -1, 85, 86, 87, -1, -1, 90, -1, 95,
- 96, 97, 98, 99, 100, 101, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 109, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 95, 96, 97, 98,
- 99, 100, 101, 95, 96, 97, 98, 99, 100, 101,
- -1, -1, -1, -1, -1, -1, 95, 96, 97, 98,
- 99, 100, 101
-};
-
-/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
- symbol of state STATE-NUM. */
-static const unsigned char yystos[] =
-{
- 0, 16, 112, 180, 9, 0, 104, 105, 9, 172,
- 173, 17, 18, 106, 108, 19, 20, 21, 22, 24,
- 171, 171, 25, 172, 9, 174, 175, 171, 64, 174,
- 176, 177, 178, 179, 104, 65, 91, 26, 178, 9,
- 9, 9, 28, 33, 34, 35, 41, 48, 51, 53,
- 56, 67, 68, 69, 76, 81, 82, 85, 86, 87,
- 90, 109, 113, 114, 118, 119, 120, 133, 134, 135,
- 139, 140, 141, 142, 143, 144, 145, 146, 150, 151,
- 152, 153, 154, 155, 156, 157, 164, 168, 169, 170,
- 25, 104, 63, 3, 4, 5, 6, 7, 8, 9,
- 12, 66, 71, 72, 73, 74, 75, 77, 78, 79,
- 80, 83, 84, 98, 105, 115, 116, 115, 36, 37,
- 100, 115, 125, 126, 127, 9, 49, 9, 39, 57,
- 59, 166, 9, 9, 9, 133, 88, 88, 9, 27,
- 113, 104, 105, 104, 50, 133, 104, 40, 128, 138,
- 104, 104, 128, 138, 104, 104, 104, 104, 104, 104,
- 104, 104, 104, 104, 104, 104, 104, 104, 104, 104,
- 133, 115, 103, 115, 103, 115, 115, 10, 11, 13,
- 14, 15, 29, 95, 96, 97, 98, 99, 100, 101,
- 105, 32, 105, 105, 49, 108, 39, 17, 9, 52,
- 9, 9, 60, 167, 49, 105, 115, 124, 105, 54,
- 115, 104, 70, 70, 106, 115, 115, 115, 115, 115,
- 114, 115, 115, 115, 115, 115, 115, 115, 124, 114,
- 115, 38, 100, 9, 123, 125, 9, 122, 115, 9,
- 136, 137, 105, 58, 9, 121, 123, 107, 117, 106,
- 108, 124, 55, 30, 31, 147, 148, 149, 106, 27,
- 106, 9, 106, 108, 40, 108, 128, 42, 95, 108,
- 9, 158, 159, 9, 105, 106, 108, 115, 106, 9,
- 114, 115, 147, 27, 32, 106, 9, 9, 41, 129,
- 115, 115, 136, 171, 106, 108, 62, 106, 110, 107,
- 29, 28, 51, 92, 130, 32, 105, 160, 61, 163,
- 158, 9, 114, 17, 44, 132, 114, 3, 89, 161,
- 105, 93, 45, 27, 106, 12, 162, 9, 165, 94,
- 9, 32, 8, 106, 108, 46, 47, 131, 9
-};
-
-#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
-# define YYSIZE_T __SIZE_TYPE__
-#endif
-#if ! defined (YYSIZE_T) && defined (size_t)
-# define YYSIZE_T size_t
-#endif
-#if ! defined (YYSIZE_T)
-# if defined (__STDC__) || defined (__cplusplus)
-# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
-# define YYSIZE_T size_t
-# endif
-#endif
-#if ! defined (YYSIZE_T)
-# define YYSIZE_T unsigned int
-#endif
-
-#define yyerrok (yyerrstatus = 0)
-#define yyclearin (yychar = YYEMPTY)
-#define YYEMPTY (-2)
-#define YYEOF 0
-
-#define YYACCEPT goto yyacceptlab
-#define YYABORT goto yyabortlab
-#define YYERROR goto yyerrorlab
-
-
-/* Like YYERROR except do call yyerror. This remains here temporarily
- to ease the transition to the new meaning of YYERROR, for GCC.
- Once GCC version 2 has supplanted version 1, this can go. */
-
-#define YYFAIL goto yyerrlab
-
-#define YYRECOVERING() (!!yyerrstatus)
-
-#define YYBACKUP(Token, Value) \
-do \
- if (yychar == YYEMPTY && yylen == 1) \
- { \
- yychar = (Token); \
- yylval = (Value); \
- yytoken = YYTRANSLATE (yychar); \
- YYPOPSTACK; \
- goto yybackup; \
- } \
- else \
- { \
- yyerror ("syntax error: cannot back up");\
- YYERROR; \
- } \
-while (0)
-
-#define YYTERROR 1
-#define YYERRCODE 256
-
-/* YYLLOC_DEFAULT -- Compute the default location (before the actions
- are run). */
-
-#ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N) \
- ((Current).first_line = (Rhs)[1].first_line, \
- (Current).first_column = (Rhs)[1].first_column, \
- (Current).last_line = (Rhs)[N].last_line, \
- (Current).last_column = (Rhs)[N].last_column)
-#endif
-
-/* YYLEX -- calling `yylex' with the right arguments. */
-
-#ifdef YYLEX_PARAM
-# define YYLEX yylex (YYLEX_PARAM)
-#else
-# define YYLEX yylex ()
-#endif
-
-/* Enable debugging if requested. */
-#if YYDEBUG
-
-# ifndef YYFPRINTF
-# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
-# define YYFPRINTF fprintf
-# endif
-
-# define YYDPRINTF(Args) \
-do { \
- if (yydebug) \
- YYFPRINTF Args; \
-} while (0)
-
-# define YYDSYMPRINT(Args) \
-do { \
- if (yydebug) \
- yysymprint Args; \
-} while (0)
-
-# define YYDSYMPRINTF(Title, Token, Value, Location) \
-do { \
- if (yydebug) \
- { \
- YYFPRINTF (stderr, "%s ", Title); \
- yysymprint (stderr, \
- Token, Value); \
- YYFPRINTF (stderr, "\n"); \
- } \
-} while (0)
-
-/*------------------------------------------------------------------.
-| yy_stack_print -- Print the state stack from its BOTTOM up to its |
-| TOP (included). |
-`------------------------------------------------------------------*/
-
-#if defined (__STDC__) || defined (__cplusplus)
-static void
-yy_stack_print (short int *bottom, short int *top)
-#else
-static void
-yy_stack_print (bottom, top)
- short int *bottom;
- short int *top;
-#endif
-{
- YYFPRINTF (stderr, "Stack now");
- for (/* Nothing. */; bottom <= top; ++bottom)
- YYFPRINTF (stderr, " %d", *bottom);
- YYFPRINTF (stderr, "\n");
-}
-
-# define YY_STACK_PRINT(Bottom, Top) \
-do { \
- if (yydebug) \
- yy_stack_print ((Bottom), (Top)); \
-} while (0)
-
-
-/*------------------------------------------------.
-| Report that the YYRULE is going to be reduced. |
-`------------------------------------------------*/
-
-#if defined (__STDC__) || defined (__cplusplus)
-static void
-yy_reduce_print (int yyrule)
-#else
-static void
-yy_reduce_print (yyrule)
- int yyrule;
-#endif
-{
- int yyi;
- unsigned int yylno = yyrline[yyrule];
- YYFPRINTF (stderr, "Reducing stack by rule %d (line %u), ",
- yyrule - 1, yylno);
- /* Print the symbols being reduced, and their result. */
- for (yyi = yyprhs[yyrule]; 0 <= yyrhs[yyi]; yyi++)
- YYFPRINTF (stderr, "%s ", yytname [yyrhs[yyi]]);
- YYFPRINTF (stderr, "-> %s\n", yytname [yyr1[yyrule]]);
-}
-
-# define YY_REDUCE_PRINT(Rule) \
-do { \
- if (yydebug) \
- yy_reduce_print (Rule); \
-} while (0)
-
-/* Nonzero means print parse trace. It is left uninitialized so that
- multiple parsers can coexist. */
-int yydebug;
-#else /* !YYDEBUG */
-# define YYDPRINTF(Args)
-# define YYDSYMPRINT(Args)
-# define YYDSYMPRINTF(Title, Token, Value, Location)
-# define YY_STACK_PRINT(Bottom, Top)
-# define YY_REDUCE_PRINT(Rule)
-#endif /* !YYDEBUG */
-
-
-/* YYINITDEPTH -- initial size of the parser's stacks. */
-#ifndef YYINITDEPTH
-# define YYINITDEPTH 200
-#endif
-
-/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
- if the built-in stack extension method is used).
-
- Do not make this value too large; the results are undefined if
- SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH)
- evaluated with infinite-precision integer arithmetic. */
-
-#if defined (YYMAXDEPTH) && YYMAXDEPTH == 0
-# undef YYMAXDEPTH
-#endif
-
-#ifndef YYMAXDEPTH
-# define YYMAXDEPTH 10000
-#endif
-
-
-
-#if YYERROR_VERBOSE
-
-# ifndef yystrlen
-# if defined (__GLIBC__) && defined (_STRING_H)
-# define yystrlen strlen
-# else
-/* Return the length of YYSTR. */
-static YYSIZE_T
-# if defined (__STDC__) || defined (__cplusplus)
-yystrlen (const char *yystr)
-# else
-yystrlen (yystr)
- const char *yystr;
-# endif
-{
- register const char *yys = yystr;
-
- while (*yys++ != '\0')
- continue;
-
- return yys - yystr - 1;
-}
-# endif
-# endif
-
-# ifndef yystpcpy
-# if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE)
-# define yystpcpy stpcpy
-# else
-/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
- YYDEST. */
-static char *
-# if defined (__STDC__) || defined (__cplusplus)
-yystpcpy (char *yydest, const char *yysrc)
-# else
-yystpcpy (yydest, yysrc)
- char *yydest;
- const char *yysrc;
-# endif
-{
- register char *yyd = yydest;
- register const char *yys = yysrc;
-
- while ((*yyd++ = *yys++) != '\0')
- continue;
-
- return yyd - 1;
-}
-# endif
-# endif
-
-#endif /* !YYERROR_VERBOSE */
-
-
-
-#if YYDEBUG
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
-
-#if defined (__STDC__) || defined (__cplusplus)
-static void
-yysymprint (FILE *yyoutput, int yytype, YYSTYPE *yyvaluep)
-#else
-static void
-yysymprint (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE *yyvaluep;
-#endif
-{
- /* Pacify ``unused variable'' warnings. */
- (void) yyvaluep;
-
- if (yytype < YYNTOKENS)
- {
- YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
-# ifdef YYPRINT
- YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
-# endif
- }
- else
- YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
-
- switch (yytype)
- {
- default:
- break;
- }
- YYFPRINTF (yyoutput, ")");
-}
-
-#endif /* ! YYDEBUG */
-/*-----------------------------------------------.
-| Release the memory associated to this symbol. |
-`-----------------------------------------------*/
-
-#if defined (__STDC__) || defined (__cplusplus)
-static void
-yydestruct (int yytype, YYSTYPE *yyvaluep)
-#else
-static void
-yydestruct (yytype, yyvaluep)
- int yytype;
- YYSTYPE *yyvaluep;
-#endif
-{
- /* Pacify ``unused variable'' warnings. */
- (void) yyvaluep;
-
- switch (yytype)
- {
-
- default:
- break;
- }
-}
-
-
-/* Prevent warnings from -Wmissing-prototypes. */
-
-#ifdef YYPARSE_PARAM
-# if defined (__STDC__) || defined (__cplusplus)
-int yyparse (void *YYPARSE_PARAM);
-# else
-int yyparse ();
-# endif
-#else /* ! YYPARSE_PARAM */
-#if defined (__STDC__) || defined (__cplusplus)
-int yyparse (void);
-#else
-int yyparse ();
-#endif
-#endif /* ! YYPARSE_PARAM */
-
-
-
-/* The lookahead symbol. */
-int yychar;
-
-/* The semantic value of the lookahead symbol. */
-YYSTYPE yylval;
-
-/* Number of syntax errors so far. */
-int yynerrs;
-
-
-
-/*----------.
-| yyparse. |
-`----------*/
-
-#ifdef YYPARSE_PARAM
-# if defined (__STDC__) || defined (__cplusplus)
-int yyparse (void *YYPARSE_PARAM)
-# else
-int yyparse (YYPARSE_PARAM)
- void *YYPARSE_PARAM;
-# endif
-#else /* ! YYPARSE_PARAM */
-#if defined (__STDC__) || defined (__cplusplus)
-int
-yyparse (void)
-#else
-int
-yyparse ()
-
-#endif
-#endif
-{
-
- register int yystate;
- register int yyn;
- int yyresult;
- /* Number of tokens to shift before error messages enabled. */
- int yyerrstatus;
- /* Lookahead token as an internal (translated) token number. */
- int yytoken = 0;
-
- /* Three stacks and their tools:
- `yyss': related to states,
- `yyvs': related to semantic values,
- `yyls': related to locations.
-
- Refer to the stacks thru separate pointers, to allow yyoverflow
- to reallocate them elsewhere. */
-
- /* The state stack. */
- short int yyssa[YYINITDEPTH];
- short int *yyss = yyssa;
- register short int *yyssp;
-
- /* The semantic value stack. */
- YYSTYPE yyvsa[YYINITDEPTH];
- YYSTYPE *yyvs = yyvsa;
- register YYSTYPE *yyvsp;
-
-
-
-#define YYPOPSTACK (yyvsp--, yyssp--)
-
- YYSIZE_T yystacksize = YYINITDEPTH;
-
- /* The variables used to return semantic value and location from the
- action routines. */
- YYSTYPE yyval;
-
-
- /* When reducing, the number of symbols on the RHS of the reduced
- rule. */
- int yylen;
-
- YYDPRINTF ((stderr, "Starting parse\n"));
-
- yystate = 0;
- yyerrstatus = 0;
- yynerrs = 0;
- yychar = YYEMPTY; /* Cause a token to be read. */
-
- /* Initialize stack pointers.
- Waste one element of value and location stack
- so that they stay on the same level as the state stack.
- The wasted elements are never initialized. */
-
- yyssp = yyss;
- yyvsp = yyvs;
-
-
- goto yysetstate;
-
-/*------------------------------------------------------------.
-| yynewstate -- Push a new state, which is found in yystate. |
-`------------------------------------------------------------*/
- yynewstate:
- /* In all cases, when you get here, the value and location stacks
- have just been pushed. so pushing a state here evens the stacks.
- */
- yyssp++;
-
- yysetstate:
- *yyssp = yystate;
-
- if (yyss + yystacksize - 1 <= yyssp)
- {
- /* Get the current used size of the three stacks, in elements. */
- YYSIZE_T yysize = yyssp - yyss + 1;
-
-#ifdef yyoverflow
- {
- /* Give user a chance to reallocate the stack. Use copies of
- these so that the &'s don't force the real ones into
- memory. */
- YYSTYPE *yyvs1 = yyvs;
- short int *yyss1 = yyss;
-
-
- /* Each stack pointer address is followed by the size of the
- data in use in that stack, in bytes. This used to be a
- conditional around just the two extra args, but that might
- be undefined if yyoverflow is a macro. */
- yyoverflow ("parser stack overflow",
- &yyss1, yysize * sizeof (*yyssp),
- &yyvs1, yysize * sizeof (*yyvsp),
-
- &yystacksize);
-
- yyss = yyss1;
- yyvs = yyvs1;
- }
-#else /* no yyoverflow */
-# ifndef YYSTACK_RELOCATE
- goto yyoverflowlab;
-# else
- /* Extend the stack our own way. */
- if (YYMAXDEPTH <= yystacksize)
- goto yyoverflowlab;
- yystacksize *= 2;
- if (YYMAXDEPTH < yystacksize)
- yystacksize = YYMAXDEPTH;
-
- {
- short int *yyss1 = yyss;
- union yyalloc *yyptr =
- (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
- if (! yyptr)
- goto yyoverflowlab;
- YYSTACK_RELOCATE (yyss);
- YYSTACK_RELOCATE (yyvs);
-
-# undef YYSTACK_RELOCATE
- if (yyss1 != yyssa)
- YYSTACK_FREE (yyss1);
- }
-# endif
-#endif /* no yyoverflow */
-
- yyssp = yyss + yysize - 1;
- yyvsp = yyvs + yysize - 1;
-
-
- YYDPRINTF ((stderr, "Stack size increased to %lu\n",
- (unsigned long int) yystacksize));
-
- if (yyss + yystacksize - 1 <= yyssp)
- YYABORT;
- }
-
- YYDPRINTF ((stderr, "Entering state %d\n", yystate));
-
- goto yybackup;
-
-/*-----------.
-| yybackup. |
-`-----------*/
-yybackup:
-
-/* Do appropriate processing given the current state. */
-/* Read a lookahead token if we need one and don't already have one. */
-/* yyresume: */
-
- /* First try to decide what to do without reference to lookahead token. */
-
- yyn = yypact[yystate];
- if (yyn == YYPACT_NINF)
- goto yydefault;
-
- /* Not known => get a lookahead token if don't already have one. */
-
- /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
- if (yychar == YYEMPTY)
- {
- YYDPRINTF ((stderr, "Reading a token: "));
- yychar = YYLEX;
- }
-
- if (yychar <= YYEOF)
- {
- yychar = yytoken = YYEOF;
- YYDPRINTF ((stderr, "Now at end of input.\n"));
- }
- else
- {
- yytoken = YYTRANSLATE (yychar);
- YYDSYMPRINTF ("Next token is", yytoken, &yylval, &yylloc);
- }
-
- /* If the proper action on seeing token YYTOKEN is to reduce or to
- detect an error, take that action. */
- yyn += yytoken;
- if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
- goto yydefault;
- yyn = yytable[yyn];
- if (yyn <= 0)
- {
- if (yyn == 0 || yyn == YYTABLE_NINF)
- goto yyerrlab;
- yyn = -yyn;
- goto yyreduce;
- }
-
- if (yyn == YYFINAL)
- YYACCEPT;
-
- /* Shift the lookahead token. */
- YYDPRINTF ((stderr, "Shifting token %s, ", yytname[yytoken]));
-
- /* Discard the token being shifted unless it is eof. */
- if (yychar != YYEOF)
- yychar = YYEMPTY;
-
- *++yyvsp = yylval;
-
-
- /* Count tokens shifted since error; after three, turn off error
- status. */
- if (yyerrstatus)
- yyerrstatus--;
-
- yystate = yyn;
- goto yynewstate;
-
-
-/*-----------------------------------------------------------.
-| yydefault -- do the default action for the current state. |
-`-----------------------------------------------------------*/
-yydefault:
- yyn = yydefact[yystate];
- if (yyn == 0)
- goto yyerrlab;
- goto yyreduce;
-
-
-/*-----------------------------.
-| yyreduce -- Do a reduction. |
-`-----------------------------*/
-yyreduce:
- /* yyn is the number of a rule to reduce with. */
- yylen = yyr2[yyn];
-
- /* If YYLEN is nonzero, implement the default value of the action:
- `$$ = $1'.
-
- Otherwise, the following line sets YYVAL to garbage.
- This behavior is undocumented and Bison
- users should not rely upon it. Assigning to YYVAL
- unconditionally makes the parser a bit smaller, and it avoids a
- GCC warning that YYVAL may be used uninitialized. */
- yyval = yyvsp[1-yylen];
-
-
- YY_REDUCE_PRINT (yyn);
- switch (yyn)
- {
- case 25:
-#line 166 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
- break;
-
- case 26:
-#line 168 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;}
- break;
-
- case 27:
-#line 172 "pars0grm.y"
- { yyval = yyvsp[0];;}
- break;
-
- case 28:
-#line 174 "pars0grm.y"
- { yyval = pars_func(yyvsp[-3], yyvsp[-1]); ;}
- break;
-
- case 29:
-#line 175 "pars0grm.y"
- { yyval = yyvsp[0];;}
- break;
-
- case 30:
-#line 176 "pars0grm.y"
- { yyval = yyvsp[0];;}
- break;
-
- case 31:
-#line 177 "pars0grm.y"
- { yyval = yyvsp[0];;}
- break;
-
- case 32:
-#line 178 "pars0grm.y"
- { yyval = yyvsp[0];;}
- break;
-
- case 33:
-#line 179 "pars0grm.y"
- { yyval = yyvsp[0];;}
- break;
-
- case 34:
-#line 180 "pars0grm.y"
- { yyval = yyvsp[0];;}
- break;
-
- case 35:
-#line 181 "pars0grm.y"
- { yyval = yyvsp[0];;}
- break;
-
- case 36:
-#line 182 "pars0grm.y"
- { yyval = pars_op('+', yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 37:
-#line 183 "pars0grm.y"
- { yyval = pars_op('-', yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 38:
-#line 184 "pars0grm.y"
- { yyval = pars_op('*', yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 39:
-#line 185 "pars0grm.y"
- { yyval = pars_op('/', yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 40:
-#line 186 "pars0grm.y"
- { yyval = pars_op('-', yyvsp[0], NULL); ;}
- break;
-
- case 41:
-#line 187 "pars0grm.y"
- { yyval = yyvsp[-1]; ;}
- break;
-
- case 42:
-#line 188 "pars0grm.y"
- { yyval = pars_op('=', yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 43:
-#line 189 "pars0grm.y"
- { yyval = pars_op('<', yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 44:
-#line 190 "pars0grm.y"
- { yyval = pars_op('>', yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 45:
-#line 191 "pars0grm.y"
- { yyval = pars_op(PARS_GE_TOKEN, yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 46:
-#line 192 "pars0grm.y"
- { yyval = pars_op(PARS_LE_TOKEN, yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 47:
-#line 193 "pars0grm.y"
- { yyval = pars_op(PARS_NE_TOKEN, yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 48:
-#line 194 "pars0grm.y"
- { yyval = pars_op(PARS_AND_TOKEN, yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 49:
-#line 195 "pars0grm.y"
- { yyval = pars_op(PARS_OR_TOKEN, yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 50:
-#line 196 "pars0grm.y"
- { yyval = pars_op(PARS_NOT_TOKEN, yyvsp[0], NULL); ;}
- break;
-
- case 51:
-#line 198 "pars0grm.y"
- { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;}
- break;
-
- case 52:
-#line 200 "pars0grm.y"
- { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;}
- break;
-
- case 53:
-#line 204 "pars0grm.y"
- { yyval = &pars_to_char_token; ;}
- break;
-
- case 54:
-#line 205 "pars0grm.y"
- { yyval = &pars_to_number_token; ;}
- break;
-
- case 55:
-#line 206 "pars0grm.y"
- { yyval = &pars_to_binary_token; ;}
- break;
-
- case 56:
-#line 208 "pars0grm.y"
- { yyval = &pars_binary_to_number_token; ;}
- break;
-
- case 57:
-#line 209 "pars0grm.y"
- { yyval = &pars_substr_token; ;}
- break;
-
- case 58:
-#line 210 "pars0grm.y"
- { yyval = &pars_concat_token; ;}
- break;
-
- case 59:
-#line 211 "pars0grm.y"
- { yyval = &pars_instr_token; ;}
- break;
-
- case 60:
-#line 212 "pars0grm.y"
- { yyval = &pars_length_token; ;}
- break;
-
- case 61:
-#line 213 "pars0grm.y"
- { yyval = &pars_sysdate_token; ;}
- break;
-
- case 62:
-#line 214 "pars0grm.y"
- { yyval = &pars_rnd_token; ;}
- break;
-
- case 63:
-#line 215 "pars0grm.y"
- { yyval = &pars_rnd_str_token; ;}
- break;
-
- case 67:
-#line 226 "pars0grm.y"
- { yyval = pars_stored_procedure_call(yyvsp[-4]); ;}
- break;
-
- case 68:
-#line 231 "pars0grm.y"
- { yyval = pars_procedure_call(yyvsp[-3], yyvsp[-1]); ;}
- break;
-
- case 69:
-#line 235 "pars0grm.y"
- { yyval = &pars_replstr_token; ;}
- break;
-
- case 70:
-#line 236 "pars0grm.y"
- { yyval = &pars_printf_token; ;}
- break;
-
- case 71:
-#line 237 "pars0grm.y"
- { yyval = &pars_assert_token; ;}
- break;
-
- case 72:
-#line 241 "pars0grm.y"
- { yyval = yyvsp[-2]; ;}
- break;
-
- case 73:
-#line 245 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
- break;
-
- case 74:
-#line 247 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 75:
-#line 251 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 76:
-#line 252 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
- break;
-
- case 77:
-#line 254 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 78:
-#line 258 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 79:
-#line 259 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]);;}
- break;
-
- case 80:
-#line 260 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 81:
-#line 264 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
- break;
-
- case 82:
-#line 266 "pars0grm.y"
- { yyval = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- sym_tab_add_int_lit(
- pars_sym_tab_global, 1))); ;}
- break;
-
- case 83:
-#line 271 "pars0grm.y"
- { yyval = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- pars_func(&pars_distinct_token,
- que_node_list_add_last(
- NULL, yyvsp[-1])))); ;}
- break;
-
- case 84:
-#line 277 "pars0grm.y"
- { yyval = pars_func(&pars_sum_token,
- que_node_list_add_last(NULL,
- yyvsp[-1])); ;}
- break;
-
- case 85:
-#line 283 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 86:
-#line 284 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
- break;
-
- case 87:
-#line 286 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 88:
-#line 290 "pars0grm.y"
- { yyval = pars_select_list(&pars_star_denoter,
- NULL); ;}
- break;
-
- case 89:
-#line 293 "pars0grm.y"
- { yyval = pars_select_list(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 90:
-#line 294 "pars0grm.y"
- { yyval = pars_select_list(yyvsp[0], NULL); ;}
- break;
-
- case 91:
-#line 298 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 92:
-#line 299 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
- break;
-
- case 93:
-#line 303 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 94:
-#line 305 "pars0grm.y"
- { yyval = &pars_update_token; ;}
- break;
-
- case 95:
-#line 309 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 96:
-#line 311 "pars0grm.y"
- { yyval = &pars_share_token; ;}
- break;
-
- case 97:
-#line 315 "pars0grm.y"
- { yyval = &pars_asc_token; ;}
- break;
-
- case 98:
-#line 316 "pars0grm.y"
- { yyval = &pars_asc_token; ;}
- break;
-
- case 99:
-#line 317 "pars0grm.y"
- { yyval = &pars_desc_token; ;}
- break;
-
- case 100:
-#line 321 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 101:
-#line 323 "pars0grm.y"
- { yyval = pars_order_by(yyvsp[-1], yyvsp[0]); ;}
- break;
-
- case 102:
-#line 332 "pars0grm.y"
- { yyval = pars_select_statement(yyvsp[-6], yyvsp[-4], yyvsp[-3],
- yyvsp[-2], yyvsp[-1], yyvsp[0]); ;}
- break;
-
- case 103:
-#line 338 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
- break;
-
- case 104:
-#line 343 "pars0grm.y"
- { yyval = pars_insert_statement(yyvsp[-4], yyvsp[-1], NULL); ;}
- break;
-
- case 105:
-#line 345 "pars0grm.y"
- { yyval = pars_insert_statement(yyvsp[-1], NULL, yyvsp[0]); ;}
- break;
-
- case 106:
-#line 349 "pars0grm.y"
- { yyval = pars_column_assignment(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 107:
-#line 353 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
- break;
-
- case 108:
-#line 355 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 109:
-#line 361 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
- break;
-
- case 110:
-#line 367 "pars0grm.y"
- { yyval = pars_update_statement_start(FALSE,
- yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 111:
-#line 373 "pars0grm.y"
- { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;}
- break;
-
- case 112:
-#line 378 "pars0grm.y"
- { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;}
- break;
-
- case 113:
-#line 383 "pars0grm.y"
- { yyval = pars_update_statement_start(TRUE,
- yyvsp[0], NULL); ;}
- break;
-
- case 114:
-#line 389 "pars0grm.y"
- { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;}
- break;
-
- case 115:
-#line 394 "pars0grm.y"
- { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;}
- break;
-
- case 116:
-#line 399 "pars0grm.y"
- { yyval = pars_row_printf_statement(yyvsp[0]); ;}
- break;
-
- case 117:
-#line 404 "pars0grm.y"
- { yyval = pars_assignment_statement(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 118:
-#line 410 "pars0grm.y"
- { yyval = pars_elsif_element(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 119:
-#line 414 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
- break;
-
- case 120:
-#line 416 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;}
- break;
-
- case 121:
-#line 420 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 122:
-#line 422 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
- break;
-
- case 123:
-#line 423 "pars0grm.y"
- { yyval = yyvsp[0]; ;}
- break;
-
- case 124:
-#line 430 "pars0grm.y"
- { yyval = pars_if_statement(yyvsp[-5], yyvsp[-3], yyvsp[-2]); ;}
- break;
-
- case 125:
-#line 436 "pars0grm.y"
- { yyval = pars_while_statement(yyvsp[-4], yyvsp[-2]); ;}
- break;
-
- case 126:
-#line 444 "pars0grm.y"
- { yyval = pars_for_statement(yyvsp[-8], yyvsp[-6], yyvsp[-4], yyvsp[-2]); ;}
- break;
-
- case 127:
-#line 448 "pars0grm.y"
- { yyval = pars_exit_statement(); ;}
- break;
-
- case 128:
-#line 452 "pars0grm.y"
- { yyval = pars_return_statement(); ;}
- break;
-
- case 129:
-#line 457 "pars0grm.y"
- { yyval = pars_open_statement(
- ROW_SEL_OPEN_CURSOR, yyvsp[0]); ;}
- break;
-
- case 130:
-#line 463 "pars0grm.y"
- { yyval = pars_open_statement(
- ROW_SEL_CLOSE_CURSOR, yyvsp[0]); ;}
- break;
-
- case 131:
-#line 469 "pars0grm.y"
- { yyval = pars_fetch_statement(yyvsp[-2], yyvsp[0], NULL); ;}
- break;
-
- case 132:
-#line 471 "pars0grm.y"
- { yyval = pars_fetch_statement(yyvsp[-2], NULL, yyvsp[0]); ;}
- break;
-
- case 133:
-#line 476 "pars0grm.y"
- { yyval = pars_column_def(yyvsp[-4], yyvsp[-3], yyvsp[-2], yyvsp[-1], yyvsp[0]); ;}
- break;
-
- case 134:
-#line 480 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
- break;
-
- case 135:
-#line 482 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 136:
-#line 486 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 137:
-#line 488 "pars0grm.y"
- { yyval = yyvsp[-1]; ;}
- break;
-
- case 138:
-#line 492 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 139:
-#line 494 "pars0grm.y"
- { yyval = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 140:
-#line 499 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 141:
-#line 501 "pars0grm.y"
- { yyval = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 142:
-#line 506 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 143:
-#line 508 "pars0grm.y"
- { yyval = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 144:
-#line 515 "pars0grm.y"
- { yyval = pars_create_table(yyvsp[-4], yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 145:
-#line 519 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
- break;
-
- case 146:
-#line 521 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 147:
-#line 525 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 148:
-#line 526 "pars0grm.y"
- { yyval = &pars_unique_token; ;}
- break;
-
- case 149:
-#line 530 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 150:
-#line 531 "pars0grm.y"
- { yyval = &pars_clustered_token; ;}
- break;
-
- case 151:
-#line 539 "pars0grm.y"
- { yyval = pars_create_index(yyvsp[-8], yyvsp[-7], yyvsp[-5], yyvsp[-3], yyvsp[-1]); ;}
- break;
-
- case 152:
-#line 544 "pars0grm.y"
- { yyval = pars_commit_statement(); ;}
- break;
-
- case 153:
-#line 549 "pars0grm.y"
- { yyval = pars_rollback_statement(); ;}
- break;
-
- case 154:
-#line 553 "pars0grm.y"
- { yyval = &pars_int_token; ;}
- break;
-
- case 155:
-#line 554 "pars0grm.y"
- { yyval = &pars_int_token; ;}
- break;
-
- case 156:
-#line 555 "pars0grm.y"
- { yyval = &pars_char_token; ;}
- break;
-
- case 157:
-#line 556 "pars0grm.y"
- { yyval = &pars_binary_token; ;}
- break;
-
- case 158:
-#line 557 "pars0grm.y"
- { yyval = &pars_blob_token; ;}
- break;
-
- case 159:
-#line 562 "pars0grm.y"
- { yyval = pars_parameter_declaration(yyvsp[-2],
- PARS_INPUT, yyvsp[0]); ;}
- break;
-
- case 160:
-#line 565 "pars0grm.y"
- { yyval = pars_parameter_declaration(yyvsp[-2],
- PARS_OUTPUT, yyvsp[0]); ;}
- break;
-
- case 161:
-#line 570 "pars0grm.y"
- { yyval = NULL; ;}
- break;
-
- case 162:
-#line 571 "pars0grm.y"
- { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;}
- break;
-
- case 163:
-#line 573 "pars0grm.y"
- { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;}
- break;
-
- case 164:
-#line 578 "pars0grm.y"
- { yyval = pars_variable_declaration(yyvsp[-2], yyvsp[-1]); ;}
- break;
-
- case 168:
-#line 590 "pars0grm.y"
- { yyval = pars_cursor_declaration(yyvsp[-3], yyvsp[-1]); ;}
- break;
-
- case 169:
-#line 595 "pars0grm.y"
- { yyval = pars_function_declaration(yyvsp[-1]); ;}
- break;
-
- case 175:
-#line 616 "pars0grm.y"
- { yyval = pars_procedure_definition(yyvsp[-9], yyvsp[-7],
- yyvsp[-1]); ;}
- break;
-
-
- }
-
-/* Line 1010 of yacc.c. */
-#line 2345 "pars0grm.tab.c"
-
- yyvsp -= yylen;
- yyssp -= yylen;
-
-
- YY_STACK_PRINT (yyss, yyssp);
-
- *++yyvsp = yyval;
-
-
- /* Now `shift' the result of the reduction. Determine what state
- that goes to, based on the state we popped back to and the rule
- number reduced by. */
-
- yyn = yyr1[yyn];
-
- yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
- if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
- yystate = yytable[yystate];
- else
- yystate = yydefgoto[yyn - YYNTOKENS];
-
- goto yynewstate;
-
-
-/*------------------------------------.
-| yyerrlab -- here on detecting error |
-`------------------------------------*/
-yyerrlab:
- /* If not already recovering from an error, report this error. */
- if (!yyerrstatus)
- {
- ++yynerrs;
-#if YYERROR_VERBOSE
- yyn = yypact[yystate];
-
- if (YYPACT_NINF < yyn && yyn < YYLAST)
- {
- YYSIZE_T yysize = 0;
- int yytype = YYTRANSLATE (yychar);
- const char* yyprefix;
- char *yymsg;
- int yyx;
-
- /* Start YYX at -YYN if negative to avoid negative indexes in
- YYCHECK. */
- int yyxbegin = yyn < 0 ? -yyn : 0;
-
- /* Stay within bounds of both yycheck and yytname. */
- int yychecklim = YYLAST - yyn;
- int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
- int yycount = 0;
-
- yyprefix = ", expecting ";
- for (yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
- {
- yysize += yystrlen (yyprefix) + yystrlen (yytname [yyx]);
- yycount += 1;
- if (yycount == 5)
- {
- yysize = 0;
- break;
- }
- }
- yysize += (sizeof ("syntax error, unexpected ")
- + yystrlen (yytname[yytype]));
- yymsg = (char *) YYSTACK_ALLOC (yysize);
- if (yymsg != 0)
- {
- char *yyp = yystpcpy (yymsg, "syntax error, unexpected ");
- yyp = yystpcpy (yyp, yytname[yytype]);
-
- if (yycount < 5)
- {
- yyprefix = ", expecting ";
- for (yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
- {
- yyp = yystpcpy (yyp, yyprefix);
- yyp = yystpcpy (yyp, yytname[yyx]);
- yyprefix = " or ";
- }
- }
- yyerror (yymsg);
- YYSTACK_FREE (yymsg);
- }
- else
- yyerror ("syntax error; also virtual memory exhausted");
- }
- else
-#endif /* YYERROR_VERBOSE */
- yyerror ("syntax error");
- }
-
-
-
- if (yyerrstatus == 3)
- {
- /* If just tried and failed to reuse lookahead token after an
- error, discard it. */
-
- if (yychar <= YYEOF)
- {
- /* If at end of input, pop the error token,
- then the rest of the stack, then return failure. */
- if (yychar == YYEOF)
- for (;;)
- {
- YYPOPSTACK;
- if (yyssp == yyss)
- YYABORT;
- YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp);
- yydestruct (yystos[*yyssp], yyvsp);
- }
- }
- else
- {
- YYDSYMPRINTF ("Error: discarding", yytoken, &yylval, &yylloc);
- yydestruct (yytoken, &yylval);
- yychar = YYEMPTY;
-
- }
- }
-
- /* Else will try to reuse lookahead token after shifting the error
- token. */
- goto yyerrlab1;
-
-
-/*---------------------------------------------------.
-| yyerrorlab -- error raised explicitly by YYERROR. |
-`---------------------------------------------------*/
-yyerrorlab:
-
-#ifdef __GNUC__
- /* Pacify GCC when the user code never invokes YYERROR and the label
- yyerrorlab therefore never appears in user code. */
- if (0)
- goto yyerrorlab;
-#endif
-
- yyvsp -= yylen;
- yyssp -= yylen;
- yystate = *yyssp;
- goto yyerrlab1;
-
-
-/*-------------------------------------------------------------.
-| yyerrlab1 -- common code for both syntax error and YYERROR. |
-`-------------------------------------------------------------*/
-yyerrlab1:
- yyerrstatus = 3; /* Each real token shifted decrements this. */
-
- for (;;)
- {
- yyn = yypact[yystate];
- if (yyn != YYPACT_NINF)
- {
- yyn += YYTERROR;
- if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
- {
- yyn = yytable[yyn];
- if (0 < yyn)
- break;
- }
- }
-
- /* Pop the current state because it cannot handle the error token. */
- if (yyssp == yyss)
- YYABORT;
-
- YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp);
- yydestruct (yystos[yystate], yyvsp);
- YYPOPSTACK;
- yystate = *yyssp;
- YY_STACK_PRINT (yyss, yyssp);
- }
-
- if (yyn == YYFINAL)
- YYACCEPT;
-
- YYDPRINTF ((stderr, "Shifting error token, "));
-
- *++yyvsp = yylval;
-
-
- yystate = yyn;
- goto yynewstate;
-
-
-/*-------------------------------------.
-| yyacceptlab -- YYACCEPT comes here. |
-`-------------------------------------*/
-yyacceptlab:
- yyresult = 0;
- goto yyreturn;
-
-/*-----------------------------------.
-| yyabortlab -- YYABORT comes here. |
-`-----------------------------------*/
-yyabortlab:
- yyresult = 1;
- goto yyreturn;
-
-#ifndef yyoverflow
-/*----------------------------------------------.
-| yyoverflowlab -- parser overflow comes here. |
-`----------------------------------------------*/
-yyoverflowlab:
- yyerror ("parser stack overflow");
- yyresult = 2;
- /* Fall through. */
-#endif
-
-yyreturn:
-#ifndef yyoverflow
- if (yyss != yyssa)
- YYSTACK_FREE (yyss);
-#endif
- return yyresult;
-}
-
-
-#line 620 "pars0grm.y"
-
-
diff --git a/storage/innobase/pars/pars0grm.h b/storage/innobase/pars/pars0grm.h
deleted file mode 100644
index 0062b8314ee..00000000000
--- a/storage/innobase/pars/pars0grm.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* A Bison parser, made by GNU Bison 1.875d. */
-
-/* Skeleton parser for Yacc-like parsing with Bison,
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* As a special exception, when this file is copied by Bison into a
- Bison output file, you may use that output file without restriction.
- This special exception was added by the Free Software Foundation
- in version 1.24 of Bison. */
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- NEG = 350
- };
-#endif
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define NEG 350
-
-
-
-
-#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-extern YYSTYPE yylval;
-
-
-
diff --git a/storage/innobase/pars/pars0grm.y b/storage/innobase/pars/pars0grm.y
deleted file mode 100644
index a07be9975a1..00000000000
--- a/storage/innobase/pars/pars0grm.y
+++ /dev/null
@@ -1,620 +0,0 @@
-/******************************************************
-SQL parser: input file for the GNU Bison parser generator
-
-(c) 1997 Innobase Oy
-
-Created 12/14/1997 Heikki Tuuri
-Published under the GPL version 2
-
-Look from pars0lex.l for instructions how to generate the C files for
-the InnoDB parser.
-*******************************************************/
-
-%{
-/* The value of the semantic attribute is a pointer to a query tree node
-que_node_t */
-
-#include "univ.i"
-#include <math.h> /* Can't be before univ.i */
-#include "pars0pars.h"
-#include "mem0mem.h"
-#include "que0types.h"
-#include "que0que.h"
-#include "row0sel.h"
-
-#define YYSTYPE que_node_t*
-
-/* #define __STDC__ */
-
-int
-yylex(void);
-%}
-
-%token PARS_INT_LIT
-%token PARS_FLOAT_LIT
-%token PARS_STR_LIT
-%token PARS_FIXBINARY_LIT
-%token PARS_BLOB_LIT
-%token PARS_NULL_LIT
-%token PARS_ID_TOKEN
-%token PARS_AND_TOKEN
-%token PARS_OR_TOKEN
-%token PARS_NOT_TOKEN
-%token PARS_GE_TOKEN
-%token PARS_LE_TOKEN
-%token PARS_NE_TOKEN
-%token PARS_PROCEDURE_TOKEN
-%token PARS_IN_TOKEN
-%token PARS_OUT_TOKEN
-%token PARS_BINARY_TOKEN
-%token PARS_BLOB_TOKEN
-%token PARS_INT_TOKEN
-%token PARS_INTEGER_TOKEN
-%token PARS_FLOAT_TOKEN
-%token PARS_CHAR_TOKEN
-%token PARS_IS_TOKEN
-%token PARS_BEGIN_TOKEN
-%token PARS_END_TOKEN
-%token PARS_IF_TOKEN
-%token PARS_THEN_TOKEN
-%token PARS_ELSE_TOKEN
-%token PARS_ELSIF_TOKEN
-%token PARS_LOOP_TOKEN
-%token PARS_WHILE_TOKEN
-%token PARS_RETURN_TOKEN
-%token PARS_SELECT_TOKEN
-%token PARS_SUM_TOKEN
-%token PARS_COUNT_TOKEN
-%token PARS_DISTINCT_TOKEN
-%token PARS_FROM_TOKEN
-%token PARS_WHERE_TOKEN
-%token PARS_FOR_TOKEN
-%token PARS_DDOT_TOKEN
-%token PARS_READ_TOKEN
-%token PARS_ORDER_TOKEN
-%token PARS_BY_TOKEN
-%token PARS_ASC_TOKEN
-%token PARS_DESC_TOKEN
-%token PARS_INSERT_TOKEN
-%token PARS_INTO_TOKEN
-%token PARS_VALUES_TOKEN
-%token PARS_UPDATE_TOKEN
-%token PARS_SET_TOKEN
-%token PARS_DELETE_TOKEN
-%token PARS_CURRENT_TOKEN
-%token PARS_OF_TOKEN
-%token PARS_CREATE_TOKEN
-%token PARS_TABLE_TOKEN
-%token PARS_INDEX_TOKEN
-%token PARS_UNIQUE_TOKEN
-%token PARS_CLUSTERED_TOKEN
-%token PARS_DOES_NOT_FIT_IN_MEM_TOKEN
-%token PARS_ON_TOKEN
-%token PARS_ASSIGN_TOKEN
-%token PARS_DECLARE_TOKEN
-%token PARS_CURSOR_TOKEN
-%token PARS_SQL_TOKEN
-%token PARS_OPEN_TOKEN
-%token PARS_FETCH_TOKEN
-%token PARS_CLOSE_TOKEN
-%token PARS_NOTFOUND_TOKEN
-%token PARS_TO_CHAR_TOKEN
-%token PARS_TO_NUMBER_TOKEN
-%token PARS_TO_BINARY_TOKEN
-%token PARS_BINARY_TO_NUMBER_TOKEN
-%token PARS_SUBSTR_TOKEN
-%token PARS_REPLSTR_TOKEN
-%token PARS_CONCAT_TOKEN
-%token PARS_INSTR_TOKEN
-%token PARS_LENGTH_TOKEN
-%token PARS_SYSDATE_TOKEN
-%token PARS_PRINTF_TOKEN
-%token PARS_ASSERT_TOKEN
-%token PARS_RND_TOKEN
-%token PARS_RND_STR_TOKEN
-%token PARS_ROW_PRINTF_TOKEN
-%token PARS_COMMIT_TOKEN
-%token PARS_ROLLBACK_TOKEN
-%token PARS_WORK_TOKEN
-%token PARS_UNSIGNED_TOKEN
-%token PARS_EXIT_TOKEN
-%token PARS_FUNCTION_TOKEN
-%token PARS_LOCK_TOKEN
-%token PARS_SHARE_TOKEN
-%token PARS_MODE_TOKEN
-
-%left PARS_AND_TOKEN PARS_OR_TOKEN
-%left PARS_NOT_TOKEN
-%left '=' '<' '>' PARS_GE_TOKEN PARS_LE_TOKEN
-%left '-' '+'
-%left '*' '/'
-%left NEG /* negation--unary minus */
-%left '%'
-
-/* Grammar follows */
-%%
-
-top_statement:
- procedure_definition ';'
-
-statement:
- stored_procedure_call
- | predefined_procedure_call ';'
- | while_statement ';'
- | for_statement ';'
- | exit_statement ';'
- | if_statement ';'
- | return_statement ';'
- | assignment_statement ';'
- | select_statement ';'
- | insert_statement ';'
- | row_printf_statement ';'
- | delete_statement_searched ';'
- | delete_statement_positioned ';'
- | update_statement_searched ';'
- | update_statement_positioned ';'
- | open_cursor_statement ';'
- | fetch_statement ';'
- | close_cursor_statement ';'
- | commit_statement ';'
- | rollback_statement ';'
- | create_table ';'
- | create_index ';'
-;
-
-statement_list:
- statement { $$ = que_node_list_add_last(NULL, $1); }
- | statement_list statement
- { $$ = que_node_list_add_last($1, $2); }
-;
-
-exp:
- PARS_ID_TOKEN { $$ = $1;}
- | function_name '(' exp_list ')'
- { $$ = pars_func($1, $3); }
- | PARS_INT_LIT { $$ = $1;}
- | PARS_FLOAT_LIT { $$ = $1;}
- | PARS_STR_LIT { $$ = $1;}
- | PARS_FIXBINARY_LIT { $$ = $1;}
- | PARS_BLOB_LIT { $$ = $1;}
- | PARS_NULL_LIT { $$ = $1;}
- | PARS_SQL_TOKEN { $$ = $1;}
- | exp '+' exp { $$ = pars_op('+', $1, $3); }
- | exp '-' exp { $$ = pars_op('-', $1, $3); }
- | exp '*' exp { $$ = pars_op('*', $1, $3); }
- | exp '/' exp { $$ = pars_op('/', $1, $3); }
- | '-' exp %prec NEG { $$ = pars_op('-', $2, NULL); }
- | '(' exp ')' { $$ = $2; }
- | exp '=' exp { $$ = pars_op('=', $1, $3); }
- | exp '<' exp { $$ = pars_op('<', $1, $3); }
- | exp '>' exp { $$ = pars_op('>', $1, $3); }
- | exp PARS_GE_TOKEN exp { $$ = pars_op(PARS_GE_TOKEN, $1, $3); }
- | exp PARS_LE_TOKEN exp { $$ = pars_op(PARS_LE_TOKEN, $1, $3); }
- | exp PARS_NE_TOKEN exp { $$ = pars_op(PARS_NE_TOKEN, $1, $3); }
- | exp PARS_AND_TOKEN exp{ $$ = pars_op(PARS_AND_TOKEN, $1, $3); }
- | exp PARS_OR_TOKEN exp { $$ = pars_op(PARS_OR_TOKEN, $1, $3); }
- | PARS_NOT_TOKEN exp { $$ = pars_op(PARS_NOT_TOKEN, $2, NULL); }
- | PARS_ID_TOKEN '%' PARS_NOTFOUND_TOKEN
- { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); }
- | PARS_SQL_TOKEN '%' PARS_NOTFOUND_TOKEN
- { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); }
-;
-
-function_name:
- PARS_TO_CHAR_TOKEN { $$ = &pars_to_char_token; }
- | PARS_TO_NUMBER_TOKEN { $$ = &pars_to_number_token; }
- | PARS_TO_BINARY_TOKEN { $$ = &pars_to_binary_token; }
- | PARS_BINARY_TO_NUMBER_TOKEN
- { $$ = &pars_binary_to_number_token; }
- | PARS_SUBSTR_TOKEN { $$ = &pars_substr_token; }
- | PARS_CONCAT_TOKEN { $$ = &pars_concat_token; }
- | PARS_INSTR_TOKEN { $$ = &pars_instr_token; }
- | PARS_LENGTH_TOKEN { $$ = &pars_length_token; }
- | PARS_SYSDATE_TOKEN { $$ = &pars_sysdate_token; }
- | PARS_RND_TOKEN { $$ = &pars_rnd_token; }
- | PARS_RND_STR_TOKEN { $$ = &pars_rnd_str_token; }
-;
-
-question_mark_list:
- /* Nothing */
- | '?'
- | question_mark_list ',' '?'
-;
-
-stored_procedure_call:
- '{' PARS_ID_TOKEN '(' question_mark_list ')' '}'
- { $$ = pars_stored_procedure_call($2); }
-;
-
-predefined_procedure_call:
- predefined_procedure_name '(' exp_list ')'
- { $$ = pars_procedure_call($1, $3); }
-;
-
-predefined_procedure_name:
- PARS_REPLSTR_TOKEN { $$ = &pars_replstr_token; }
- | PARS_PRINTF_TOKEN { $$ = &pars_printf_token; }
- | PARS_ASSERT_TOKEN { $$ = &pars_assert_token; }
-;
-
-user_function_call:
- PARS_ID_TOKEN '(' ')' { $$ = $1; }
-;
-
-table_list:
- PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
- | table_list ',' PARS_ID_TOKEN
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-variable_list:
- /* Nothing */ { $$ = NULL; }
- | PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
- | variable_list ',' PARS_ID_TOKEN
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-exp_list:
- /* Nothing */ { $$ = NULL; }
- | exp { $$ = que_node_list_add_last(NULL, $1);}
- | exp_list ',' exp { $$ = que_node_list_add_last($1, $3); }
-;
-
-select_item:
- exp { $$ = $1; }
- | PARS_COUNT_TOKEN '(' '*' ')'
- { $$ = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- sym_tab_add_int_lit(
- pars_sym_tab_global, 1))); }
- | PARS_COUNT_TOKEN '(' PARS_DISTINCT_TOKEN PARS_ID_TOKEN ')'
- { $$ = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- pars_func(&pars_distinct_token,
- que_node_list_add_last(
- NULL, $4)))); }
- | PARS_SUM_TOKEN '(' exp ')'
- { $$ = pars_func(&pars_sum_token,
- que_node_list_add_last(NULL,
- $3)); }
-;
-
-select_item_list:
- /* Nothing */ { $$ = NULL; }
- | select_item { $$ = que_node_list_add_last(NULL, $1); }
- | select_item_list ',' select_item
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-select_list:
- '*' { $$ = pars_select_list(&pars_star_denoter,
- NULL); }
- | select_item_list PARS_INTO_TOKEN variable_list
- { $$ = pars_select_list($1, $3); }
- | select_item_list { $$ = pars_select_list($1, NULL); }
-;
-
-search_condition:
- /* Nothing */ { $$ = NULL; }
- | PARS_WHERE_TOKEN exp { $$ = $2; }
-;
-
-for_update_clause:
- /* Nothing */ { $$ = NULL; }
- | PARS_FOR_TOKEN PARS_UPDATE_TOKEN
- { $$ = &pars_update_token; }
-;
-
-lock_shared_clause:
- /* Nothing */ { $$ = NULL; }
- | PARS_LOCK_TOKEN PARS_IN_TOKEN PARS_SHARE_TOKEN PARS_MODE_TOKEN
- { $$ = &pars_share_token; }
-;
-
-order_direction:
- /* Nothing */ { $$ = &pars_asc_token; }
- | PARS_ASC_TOKEN { $$ = &pars_asc_token; }
- | PARS_DESC_TOKEN { $$ = &pars_desc_token; }
-;
-
-order_by_clause:
- /* Nothing */ { $$ = NULL; }
- | PARS_ORDER_TOKEN PARS_BY_TOKEN PARS_ID_TOKEN order_direction
- { $$ = pars_order_by($3, $4); }
-;
-
-select_statement:
- PARS_SELECT_TOKEN select_list
- PARS_FROM_TOKEN table_list
- search_condition
- for_update_clause
- lock_shared_clause
- order_by_clause { $$ = pars_select_statement($2, $4, $5,
- $6, $7, $8); }
-;
-
-insert_statement_start:
- PARS_INSERT_TOKEN PARS_INTO_TOKEN
- PARS_ID_TOKEN { $$ = $3; }
-;
-
-insert_statement:
- insert_statement_start PARS_VALUES_TOKEN '(' exp_list ')'
- { $$ = pars_insert_statement($1, $4, NULL); }
- | insert_statement_start select_statement
- { $$ = pars_insert_statement($1, NULL, $2); }
-;
-
-column_assignment:
- PARS_ID_TOKEN '=' exp { $$ = pars_column_assignment($1, $3); }
-;
-
-column_assignment_list:
- column_assignment { $$ = que_node_list_add_last(NULL, $1); }
- | column_assignment_list ',' column_assignment
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-cursor_positioned:
- PARS_WHERE_TOKEN
- PARS_CURRENT_TOKEN PARS_OF_TOKEN
- PARS_ID_TOKEN { $$ = $4; }
-;
-
-update_statement_start:
- PARS_UPDATE_TOKEN PARS_ID_TOKEN
- PARS_SET_TOKEN
- column_assignment_list { $$ = pars_update_statement_start(FALSE,
- $2, $4); }
-;
-
-update_statement_searched:
- update_statement_start
- search_condition { $$ = pars_update_statement($1, NULL, $2); }
-;
-
-update_statement_positioned:
- update_statement_start
- cursor_positioned { $$ = pars_update_statement($1, $2, NULL); }
-;
-
-delete_statement_start:
- PARS_DELETE_TOKEN PARS_FROM_TOKEN
- PARS_ID_TOKEN { $$ = pars_update_statement_start(TRUE,
- $3, NULL); }
-;
-
-delete_statement_searched:
- delete_statement_start
- search_condition { $$ = pars_update_statement($1, NULL, $2); }
-;
-
-delete_statement_positioned:
- delete_statement_start
- cursor_positioned { $$ = pars_update_statement($1, $2, NULL); }
-;
-
-row_printf_statement:
- PARS_ROW_PRINTF_TOKEN select_statement
- { $$ = pars_row_printf_statement($2); }
-;
-
-assignment_statement:
- PARS_ID_TOKEN PARS_ASSIGN_TOKEN exp
- { $$ = pars_assignment_statement($1, $3); }
-;
-
-elsif_element:
- PARS_ELSIF_TOKEN
- exp PARS_THEN_TOKEN statement_list
- { $$ = pars_elsif_element($2, $4); }
-;
-
-elsif_list:
- elsif_element { $$ = que_node_list_add_last(NULL, $1); }
- | elsif_list elsif_element
- { $$ = que_node_list_add_last($1, $2); }
-;
-
-else_part:
- /* Nothing */ { $$ = NULL; }
- | PARS_ELSE_TOKEN statement_list
- { $$ = $2; }
- | elsif_list { $$ = $1; }
-;
-
-if_statement:
- PARS_IF_TOKEN exp PARS_THEN_TOKEN statement_list
- else_part
- PARS_END_TOKEN PARS_IF_TOKEN
- { $$ = pars_if_statement($2, $4, $5); }
-;
-
-while_statement:
- PARS_WHILE_TOKEN exp PARS_LOOP_TOKEN statement_list
- PARS_END_TOKEN PARS_LOOP_TOKEN
- { $$ = pars_while_statement($2, $4); }
-;
-
-for_statement:
- PARS_FOR_TOKEN PARS_ID_TOKEN PARS_IN_TOKEN
- exp PARS_DDOT_TOKEN exp
- PARS_LOOP_TOKEN statement_list
- PARS_END_TOKEN PARS_LOOP_TOKEN
- { $$ = pars_for_statement($2, $4, $6, $8); }
-;
-
-exit_statement:
- PARS_EXIT_TOKEN { $$ = pars_exit_statement(); }
-;
-
-return_statement:
- PARS_RETURN_TOKEN { $$ = pars_return_statement(); }
-;
-
-open_cursor_statement:
- PARS_OPEN_TOKEN PARS_ID_TOKEN
- { $$ = pars_open_statement(
- ROW_SEL_OPEN_CURSOR, $2); }
-;
-
-close_cursor_statement:
- PARS_CLOSE_TOKEN PARS_ID_TOKEN
- { $$ = pars_open_statement(
- ROW_SEL_CLOSE_CURSOR, $2); }
-;
-
-fetch_statement:
- PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN variable_list
- { $$ = pars_fetch_statement($2, $4, NULL); }
- | PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN user_function_call
- { $$ = pars_fetch_statement($2, NULL, $4); }
-;
-
-column_def:
- PARS_ID_TOKEN type_name opt_column_len opt_unsigned opt_not_null
- { $$ = pars_column_def($1, $2, $3, $4, $5); }
-;
-
-column_def_list:
- column_def { $$ = que_node_list_add_last(NULL, $1); }
- | column_def_list ',' column_def
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-opt_column_len:
- /* Nothing */ { $$ = NULL; }
- | '(' PARS_INT_LIT ')'
- { $$ = $2; }
-;
-
-opt_unsigned:
- /* Nothing */ { $$ = NULL; }
- | PARS_UNSIGNED_TOKEN
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-opt_not_null:
- /* Nothing */ { $$ = NULL; }
- | PARS_NOT_TOKEN PARS_NULL_LIT
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-not_fit_in_memory:
- /* Nothing */ { $$ = NULL; }
- | PARS_DOES_NOT_FIT_IN_MEM_TOKEN
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-create_table:
- PARS_CREATE_TOKEN PARS_TABLE_TOKEN
- PARS_ID_TOKEN '(' column_def_list ')'
- not_fit_in_memory { $$ = pars_create_table($3, $5, $7); }
-;
-
-column_list:
- PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
- | column_list ',' PARS_ID_TOKEN
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-unique_def:
- /* Nothing */ { $$ = NULL; }
- | PARS_UNIQUE_TOKEN { $$ = &pars_unique_token; }
-;
-
-clustered_def:
- /* Nothing */ { $$ = NULL; }
- | PARS_CLUSTERED_TOKEN { $$ = &pars_clustered_token; }
-;
-
-create_index:
- PARS_CREATE_TOKEN unique_def
- clustered_def
- PARS_INDEX_TOKEN
- PARS_ID_TOKEN PARS_ON_TOKEN PARS_ID_TOKEN
- '(' column_list ')' { $$ = pars_create_index($2, $3, $5, $7, $9); }
-;
-
-commit_statement:
- PARS_COMMIT_TOKEN PARS_WORK_TOKEN
- { $$ = pars_commit_statement(); }
-;
-
-rollback_statement:
- PARS_ROLLBACK_TOKEN PARS_WORK_TOKEN
- { $$ = pars_rollback_statement(); }
-;
-
-type_name:
- PARS_INT_TOKEN { $$ = &pars_int_token; }
- | PARS_INTEGER_TOKEN { $$ = &pars_int_token; }
- | PARS_CHAR_TOKEN { $$ = &pars_char_token; }
- | PARS_BINARY_TOKEN { $$ = &pars_binary_token; }
- | PARS_BLOB_TOKEN { $$ = &pars_blob_token; }
-;
-
-parameter_declaration:
- PARS_ID_TOKEN PARS_IN_TOKEN type_name
- { $$ = pars_parameter_declaration($1,
- PARS_INPUT, $3); }
- | PARS_ID_TOKEN PARS_OUT_TOKEN type_name
- { $$ = pars_parameter_declaration($1,
- PARS_OUTPUT, $3); }
-;
-
-parameter_declaration_list:
- /* Nothing */ { $$ = NULL; }
- | parameter_declaration { $$ = que_node_list_add_last(NULL, $1); }
- | parameter_declaration_list ',' parameter_declaration
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-variable_declaration:
- PARS_ID_TOKEN type_name ';'
- { $$ = pars_variable_declaration($1, $2); }
-;
-
-variable_declaration_list:
- /* Nothing */
- | variable_declaration
- | variable_declaration_list variable_declaration
-;
-
-cursor_declaration:
- PARS_DECLARE_TOKEN PARS_CURSOR_TOKEN PARS_ID_TOKEN
- PARS_IS_TOKEN select_statement ';'
- { $$ = pars_cursor_declaration($3, $5); }
-;
-
-function_declaration:
- PARS_DECLARE_TOKEN PARS_FUNCTION_TOKEN PARS_ID_TOKEN ';'
- { $$ = pars_function_declaration($3); }
-;
-
-declaration:
- cursor_declaration
- | function_declaration
-;
-
-declaration_list:
- /* Nothing */
- | declaration
- | declaration_list declaration
-;
-
-procedure_definition:
- PARS_PROCEDURE_TOKEN PARS_ID_TOKEN '(' parameter_declaration_list ')'
- PARS_IS_TOKEN
- variable_declaration_list
- declaration_list
- PARS_BEGIN_TOKEN
- statement_list
- PARS_END_TOKEN { $$ = pars_procedure_definition($2, $4,
- $10); }
-;
-
-%%
diff --git a/storage/innobase/pars/pars0lex.l b/storage/innobase/pars/pars0lex.l
deleted file mode 100644
index ad65034fab0..00000000000
--- a/storage/innobase/pars/pars0lex.l
+++ /dev/null
@@ -1,648 +0,0 @@
-/******************************************************
-SQL parser lexical analyzer: input file for the GNU Flex lexer generator
-
-(c) 1997 Innobase Oy
-
-Created 12/14/1997 Heikki Tuuri
-Published under the GPL version 2
-
-The InnoDB parser is frozen because MySQL takes care of SQL parsing.
-Therefore we normally keep the InnoDB parser C files as they are, and do
-not automatically generate them from pars0grm.y and pars0lex.l.
-
-How to make the InnoDB parser and lexer C files:
-
-1. Run ./make_flex.sh to generate lexer files.
-
-2. Run ./make_bison.sh to generate parser files.
-
-These instructions seem to work at least with bison-1.875d and flex-2.5.31 on
-Linux.
-*******************************************************/
-
-%option nostdinit
-%option 8bit
-%option warn
-%option pointer
-%option never-interactive
-%option nodefault
-%option noinput
-%option nounput
-%option noyywrap
-%option noyy_scan_buffer
-%option noyy_scan_bytes
-%option noyy_scan_string
-%option nounistd
-
-%{
-#define YYSTYPE que_node_t*
-
-#include "univ.i"
-#include "pars0pars.h"
-#include "pars0grm.h"
-#include "pars0sym.h"
-#include "mem0mem.h"
-#include "os0proc.h"
-
-#define malloc(A) ut_malloc(A)
-#define free(A) ut_free(A)
-#define realloc(P, A) ut_realloc(P, A)
-#define exit(A) ut_error
-
-#define YY_INPUT(buf, result, max_size) pars_get_lex_chars(buf, &result, max_size)
-
-/* String buffer for removing quotes */
-static ulint stringbuf_len_alloc = 0; /* Allocated length */
-static ulint stringbuf_len = 0; /* Current length */
-static char* stringbuf; /* Start of buffer */
-/* Appends a string to the buffer. */
-static
-void
-string_append(
-/*==========*/
- const char* str, /* in: string to be appended */
- ulint len) /* in: length of the string */
-{
- if (stringbuf == NULL) {
- stringbuf = malloc(1);
- stringbuf_len_alloc = 1;
- }
-
- if (stringbuf_len + len > stringbuf_len_alloc) {
- while (stringbuf_len + len > stringbuf_len_alloc) {
- stringbuf_len_alloc <<= 1;
- }
- stringbuf = realloc(stringbuf, stringbuf_len_alloc);
- }
-
- memcpy(stringbuf + stringbuf_len, str, len);
- stringbuf_len += len;
-}
-
-%}
-
-DIGIT [0-9]
-ID [a-z_A-Z][a-z_A-Z0-9]*
-BOUND_LIT \:[a-z_A-Z0-9]+
-BOUND_ID \$[a-z_A-Z0-9]+
-
-%x comment
-%x quoted
-%x id
-%%
-
-{DIGIT}+ {
- yylval = sym_tab_add_int_lit(pars_sym_tab_global,
- atoi(yytext));
- return(PARS_INT_LIT);
-}
-
-{DIGIT}+"."{DIGIT}* {
- ut_error; /* not implemented */
-
- return(PARS_FLOAT_LIT);
-}
-
-{BOUND_LIT} {
- ulint type;
-
- yylval = sym_tab_add_bound_lit(pars_sym_tab_global,
- yytext + 1, &type);
-
- return((int) type);
-}
-
-{BOUND_ID} {
- yylval = sym_tab_add_bound_id(pars_sym_tab_global,
- yytext + 1);
-
- return(PARS_ID_TOKEN);
-}
-
-"'" {
-/* Quoted character string literals are handled in an explicit
-start state 'quoted'. This state is entered and the buffer for
-the scanned string is emptied upon encountering a starting quote.
-
-In the state 'quoted', only two actions are possible (defined below). */
- BEGIN(quoted);
- stringbuf_len = 0;
-}
-<quoted>[^\']+ {
- /* Got a sequence of characters other than "'":
- append to string buffer */
- string_append(yytext, yyleng);
-}
-<quoted>"'"+ {
- /* Got a sequence of "'" characters:
- append half of them to string buffer,
- as "''" represents a single "'".
- We apply truncating division,
- so that "'''" will result in "'". */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- string literal. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_str_lit(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
- return(PARS_STR_LIT);
- }
-}
-
-\" {
-/* Quoted identifiers are handled in an explicit start state 'id'.
-This state is entered and the buffer for the scanned string is emptied
-upon encountering a starting quote.
-
-In the state 'id', only two actions are possible (defined below). */
- BEGIN(id);
- stringbuf_len = 0;
-}
-<id>[^\"]+ {
- /* Got a sequence of characters other than '"':
- append to string buffer */
- string_append(yytext, yyleng);
-}
-<id>\"+ {
- /* Got a sequence of '"' characters:
- append half of them to string buffer,
- as '""' represents a single '"'.
- We apply truncating division,
- so that '"""' will result in '"'. */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- identifier. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_id(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
-
- return(PARS_ID_TOKEN);
- }
-}
-
-"NULL" {
- yylval = sym_tab_add_null_lit(pars_sym_tab_global);
-
- return(PARS_NULL_LIT);
-}
-
-"SQL" {
- /* Implicit cursor name */
- yylval = sym_tab_add_str_lit(pars_sym_tab_global,
- (byte*) yytext, yyleng);
- return(PARS_SQL_TOKEN);
-}
-
-"AND" {
- return(PARS_AND_TOKEN);
-}
-
-"OR" {
- return(PARS_OR_TOKEN);
-}
-
-"NOT" {
- return(PARS_NOT_TOKEN);
-}
-
-"PROCEDURE" {
- return(PARS_PROCEDURE_TOKEN);
-}
-
-"IN" {
- return(PARS_IN_TOKEN);
-}
-
-"OUT" {
- return(PARS_OUT_TOKEN);
-}
-
-"BINARY" {
- return(PARS_BINARY_TOKEN);
-}
-
-"BLOB" {
- return(PARS_BLOB_TOKEN);
-}
-
-"INT" {
- return(PARS_INT_TOKEN);
-}
-
-"INTEGER" {
- return(PARS_INT_TOKEN);
-}
-
-"FLOAT" {
- return(PARS_FLOAT_TOKEN);
-}
-
-"CHAR" {
- return(PARS_CHAR_TOKEN);
-}
-
-"IS" {
- return(PARS_IS_TOKEN);
-}
-
-"BEGIN" {
- return(PARS_BEGIN_TOKEN);
-}
-
-"END" {
- return(PARS_END_TOKEN);
-}
-
-"IF" {
- return(PARS_IF_TOKEN);
-}
-
-"THEN" {
- return(PARS_THEN_TOKEN);
-}
-
-"ELSE" {
- return(PARS_ELSE_TOKEN);
-}
-
-"ELSIF" {
- return(PARS_ELSIF_TOKEN);
-}
-
-"LOOP" {
- return(PARS_LOOP_TOKEN);
-}
-
-"WHILE" {
- return(PARS_WHILE_TOKEN);
-}
-
-"RETURN" {
- return(PARS_RETURN_TOKEN);
-}
-
-"SELECT" {
- return(PARS_SELECT_TOKEN);
-}
-
-"SUM" {
- return(PARS_SUM_TOKEN);
-}
-
-"COUNT" {
- return(PARS_COUNT_TOKEN);
-}
-
-"DISTINCT" {
- return(PARS_DISTINCT_TOKEN);
-}
-
-"FROM" {
- return(PARS_FROM_TOKEN);
-}
-
-"WHERE" {
- return(PARS_WHERE_TOKEN);
-}
-
-"FOR" {
- return(PARS_FOR_TOKEN);
-}
-
-"READ" {
- return(PARS_READ_TOKEN);
-}
-
-"ORDER" {
- return(PARS_ORDER_TOKEN);
-}
-
-"BY" {
- return(PARS_BY_TOKEN);
-}
-
-"ASC" {
- return(PARS_ASC_TOKEN);
-}
-
-"DESC" {
- return(PARS_DESC_TOKEN);
-}
-
-"INSERT" {
- return(PARS_INSERT_TOKEN);
-}
-
-"INTO" {
- return(PARS_INTO_TOKEN);
-}
-
-"VALUES" {
- return(PARS_VALUES_TOKEN);
-}
-
-"UPDATE" {
- return(PARS_UPDATE_TOKEN);
-}
-
-"SET" {
- return(PARS_SET_TOKEN);
-}
-
-"DELETE" {
- return(PARS_DELETE_TOKEN);
-}
-
-"CURRENT" {
- return(PARS_CURRENT_TOKEN);
-}
-
-"OF" {
- return(PARS_OF_TOKEN);
-}
-
-"CREATE" {
- return(PARS_CREATE_TOKEN);
-}
-
-"TABLE" {
- return(PARS_TABLE_TOKEN);
-}
-
-"INDEX" {
- return(PARS_INDEX_TOKEN);
-}
-
-"UNIQUE" {
- return(PARS_UNIQUE_TOKEN);
-}
-
-"CLUSTERED" {
- return(PARS_CLUSTERED_TOKEN);
-}
-
-"DOES_NOT_FIT_IN_MEMORY" {
- return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
-}
-
-"ON" {
- return(PARS_ON_TOKEN);
-}
-
-"DECLARE" {
- return(PARS_DECLARE_TOKEN);
-}
-
-"CURSOR" {
- return(PARS_CURSOR_TOKEN);
-}
-
-"OPEN" {
- return(PARS_OPEN_TOKEN);
-}
-
-"FETCH" {
- return(PARS_FETCH_TOKEN);
-}
-
-"CLOSE" {
- return(PARS_CLOSE_TOKEN);
-}
-
-"NOTFOUND" {
- return(PARS_NOTFOUND_TOKEN);
-}
-
-"TO_CHAR" {
- return(PARS_TO_CHAR_TOKEN);
-}
-
-"TO_NUMBER" {
- return(PARS_TO_NUMBER_TOKEN);
-}
-
-"TO_BINARY" {
- return(PARS_TO_BINARY_TOKEN);
-}
-
-"BINARY_TO_NUMBER" {
- return(PARS_BINARY_TO_NUMBER_TOKEN);
-}
-
-"SUBSTR" {
- return(PARS_SUBSTR_TOKEN);
-}
-
-"REPLSTR" {
- return(PARS_REPLSTR_TOKEN);
-}
-
-"CONCAT" {
- return(PARS_CONCAT_TOKEN);
-}
-
-"INSTR" {
- return(PARS_INSTR_TOKEN);
-}
-
-"LENGTH" {
- return(PARS_LENGTH_TOKEN);
-}
-
-"SYSDATE" {
- return(PARS_SYSDATE_TOKEN);
-}
-
-"PRINTF" {
- return(PARS_PRINTF_TOKEN);
-}
-
-"ASSERT" {
- return(PARS_ASSERT_TOKEN);
-}
-
-"RND" {
- return(PARS_RND_TOKEN);
-}
-
-"RND_STR" {
- return(PARS_RND_STR_TOKEN);
-}
-
-"ROW_PRINTF" {
- return(PARS_ROW_PRINTF_TOKEN);
-}
-
-"COMMIT" {
- return(PARS_COMMIT_TOKEN);
-}
-
-"ROLLBACK" {
- return(PARS_ROLLBACK_TOKEN);
-}
-
-"WORK" {
- return(PARS_WORK_TOKEN);
-}
-
-"UNSIGNED" {
- return(PARS_UNSIGNED_TOKEN);
-}
-
-"EXIT" {
- return(PARS_EXIT_TOKEN);
-}
-
-"FUNCTION" {
- return(PARS_FUNCTION_TOKEN);
-}
-
-"LOCK" {
- return(PARS_LOCK_TOKEN);
-}
-
-"SHARE" {
- return(PARS_SHARE_TOKEN);
-}
-
-"MODE" {
- return(PARS_MODE_TOKEN);
-}
-
-{ID} {
- yylval = sym_tab_add_id(pars_sym_tab_global,
- (byte*)yytext,
- ut_strlen(yytext));
- return(PARS_ID_TOKEN);
-}
-
-".." {
- return(PARS_DDOT_TOKEN);
-}
-
-":=" {
- return(PARS_ASSIGN_TOKEN);
-}
-
-"<=" {
- return(PARS_LE_TOKEN);
-}
-
-">=" {
- return(PARS_GE_TOKEN);
-}
-
-"<>" {
- return(PARS_NE_TOKEN);
-}
-
-"(" {
-
- return((int)(*yytext));
-}
-
-"=" {
-
- return((int)(*yytext));
-}
-
-">" {
-
- return((int)(*yytext));
-}
-
-"<" {
-
- return((int)(*yytext));
-}
-
-"," {
-
- return((int)(*yytext));
-}
-
-";" {
-
- return((int)(*yytext));
-}
-
-")" {
-
- return((int)(*yytext));
-}
-
-"+" {
-
- return((int)(*yytext));
-}
-
-"-" {
-
- return((int)(*yytext));
-}
-
-"*" {
-
- return((int)(*yytext));
-}
-
-"/" {
-
- return((int)(*yytext));
-}
-
-"%" {
-
- return((int)(*yytext));
-}
-
-"{" {
-
- return((int)(*yytext));
-}
-
-"}" {
-
- return((int)(*yytext));
-}
-
-"?" {
-
- return((int)(*yytext));
-}
-
-"/*" BEGIN(comment); /* eat up comment */
-
-<comment>[^*]*
-<comment>"*"+[^*/]*
-<comment>"*"+"/" BEGIN(INITIAL);
-
-[ \t\n]+ /* eat up whitespace */
-
-
-. {
- fprintf(stderr,"Unrecognized character: %02x\n",
- *yytext);
-
- ut_error;
-
- return(0);
-}
-
-%%
diff --git a/storage/innobase/pars/pars0opt.c b/storage/innobase/pars/pars0opt.c
deleted file mode 100644
index 2abe6720235..00000000000
--- a/storage/innobase/pars/pars0opt.c
+++ /dev/null
@@ -1,1208 +0,0 @@
-/******************************************************
-Simple SQL optimizer
-
-(c) 1997 Innobase Oy
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
-
-#include "pars0opt.h"
-
-#ifdef UNIV_NONINL
-#include "pars0opt.ic"
-#endif
-
-#include "row0sel.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "dict0dict.h"
-#include "dict0mem.h"
-#include "que0que.h"
-#include "pars0grm.h"
-#include "pars0pars.h"
-#include "lock0lock.h"
-
-#define OPT_EQUAL 1 /* comparison by = */
-#define OPT_COMPARISON 2 /* comparison by <, >, <=, or >= */
-
-#define OPT_NOT_COND 1
-#define OPT_END_COND 2
-#define OPT_TEST_COND 3
-#define OPT_SCROLL_COND 4
-
-
-/***********************************************************************
-Inverts a comparison operator. */
-static
-int
-opt_invert_cmp_op(
-/*==============*/
- /* out: the equivalent operator when the order of
- the arguments is switched */
- int op) /* in: operator */
-{
- if (op == '<') {
- return('>');
- } else if (op == '>') {
- return('<');
- } else if (op == '=') {
- return('=');
- } else if (op == PARS_LE_TOKEN) {
- return(PARS_GE_TOKEN);
- } else if (op == PARS_GE_TOKEN) {
- return(PARS_LE_TOKEN);
- } else {
- ut_error;
- }
-
- return(0);
-}
-
-/***********************************************************************
-Checks if the value of an expression can be calculated BEFORE the nth table
-in a join is accessed. If this is the case, it can possibly be used in an
-index search for the nth table. */
-static
-ibool
-opt_check_exp_determined_before(
-/*============================*/
- /* out: TRUE if already determined */
- que_node_t* exp, /* in: expression */
- sel_node_t* sel_node, /* in: select node */
- ulint nth_table) /* in: nth table will be accessed */
-{
- func_node_t* func_node;
- sym_node_t* sym_node;
- dict_table_t* table;
- que_node_t* arg;
- ulint i;
-
- ut_ad(exp && sel_node);
-
- if (que_node_get_type(exp) == QUE_NODE_FUNC) {
- func_node = exp;
-
- arg = func_node->args;
-
- while (arg) {
- if (!opt_check_exp_determined_before(arg, sel_node,
- nth_table)) {
- return(FALSE);
- }
-
- arg = que_node_get_next(arg);
- }
-
- return(TRUE);
- }
-
- ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL);
-
- sym_node = exp;
-
- if (sym_node->token_type != SYM_COLUMN) {
-
- return(TRUE);
- }
-
- for (i = 0; i < nth_table; i++) {
-
- table = sel_node_get_nth_plan(sel_node, i)->table;
-
- if (sym_node->table == table) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/***********************************************************************
-Looks in a comparison condition if a column value is already restricted by
-it BEFORE the nth table is accessed. */
-static
-que_node_t*
-opt_look_for_col_in_comparison_before(
-/*==================================*/
- /* out: expression restricting the
- value of the column, or NULL if not
- known */
- ulint cmp_type, /* in: OPT_EQUAL, OPT_COMPARISON */
- ulint col_no, /* in: column number */
- func_node_t* search_cond, /* in: comparison condition */
- sel_node_t* sel_node, /* in: select node */
- ulint nth_table, /* in: nth table in a join (a query
- from a single table is considered a
- join of 1 table) */
- ulint* op) /* out: comparison operator ('=',
- PARS_GE_TOKEN, ... ); this is inverted
- if the column appears on the right
- side */
-{
- sym_node_t* sym_node;
- dict_table_t* table;
- que_node_t* exp;
- que_node_t* arg;
-
- ut_ad(search_cond);
-
- ut_a((search_cond->func == '<')
- || (search_cond->func == '>')
- || (search_cond->func == '=')
- || (search_cond->func == PARS_GE_TOKEN)
- || (search_cond->func == PARS_LE_TOKEN));
-
- table = sel_node_get_nth_plan(sel_node, nth_table)->table;
-
- if ((cmp_type == OPT_EQUAL) && (search_cond->func != '=')) {
-
- return(NULL);
-
- } else if ((cmp_type == OPT_COMPARISON)
- && (search_cond->func != '<')
- && (search_cond->func != '>')
- && (search_cond->func != PARS_GE_TOKEN)
- && (search_cond->func != PARS_LE_TOKEN)) {
-
- return(NULL);
- }
-
- arg = search_cond->args;
-
- if (que_node_get_type(arg) == QUE_NODE_SYMBOL) {
- sym_node = arg;
-
- if ((sym_node->token_type == SYM_COLUMN)
- && (sym_node->table == table)
- && (sym_node->col_no == col_no)) {
-
- /* sym_node contains the desired column id */
-
- /* Check if the expression on the right side of the
- operator is already determined */
-
- exp = que_node_get_next(arg);
-
- if (opt_check_exp_determined_before(exp, sel_node,
- nth_table)) {
- *op = search_cond->func;
-
- return(exp);
- }
- }
- }
-
- exp = search_cond->args;
- arg = que_node_get_next(arg);
-
- if (que_node_get_type(arg) == QUE_NODE_SYMBOL) {
- sym_node = arg;
-
- if ((sym_node->token_type == SYM_COLUMN)
- && (sym_node->table == table)
- && (sym_node->col_no == col_no)) {
-
- if (opt_check_exp_determined_before(exp, sel_node,
- nth_table)) {
- *op = opt_invert_cmp_op(search_cond->func);
-
- return(exp);
- }
- }
- }
-
- return(NULL);
-}
-
-/***********************************************************************
-Searches a conjunction of comparisons for an expression which restricts the
-value of a column already BEFORE the nth table is accessed. A restriction
-is only usable if it bounds the column from the side where the scan starts:
-an upper limit ('<', PARS_LE_TOKEN) is rejected for an ascending fetch and a
-lower limit ('>', PARS_GE_TOKEN) is rejected for a descending fetch. */
-static
-que_node_t*
-opt_look_for_col_in_cond_before(
-/*============================*/
-					/* out: expression restricting the
-					value of the column, or NULL if not
-					known */
-	ulint		cmp_type,	/* in: OPT_EQUAL, OPT_COMPARISON */
-	ulint		col_no,		/* in: column number */
-	func_node_t*	search_cond,	/* in: search condition or NULL */
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		nth_table,	/* in: nth table in a join (a query
-					from a single table is considered a
-					join of 1 table) */
-	ulint*		op)		/* out: comparison operator ('=',
-					PARS_GE_TOKEN, ... ) */
-{
-	func_node_t*	conjunct;
-	que_node_t*	found;
-	ibool		is_upper_limit;
-	ibool		is_lower_limit;
-
-	if (!search_cond) {
-
-		return(NULL);
-	}
-
-	/* Only conjunctions of plain comparisons are supported */
-	ut_a(que_node_get_type(search_cond) == QUE_NODE_FUNC);
-	ut_a(search_cond->func != PARS_OR_TOKEN);
-	ut_a(search_cond->func != PARS_NOT_TOKEN);
-
-	if (search_cond->func == PARS_AND_TOKEN) {
-		/* Try the first conjunct; fall back to the second one only
-		if the first one gave nothing */
-
-		conjunct = search_cond->args;
-
-		found = opt_look_for_col_in_cond_before(cmp_type, col_no,
-							conjunct, sel_node,
-							nth_table, op);
-		if (found == NULL) {
-			conjunct = que_node_get_next(conjunct);
-
-			found = opt_look_for_col_in_cond_before(
-				cmp_type, col_no, conjunct, sel_node,
-				nth_table, op);
-		}
-
-		return(found);
-	}
-
-	found = opt_look_for_col_in_comparison_before(cmp_type, col_no,
-						      search_cond, sel_node,
-						      nth_table, op);
-	if (!found) {
-
-		return(NULL);
-	}
-
-	/* *op is valid here because a comparison was found */
-
-	is_upper_limit = (*op == '<') || (*op == PARS_LE_TOKEN);
-	is_lower_limit = (*op == '>') || (*op == PARS_GE_TOKEN);
-
-	if ((sel_node->asc && is_upper_limit)
-	    || (!sel_node->asc && is_lower_limit)) {
-
-		/* The limit is on the far end of the scan: it cannot be
-		used to position the cursor */
-
-		return(NULL);
-	}
-
-	return(found);
-}
-
-/***********************************************************************
-Calculates the goodness for an index according to a select node. Scoring:
-4 points for each leading index field whose value is already known exactly;
-2 points if the first field that is not known exactly has a usable
-comparison condition; 1024 points if the total reaches
-4 * dict_index_get_n_unique(index), plus a further 1024 points if that index
-is clustered; and finally 1 point for a clustered index whenever the
-goodness is nonzero. */
-static
-ulint
-opt_calc_index_goodness(
-/*====================*/
-					/* out: goodness */
-	dict_index_t*	index,		/* in: index */
-	sel_node_t*	sel_node,	/* in: parsed select node */
-	ulint		nth_table,	/* in: nth table in a join */
-	que_node_t**	index_plan,	/* in/out: comparison expressions for
-					this index */
-	ulint*		last_op)	/* out: last comparison operator, if
-					goodness > 1 */
-{
-	que_node_t*	match;
-	ulint		score = 0;
-	ulint		n_fields;
-	ulint		col_no;
-	ulint		op;
-	ulint		j;
-
-	/* Note that as higher level node pointers in the B-tree contain
-	page addresses as the last field, we must not put more fields in
-	the search tuple than dict_index_get_n_unique_in_tree(index); see
-	the note in btr_cur_search_to_nth_level. */
-
-	n_fields = dict_index_get_n_unique_in_tree(index);
-
-	for (j = 0; j < n_fields; j++) {
-
-		col_no = dict_index_get_nth_col_no(index, j);
-
-		match = opt_look_for_col_in_cond_before(
-			OPT_EQUAL, col_no, sel_node->search_cond,
-			sel_node, nth_table, &op);
-
-		if (match != NULL) {
-			/* The value for this column is exactly known
-			already at this stage of the join: go on to the
-			next index field */
-
-			index_plan[j] = match;
-			*last_op = op;
-			score += 4;
-
-			continue;
-		}
-
-		/* No exact match: a non-equality comparison may still
-		bound this field, but the prefix ends here either way */
-
-		match = opt_look_for_col_in_cond_before(
-			OPT_COMPARISON, col_no, sel_node->search_cond,
-			sel_node, nth_table, &op);
-
-		if (match != NULL) {
-			index_plan[j] = match;
-			*last_op = op;
-			score += 2;
-		}
-
-		break;
-	}
-
-	if (score >= 4 * dict_index_get_n_unique(index)) {
-		/* All unique fields are known exactly; a clustered index
-		gets the bonus twice */
-
-		score += (index->type & DICT_CLUSTERED) ? 2048 : 1024;
-	}
-
-	/* We have to test for a nonzero score here, as last_op may not
-	be set otherwise */
-	if (score != 0 && (index->type & DICT_CLUSTERED)) {
-
-		score++;
-	}
-
-	return(score);
-}
-
-/***********************************************************************
-Calculates the number of matched fields based on an index goodness. The
-multiples of 1024 in the goodness are discarded; of the remainder every 4
-points count as one field, and adding 2 before the division makes a
-remainder of 2 or 3 count as one more field. */
-UNIV_INLINE
-ulint
-opt_calc_n_fields_from_goodness(
-/*============================*/
-				/* out: number of exactly or partially
-				matched fields */
-	ulint	goodness)	/* in: goodness */
-{
-	ulint	field_points;
-
-	field_points = goodness % 1024;
-
-	return((field_points + 2) / 4);
-}
-
-/***********************************************************************
-Converts a comparison operator to the corresponding search mode PAGE_CUR_GE,
-... The strict and non-strict inequality operators are accepted only
-together with the fetch direction asserted below; any other operator code
-is an error. */
-UNIV_INLINE
-ulint
-opt_op_to_search_mode(
-/*==================*/
-			/* out: search mode */
-	ibool	asc,	/* in: TRUE if the rows should be fetched in an
-			ascending order */
-	ulint	op)	/* in: operator '=', PARS_GE_TOKEN, ... */
-{
-	switch (op) {
-	case '=':
-		/* An equality positions the cursor from the side where
-		the scan begins */
-		return(asc ? PAGE_CUR_GE : PAGE_CUR_LE);
-
-	case '<':
-		ut_a(!asc);
-		return(PAGE_CUR_L);
-
-	case '>':
-		ut_a(asc);
-		return(PAGE_CUR_G);
-
-	case PARS_GE_TOKEN:
-		ut_a(asc);
-		return(PAGE_CUR_GE);
-
-	case PARS_LE_TOKEN:
-		ut_a(!asc);
-		return(PAGE_CUR_LE);
-
-	default:
-		ut_error;
-	}
-
-	return(0);
-}
-
-/***********************************************************************
-Determines if a node is an argument node of a function node. Only the
-direct argument list of the function node is walked; the nodes are compared
-by address. */
-static
-ibool
-opt_is_arg(
-/*=======*/
-					/* out: TRUE if is an argument */
-	que_node_t*	arg_node,	/* in: possible argument node */
-	func_node_t*	func_node)	/* in: function node */
-{
-	que_node_t*	candidate;
-
-	for (candidate = func_node->args;
-	     candidate != NULL;
-	     candidate = que_node_get_next(candidate)) {
-
-		if (candidate == arg_node) {
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/***********************************************************************
-Checks that the chosen query plan produces a result which satisfies the
-order-by clause, if there is one. Nothing is returned: a plan which does
-not agree with the order-by fails an assertion. */
-static
-void
-opt_check_order_by(
-/*===============*/
-	sel_node_t*	sel_node)	/* in: select node; asserts an error
-					if the plan does not agree with the
-					order-by */
-{
-	order_node_t*	order_by;
-	dict_table_t*	sorted_table;
-	ulint		sorted_col_no;
-	plan_t*		plan;
-	ulint		i;
-
-	if (!sel_node->order_by) {
-
-		return;
-	}
-
-	order_by = sel_node->order_by;
-	sorted_col_no = order_by->column->col_no;
-	sorted_table = order_by->column->table;
-
-	/* If there is an order-by clause, the first non-exactly matched field
-	in the index used for the last table in the table list should be the
-	column defined in the order-by clause, and for all the other tables
-	we should get only at most a single row, otherwise we cannot presently
-	calculate the order-by, as we have no sort utility */
-
-	for (i = 0; i < sel_node->n_tables; i++) {
-
-		plan = sel_node_get_nth_plan(sel_node, i);
-
-		if (i + 1 < sel_node->n_tables) {
-			/* Not the last table: all the unique fields must be
-			matched exactly */
-
-			ut_a(dict_index_get_n_unique(plan->index)
-			     <= plan->n_exact_match);
-
-			continue;
-		}
-
-		/* The last table must be the one which is ordered */
-
-		ut_a(plan->table == sorted_table);
-
-		ut_a((dict_index_get_n_unique(plan->index)
-		      <= plan->n_exact_match)
-		     || (dict_index_get_nth_col_no(plan->index,
-						   plan->n_exact_match)
-			 == sorted_col_no));
-	}
-}
-
-/***********************************************************************
-Chooses the index through which the ith table of a select is accessed and
-initializes the plan node of that table. Every index of the table is scored
-with opt_calc_index_goodness() and the first index with the strictly highest
-goodness is taken; if no index scores above zero, the first index of the
-table is used and no search tuple is built. */
-static
-void
-opt_search_plan_for_table(
-/*======================*/
- sel_node_t* sel_node, /* in: parsed select node */
- ulint i, /* in: this is the ith table */
- dict_table_t* table) /* in: table */
-{
- plan_t* plan;
- dict_index_t* index;
- dict_index_t* best_index;
- ulint n_fields;
- ulint goodness;
- ulint last_op = 75946965; /* Eliminate a Purify
- warning */
- ulint best_goodness;
- ulint best_last_op = 0; /* remove warning */
- que_node_t* index_plan[256];
- que_node_t* best_index_plan[256];
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- plan->table = table;
- plan->asc = sel_node->asc;
- plan->pcur_is_open = FALSE;
- plan->cursor_at_end = FALSE;
-
- /* Calculate goodness for each index of the table */
-
- index = dict_table_get_first_index(table);
- best_index = index; /* Eliminate compiler warning */
- best_goodness = 0;
-
- /* should be do ... until ? comment by Jani */
- while (index) {
- goodness = opt_calc_index_goodness(index, sel_node, i,
- index_plan, &last_op);
- /* Strict comparison: of equally good indexes the earlier one in
- the index list of the table wins */
- if (goodness > best_goodness) {
-
- best_index = index;
- best_goodness = goodness;
- n_fields = opt_calc_n_fields_from_goodness(goodness);
-
- /* Save the expressions found for this index: the next call of
- opt_calc_index_goodness() reuses index_plan */
- ut_memcpy(best_index_plan, index_plan,
- n_fields * sizeof(void*));
- best_last_op = last_op;
- }
-
- index = dict_table_get_next_index(index);
- }
-
- plan->index = best_index;
-
- n_fields = opt_calc_n_fields_from_goodness(best_goodness);
-
- if (n_fields == 0) {
- /* No usable condition was found: there is no search tuple. Note
- that plan->tuple_exps and plan->mode are not assigned on this
- path */
- plan->tuple = NULL;
- plan->n_exact_match = 0;
- } else {
- plan->tuple = dtuple_create(pars_sym_tab_global->heap,
- n_fields);
- dict_index_copy_types(plan->tuple, plan->index, n_fields);
-
- plan->tuple_exps = mem_heap_alloc(pars_sym_tab_global->heap,
- n_fields * sizeof(void*));
-
- ut_memcpy(plan->tuple_exps, best_index_plan,
- n_fields * sizeof(void*));
- /* If the last operator recorded for the best index is '=', all
- the tuple fields are exact matches; otherwise the last field is
- a non-equality comparison */
- if (best_last_op == '=') {
- plan->n_exact_match = n_fields;
- } else {
- plan->n_exact_match = n_fields - 1;
- }
-
- plan->mode = opt_op_to_search_mode(sel_node->asc,
- best_last_op);
- }
-
- /* The search is flagged unique only for a clustered index whose
- unique fields are all matched exactly */
- if ((best_index->type & DICT_CLUSTERED)
- && (plan->n_exact_match >= dict_index_get_n_unique(best_index))) {
-
- plan->unique_search = TRUE;
- } else {
- plan->unique_search = FALSE;
- }
-
- plan->old_vers_heap = NULL;
-
- btr_pcur_init(&(plan->pcur));
- btr_pcur_init(&(plan->clust_pcur));
-}
-
-/***********************************************************************
-Looks at a comparison condition and decides if it can, and need, be tested for
-a table AFTER the table has been accessed. The checks are made in a fixed
-order and the first one that applies decides the class: not for this table,
-exact match used in the search tuple, non-exact match used in the search
-tuple, limit from the opposite side of the scan, and otherwise an ordinary
-test condition. */
-static
-ulint
-opt_classify_comparison(
-/*====================*/
- /* out: OPT_NOT_COND if not for this
- table, else OPT_END_COND,
- OPT_TEST_COND, or OPT_SCROLL_COND,
- where the last means that the
- condition need not be tested, except
- when scroll cursors are used */
- sel_node_t* sel_node, /* in: select node */
- ulint i, /* in: ith table in the join */
- func_node_t* cond) /* in: comparison condition */
-{
- plan_t* plan;
- ulint n_fields;
- ulint op;
- ulint j;
-
- ut_ad(cond && sel_node);
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- /* Check if the condition is determined after the ith table has been
- accessed, but not after the i - 1:th */
-
- if (!opt_check_exp_determined_before(cond, sel_node, i + 1)) {
-
- return(OPT_NOT_COND);
- }
-
- if ((i > 0) && opt_check_exp_determined_before(cond, sel_node, i)) {
-
- return(OPT_NOT_COND);
- }
-
- /* If the condition is an exact match condition used in constructing
- the search tuple, it is classified as OPT_END_COND */
-
- /* n_fields is the number of fields in the search tuple, 0 if the plan
- has no search tuple */
- if (plan->tuple) {
- n_fields = dtuple_get_n_fields(plan->tuple);
- } else {
- n_fields = 0;
- }
-
- for (j = 0; j < plan->n_exact_match; j++) {
-
- if (opt_is_arg(plan->tuple_exps[j], cond)) {
-
- return(OPT_END_COND);
- }
- }
-
- /* If the condition is an non-exact match condition used in
- constructing the search tuple, it is classified as OPT_SCROLL_COND.
- When the cursor is positioned, and if a non-scroll cursor is used,
- there is no need to test this condition; if a scroll cursor is used
- the testing is necessary when the cursor is reversed. */
-
- /* Only the last tuple field is looked at here: it is the one beyond
- the exactly matched fields */
- if ((n_fields > plan->n_exact_match)
- && opt_is_arg(plan->tuple_exps[n_fields - 1], cond)) {
-
- return(OPT_SCROLL_COND);
- }
-
- /* If the condition is a non-exact match condition on the first field
- in index for which there is no exact match, and it limits the search
- range from the opposite side of the search tuple already BEFORE we
- access the table, it is classified as OPT_END_COND */
-
- /* The first test guards the field lookup: the index must have a field
- at position n_exact_match. op is assigned by the lookup only when it
- finds a comparison, which is the only case in which op is read */
- if ((dict_index_get_n_fields(plan->index) > plan->n_exact_match)
- && opt_look_for_col_in_comparison_before(
- OPT_COMPARISON,
- dict_index_get_nth_col_no(plan->index,
- plan->n_exact_match),
- cond, sel_node, i, &op)) {
-
- if (sel_node->asc && ((op == '<') || (op == PARS_LE_TOKEN))) {
-
- return(OPT_END_COND);
- }
-
- if (!sel_node->asc && ((op == '>') || (op == PARS_GE_TOKEN))) {
-
- return(OPT_END_COND);
- }
- }
-
- /* Otherwise, cond is classified as OPT_TEST_COND */
-
- return(OPT_TEST_COND);
-}
-
-/***********************************************************************
-Recursively looks for test conditions for a table in a join. An AND node is
-split into its two arguments; every other node is classified with
-opt_classify_comparison() and appended to the end condition list or to the
-other condition list of the plan. Conditions of any other class are not
-stored. */
-static
-void
-opt_find_test_conds(
-/*================*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		i,		/* in: ith table in the join */
-	func_node_t*	cond)		/* in: conjunction of search
-					conditions or NULL */
-{
-	func_node_t*	conjunct;
-	plan_t*		plan;
-	ulint		cond_class;
-
-	if (!cond) {
-
-		return;
-	}
-
-	if (cond->func == PARS_AND_TOKEN) {
-		/* Descend into both sides of the conjunction */
-
-		conjunct = cond->args;
-		opt_find_test_conds(sel_node, i, conjunct);
-
-		conjunct = que_node_get_next(conjunct);
-		opt_find_test_conds(sel_node, i, conjunct);
-
-		return;
-	}
-
-	plan = sel_node_get_nth_plan(sel_node, i);
-
-	cond_class = opt_classify_comparison(sel_node, i, cond);
-
-	if (cond_class == OPT_END_COND) {
-		UT_LIST_ADD_LAST(cond_list, plan->end_conds, cond);
-
-	} else if (cond_class == OPT_TEST_COND) {
-		UT_LIST_ADD_LAST(cond_list, plan->other_conds, cond);
-	}
-}
-
-/***********************************************************************
-Normalizes a list of comparison conditions so that a column of the table
-appears on the left side of the comparison if possible. Whenever the right
-side argument is a column of the table, the two arguments are switched and
-the operator is replaced with opt_invert_cmp_op() of itself. */
-static
-void
-opt_normalize_cmp_conds(
-/*====================*/
-	func_node_t*	cond,	/* in: first in a list of comparison
-				conditions, or NULL */
-	dict_table_t*	table)	/* in: table */
-{
-	que_node_t*	lhs;
-	que_node_t*	rhs;
-	sym_node_t*	rhs_sym;
-
-	for (; cond != NULL; cond = UT_LIST_GET_NEXT(cond_list, cond)) {
-
-		lhs = cond->args;
-		rhs = que_node_get_next(lhs);
-
-		if (que_node_get_type(rhs) != QUE_NODE_SYMBOL) {
-
-			continue;
-		}
-
-		rhs_sym = rhs;
-
-		if ((rhs_sym->token_type != SYM_COLUMN)
-		    || (rhs_sym->table != table)) {
-
-			continue;
-		}
-
-		/* Rebuild the argument list in the order rhs, lhs */
-
-		cond->args = rhs;
-		que_node_list_add_last(NULL, rhs);
-		que_node_list_add_last(rhs, lhs);
-
-		/* The operands changed sides: invert the operator */
-		cond->func = opt_invert_cmp_op(cond->func);
-	}
-}
-
-/***********************************************************************
-Finds out the search condition conjuncts we can, and need, to test as the ith
-table in a join is accessed, and stores them to the end condition and other
-condition lists of the plan. The end conditions are then normalized so that
-a column of the table is on the left side where possible. */
-static
-void
-opt_determine_and_normalize_test_conds(
-/*===================================*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		i)		/* in: ith table in the join */
-{
-	plan_t*	plan = sel_node_get_nth_plan(sel_node, i);
-
-	/* Start from empty lists */
-	UT_LIST_INIT(plan->end_conds);
-	UT_LIST_INIT(plan->other_conds);
-
-	/* Recursively go through the conjuncts and classify them */
-	opt_find_test_conds(sel_node, i, sel_node->search_cond);
-
-	opt_normalize_cmp_conds(UT_LIST_GET_FIRST(plan->end_conds),
-				plan->table);
-
-	/* There must be at least one end condition for each exactly
-	matched field of the search tuple */
-	ut_a(plan->n_exact_match <= UT_LIST_GET_LEN(plan->end_conds));
-}
-
-/***********************************************************************
-Looks for occurrences of the columns of the table in the query subgraph and
-adds them to the list of columns if an occurrence of the same column does not
-already exist in the list. If the column is already in the list, puts a value
-indirection to point to the occurrence in the column list, except if the
-column occurrence we are looking at is itself the one in the column list, in
-which case nothing is done. */
-
-void
-opt_find_all_cols(
-/*==============*/
-	ibool		copy_val,	/* in: if TRUE, new found columns are
-					added as columns to copy */
-	dict_index_t*	index,		/* in: index of the table to use */
-	sym_node_list_t* col_list,	/* in: base node of a list where
-					to add new found columns */
-	plan_t*		plan,		/* in: plan or NULL */
-	que_node_t*	exp)		/* in: expression or condition or
-					NULL */
-{
-	func_node_t*	func_node;
-	que_node_t*	arg;
-	sym_node_t*	sym_node;
-	sym_node_t*	listed;
-	ulint		sec_pos;
-
-	if (!exp) {
-
-		return;
-	}
-
-	if (que_node_get_type(exp) == QUE_NODE_FUNC) {
-		/* A function node: handle each of its arguments */
-
-		func_node = exp;
-
-		for (arg = func_node->args;
-		     arg != NULL;
-		     arg = que_node_get_next(arg)) {
-
-			opt_find_all_cols(copy_val, index, col_list, plan,
-					  arg);
-		}
-
-		return;
-	}
-
-	ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL);
-
-	sym_node = exp;
-
-	/* Only columns of the table of the index are of interest */
-
-	if ((sym_node->token_type != SYM_COLUMN)
-	    || (sym_node->table != index->table)) {
-
-		return;
-	}
-
-	/* Look for an occurrence of the same column in the plan column
-	list */
-
-	for (listed = UT_LIST_GET_FIRST(*col_list);
-	     listed != NULL;
-	     listed = UT_LIST_GET_NEXT(col_var_list, listed)) {
-
-		if (listed->col_no != sym_node->col_no) {
-
-			continue;
-		}
-
-		if (listed != sym_node) {
-			/* Another occurrence of the column is in the list:
-			put an indirection to it */
-
-			sym_node->indirection = listed;
-			sym_node->alias = listed;
-		}
-
-		/* Otherwise sym_node itself was already in the list:
-		nothing to do */
-
-		return;
-	}
-
-	/* The same column did not occur in the list: add it */
-
-	UT_LIST_ADD_LAST(col_var_list, *col_list, sym_node);
-
-	sym_node->copy_val = copy_val;
-
-	/* Fill in the field_no fields in sym_node */
-
-	sym_node->field_nos[SYM_CLUST_FIELD_NO] = dict_index_get_nth_col_pos(
-		dict_table_get_first_index(index->table), sym_node->col_no);
-
-	if (index->type & DICT_CLUSTERED) {
-
-		return;
-	}
-
-	/* The index is not clustered: a plan is required, and the column
-	has to be fetched from the clustered index record if the index
-	does not contain it */
-
-	ut_a(plan);
-
-	sec_pos = dict_index_get_nth_col_pos(index, sym_node->col_no);
-
-	if (sec_pos == ULINT_UNDEFINED) {
-
-		plan->must_get_clust = TRUE;
-	}
-
-	sym_node->field_nos[SYM_SEC_FIELD_NO] = sec_pos;
-}
-
-/***********************************************************************
-Looks for occurrences of the columns of the table in conditions which are
-not yet determined AFTER the join operation has fetched a row in the ith
-table. The values for these columns must be copied to dynamic memory for
-later use, so they are added to the column list of the plan with
-copy_val == TRUE. */
-static
-void
-opt_find_copy_cols(
-/*===============*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		i,		/* in: ith table in the join */
-	func_node_t*	search_cond)	/* in: search condition or NULL */
-{
-	func_node_t*	conjunct;
-	plan_t*		plan;
-
-	if (!search_cond) {
-
-		return;
-	}
-
-	ut_ad(que_node_get_type(search_cond) == QUE_NODE_FUNC);
-
-	if (search_cond->func == PARS_AND_TOKEN) {
-		/* Handle both sides of the conjunction separately */
-
-		conjunct = search_cond->args;
-		opt_find_copy_cols(sel_node, i, conjunct);
-
-		conjunct = que_node_get_next(conjunct);
-		opt_find_copy_cols(sel_node, i, conjunct);
-
-		return;
-	}
-
-	if (opt_check_exp_determined_before(search_cond, sel_node, i + 1)) {
-		/* The condition can be tested already on the fetch from
-		the ith table: no copying is needed because of it */
-
-		return;
-	}
-
-	/* Any ith table columns occurring in search_cond should be
-	copied, as this condition cannot be tested already on the
-	fetch from the ith table */
-
-	plan = sel_node_get_nth_plan(sel_node, i);
-
-	opt_find_all_cols(TRUE, plan->index, &(plan->columns), plan,
-			  search_cond);
-}
-
-/***********************************************************************
-Classifies the table columns according to whether we use the column only while
-holding the latch on the page, or whether we have to copy the column value to
-dynamic memory. The column list of the plan is built in three passes: the
-select list columns, the columns of conditions not yet determined at this
-table, and finally all the remaining search condition columns. */
-static
-void
-opt_classify_cols(
-/*==============*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		i)		/* in: ith table in the join */
-{
-	plan_t*		plan;
-	que_node_t*	item;
-
-	plan = sel_node_get_nth_plan(sel_node, i);
-
-	/* The final value of the following field will depend on the
-	environment of the select statement: */
-
-	plan->must_get_clust = FALSE;
-
-	UT_LIST_INIT(plan->columns);
-
-	/* All select list columns should be copied: therefore TRUE as the
-	first argument */
-
-	for (item = sel_node->select_list;
-	     item != NULL;
-	     item = que_node_get_next(item)) {
-
-		opt_find_all_cols(TRUE, plan->index, &(plan->columns), plan,
-				  item);
-	}
-
-	opt_find_copy_cols(sel_node, i, sel_node->search_cond);
-
-	/* All remaining columns in the search condition are temporary
-	columns: therefore FALSE */
-
-	opt_find_all_cols(FALSE, plan->index, &(plan->columns), plan,
-			  sel_node->search_cond);
-}
-
-/***********************************************************************
-Fills in the info in plan which is used in accessing a clustered index
-record. The columns must already be classified for the plan node. If the
-plan already uses the clustered index, clust_map and clust_ref are set to
-NULL. Otherwise clust_ref is created as a tuple of the unique fields of the
-clustered index, and clust_map[i] is set to the position in the plan index
-of the ith field of the clustered index. */
-static
-void
-opt_clust_access(
-/*=============*/
-	sel_node_t*	sel_node,	/* in: select node */
-	ulint		n)		/* in: nth table in select */
-{
-	plan_t*		plan;
-	dict_table_t*	table;
-	dict_index_t*	clust_index;
-	dict_index_t*	index;
-	mem_heap_t*	heap;
-	ulint		n_fields;
-	ulint		pos;
-	ulint		i;
-
-	plan = sel_node_get_nth_plan(sel_node, n);
-
-	index = plan->index;
-
-	/* The final value of the following field depends on the environment
-	of the select statement: */
-
-	plan->no_prefetch = FALSE;
-
-	if (index->type & DICT_CLUSTERED) {
-		/* No separate clustered index access is needed */
-
-		plan->clust_map = NULL;
-		plan->clust_ref = NULL;
-
-		return;
-	}
-
-	table = index->table;
-
-	clust_index = dict_table_get_first_index(table);
-
-	n_fields = dict_index_get_n_unique(clust_index);
-
-	heap = pars_sym_tab_global->heap;
-
-	plan->clust_ref = dtuple_create(heap, n_fields);
-
-	dict_index_copy_types(plan->clust_ref, clust_index, n_fields);
-
-	plan->clust_map = mem_heap_alloc(heap, n_fields * sizeof(ulint));
-
-	for (i = 0; i < n_fields; i++) {
-		pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
-		/* Every unique field of the clustered index must be found
-		in the plan index. This assertion is also in effect in
-		non-debug builds, so no further debug assertion on pos is
-		needed below. */
-
-		ut_a(pos != ULINT_UNDEFINED);
-
-		/* We optimize here only queries to InnoDB's internal system
-		tables, and they should not contain column prefix indexes.
-		A prefix is only reported; the map is filled in anyway. */
-
-		if (dict_index_get_nth_field(index, pos)->prefix_len != 0
-		    || dict_index_get_nth_field(clust_index, i)
-		    ->prefix_len != 0) {
-			fprintf(stderr,
-				"InnoDB: Error in pars0opt.c:"
-				" table %s has prefix_len != 0\n",
-				index->table_name);
-		}
-
-		plan->clust_map[i] = pos;
-	}
-}
-
-/***********************************************************************
-Optimizes a select. Decides which indexes to tables to use. The tables
-are accessed in the order that they were written to the FROM part in the
-select statement. The plan array is allocated from the heap of
-pars_sym_tab_global. The work is done in two passes over the tables: first
-an index and the test conditions are chosen for each table, then the columns
-are classified and the clustered index access info is filled in. Finally the
-plan is checked against a possible order-by clause. */
-
-void
-opt_search_plan(
-/*============*/
-	sel_node_t*	sel_node)	/* in: parsed select node */
-{
-	sym_node_t*	table_node;
-	dict_table_t*	table;
-	order_node_t*	order_by;
-	ulint		i;
-
-	sel_node->plans = mem_heap_alloc(pars_sym_tab_global->heap,
-					 sel_node->n_tables * sizeof(plan_t));
-
-	/* The fetch order is ascending unless an order-by clause says
-	otherwise */
-
-	if (sel_node->order_by == NULL) {
-		sel_node->asc = TRUE;
-	} else {
-		order_by = sel_node->order_by;
-
-		sel_node->asc = order_by->asc;
-	}
-
-	/* Analyze the search condition to find out what we know at each
-	join stage about the conditions that the columns of a table should
-	satisfy */
-
-	table_node = sel_node->table_list;
-
-	for (i = 0; i < sel_node->n_tables; i++) {
-
-		table = table_node->table;
-
-		/* Choose index through which to access the table */
-
-		opt_search_plan_for_table(sel_node, i, table);
-
-		/* Determine the search condition conjuncts we can test at
-		this table; normalize the end conditions */
-
-		opt_determine_and_normalize_test_conds(sel_node, i);
-
-		table_node = que_node_get_next(table_node);
-	}
-
-	/* The second pass addresses the tables by their ordinal number
-	only, so the table list is not walked here */
-
-	for (i = 0; i < sel_node->n_tables; i++) {
-
-		/* Classify the table columns into those we only need to access
-		but not copy, and to those we must copy to dynamic memory */
-
-		opt_classify_cols(sel_node, i);
-
-		/* Calculate possible info for accessing the clustered index
-		record */
-
-		opt_clust_access(sel_node, i);
-	}
-
-	/* Check that the plan obeys a possible order-by clause: if not,
-	an assertion error occurs */
-
-	opt_check_order_by(sel_node);
-
-#ifdef UNIV_SQL_DEBUG
-	opt_print_query_plan(sel_node);
-#endif
-}
-
-/************************************************************************
-Prints info of a query plan to stderr: first the fetch direction and the
-locking mode of the select, then for each table the index used, the number
-of exactly matched fields, the number of fields in the search tuple and the
-number of end conditions. The locking flags of the select node are asserted
-to be consistent with each other. */
-
-void
-opt_print_query_plan(
-/*=================*/
-	sel_node_t*	sel_node)	/* in: select node */
-{
-	plan_t*	plan;
-	ulint	n_tuple_fields;
-	ulint	i;
-
-	fputs("QUERY PLAN FOR A SELECT NODE\n", stderr);
-
-	if (sel_node->asc) {
-		fputs("Asc. search; ", stderr);
-	} else {
-		fputs("Desc. search; ", stderr);
-	}
-
-	if (sel_node->set_x_locks) {
-		fputs("sets row x-locks; ", stderr);
-		ut_a(sel_node->row_lock_mode == LOCK_X);
-		ut_a(!sel_node->consistent_read);
-	} else if (sel_node->consistent_read) {
-		fputs("consistent read; ", stderr);
-	} else {
-		ut_a(sel_node->row_lock_mode == LOCK_S);
-		fputs("sets row s-locks; ", stderr);
-	}
-
-	putc('\n', stderr);
-
-	for (i = 0; i < sel_node->n_tables; i++) {
-		plan = sel_node_get_nth_plan(sel_node, i);
-
-		/* A plan without a search tuple matches no fields */
-		n_tuple_fields = plan->tuple
-			? dtuple_get_n_fields(plan->tuple)
-			: 0;
-
-		fputs("Table ", stderr);
-		dict_index_name_print(stderr, NULL, plan->index);
-		fprintf(stderr,"; exact m. %lu, match %lu, end conds %lu\n",
-			(unsigned long) plan->n_exact_match,
-			(unsigned long) n_tuple_fields,
-			(unsigned long) UT_LIST_GET_LEN(plan->end_conds));
-	}
-}
diff --git a/storage/innobase/pars/pars0pars.c b/storage/innobase/pars/pars0pars.c
deleted file mode 100644
index 89f6f862995..00000000000
--- a/storage/innobase/pars/pars0pars.c
+++ /dev/null
@@ -1,2200 +0,0 @@
-/******************************************************
-SQL parser
-
-(c) 1996 Innobase Oy
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
-
-/* Historical note: Innobase executed its first SQL string (CREATE TABLE)
-on 1/27/1998 */
-
-#include "pars0pars.h"
-
-#ifdef UNIV_NONINL
-#include "pars0pars.ic"
-#endif
-
-#include "row0sel.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "dict0dict.h"
-#include "dict0mem.h"
-#include "dict0crea.h"
-#include "que0que.h"
-#include "pars0grm.h"
-#include "pars0opt.h"
-#include "data0data.h"
-#include "data0type.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "lock0lock.h"
-#include "eval0eval.h"
-
-#ifdef UNIV_SQL_DEBUG
-/* If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it. The variable exists only when UNIV_SQL_DEBUG is
-defined. */
-
-ibool pars_print_lexed = FALSE;
-#endif /* UNIV_SQL_DEBUG */
-
-/* Global variable used while parsing a single procedure or query : the code is
-NOT re-entrant. The node constructors in this file allocate their nodes from
-the heap member of this symbol table. */
-sym_tab_t* pars_sym_tab_global;
-
-/* Global variables used to denote certain reserved words, used in
-constructing the parsing tree. Each object is initialized with the grammar
-token code of the reserved word it stands for. pars_func() reads the code
-back through the code member, and pars_order_by() recognizes ASC and DESC by
-comparing against the addresses of pars_asc_token and pars_desc_token. */
-
-pars_res_word_t pars_to_char_token = {PARS_TO_CHAR_TOKEN};
-pars_res_word_t pars_to_number_token = {PARS_TO_NUMBER_TOKEN};
-pars_res_word_t pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
-pars_res_word_t pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN};
-pars_res_word_t pars_substr_token = {PARS_SUBSTR_TOKEN};
-pars_res_word_t pars_replstr_token = {PARS_REPLSTR_TOKEN};
-pars_res_word_t pars_concat_token = {PARS_CONCAT_TOKEN};
-pars_res_word_t pars_instr_token = {PARS_INSTR_TOKEN};
-pars_res_word_t pars_length_token = {PARS_LENGTH_TOKEN};
-pars_res_word_t pars_sysdate_token = {PARS_SYSDATE_TOKEN};
-pars_res_word_t pars_printf_token = {PARS_PRINTF_TOKEN};
-pars_res_word_t pars_assert_token = {PARS_ASSERT_TOKEN};
-pars_res_word_t pars_rnd_token = {PARS_RND_TOKEN};
-pars_res_word_t pars_rnd_str_token = {PARS_RND_STR_TOKEN};
-pars_res_word_t pars_count_token = {PARS_COUNT_TOKEN};
-pars_res_word_t pars_sum_token = {PARS_SUM_TOKEN};
-pars_res_word_t pars_distinct_token = {PARS_DISTINCT_TOKEN};
-pars_res_word_t pars_binary_token = {PARS_BINARY_TOKEN};
-pars_res_word_t pars_blob_token = {PARS_BLOB_TOKEN};
-pars_res_word_t pars_int_token = {PARS_INT_TOKEN};
-pars_res_word_t pars_char_token = {PARS_CHAR_TOKEN};
-pars_res_word_t pars_float_token = {PARS_FLOAT_TOKEN};
-pars_res_word_t pars_update_token = {PARS_UPDATE_TOKEN};
-pars_res_word_t pars_asc_token = {PARS_ASC_TOKEN};
-pars_res_word_t pars_desc_token = {PARS_DESC_TOKEN};
-pars_res_word_t pars_open_token = {PARS_OPEN_TOKEN};
-pars_res_word_t pars_close_token = {PARS_CLOSE_TOKEN};
-pars_res_word_t pars_share_token = {PARS_SHARE_TOKEN};
-pars_res_word_t pars_unique_token = {PARS_UNIQUE_TOKEN};
-pars_res_word_t pars_clustered_token = {PARS_CLUSTERED_TOKEN};
-
-/* Global variable used to denote the '*' in SELECT * FROM.. The value
-12345678 is only the initial content of the variable. NOTE(review):
-presumably the variable is recognized by its address rather than by this
-value - confirm against the users of pars_star_denoter. */
-#define PARS_STAR_DENOTER 12345678
-ulint pars_star_denoter = PARS_STAR_DENOTER;
-
-
-/*************************************************************************
-Determines the class of a function code: arithmetic operator, comparison
-operator, logical operator, aggregate function or predefined function. A
-code which belongs to none of these gets the class PARS_FUNC_OTHER. */
-static
-ulint
-pars_func_get_class(
-/*================*/
-			/* out: function class: PARS_FUNC_ARITH, ... */
-	int	func)	/* in: function code: '=', PARS_GE_TOKEN, ... */
-{
-	switch (func) {
-	case '+':
-	case '-':
-	case '*':
-	case '/':
-		return(PARS_FUNC_ARITH);
-
-	case '=':
-	case '<':
-	case '>':
-	case PARS_GE_TOKEN:
-	case PARS_LE_TOKEN:
-	case PARS_NE_TOKEN:
-		return(PARS_FUNC_CMP);
-
-	case PARS_AND_TOKEN:
-	case PARS_OR_TOKEN:
-	case PARS_NOT_TOKEN:
-		return(PARS_FUNC_LOGICAL);
-
-	case PARS_COUNT_TOKEN:
-	case PARS_SUM_TOKEN:
-		return(PARS_FUNC_AGGREGATE);
-
-	case PARS_TO_CHAR_TOKEN:
-	case PARS_TO_NUMBER_TOKEN:
-	case PARS_TO_BINARY_TOKEN:
-	case PARS_BINARY_TO_NUMBER_TOKEN:
-	case PARS_SUBSTR_TOKEN:
-	case PARS_CONCAT_TOKEN:
-	case PARS_LENGTH_TOKEN:
-	case PARS_INSTR_TOKEN:
-	case PARS_SYSDATE_TOKEN:
-	case PARS_NOTFOUND_TOKEN:
-	case PARS_PRINTF_TOKEN:
-	case PARS_ASSERT_TOKEN:
-	case PARS_RND_TOKEN:
-	case PARS_RND_STR_TOKEN:
-	case PARS_REPLSTR_TOKEN:
-		return(PARS_FUNC_PREDEFINED);
-
-	default:
-		return(PARS_FUNC_OTHER);
-	}
-}
-
-/*************************************************************************
-Parses an operator or predefined function expression. Allocates a function
-node from the heap of pars_sym_tab_global, fills it in and appends it to
-the function node list of the symbol table. */
-static
-func_node_t*
-pars_func_low(
-/*==========*/
-				/* out, own: function node in a query tree */
-	int		func,	/* in: function token code */
-	que_node_t*	arg)	/* in: first argument in the argument list */
-{
-	func_node_t*	func_node;
-
-	func_node = mem_heap_alloc(pars_sym_tab_global->heap,
-				   sizeof(*func_node));
-
-	/* The function specific part */
-	func_node->func = func;
-	func_node->class = pars_func_get_class(func);
-	func_node->args = arg;
-
-	/* The common node part: the node has no value and no value buffer
-	yet */
-	func_node->common.type = QUE_NODE_FUNC;
-	dfield_set_data(&(func_node->common.val), NULL, 0);
-	func_node->common.val_buf_size = 0;
-
-	UT_LIST_ADD_LAST(func_node_list, pars_sym_tab_global->func_node_list,
-			 func_node);
-
-	return(func_node);
-}
-
-/*************************************************************************
-Parses a function expression. The function is identified by the token code
-stored in the reserved word object. */
-
-func_node_t*
-pars_func(
-/*======*/
-				/* out, own: function node in a query tree */
-	que_node_t*	res_word,/* in: function name reserved word */
-	que_node_t*	arg)	/* in: first argument in the argument list */
-{
-	pars_res_word_t*	word;
-
-	word = (pars_res_word_t*) res_word;
-
-	return(pars_func_low(word->code, arg));
-}
-
-/*************************************************************************
-Parses an operator expression. The arguments are first linked into a node
-list which starts from arg1, and the list is then given to a new function
-node. */
-
-func_node_t*
-pars_op(
-/*====*/
-				/* out, own: function node in a query tree */
-	int		func,	/* in: operator token code */
-	que_node_t*	arg1,	/* in: first argument */
-	que_node_t*	arg2)	/* in: second argument or NULL for an unary
-				operator */
-{
-	/* Start a list which contains arg1 only */
-	que_node_list_add_last(NULL, arg1);
-
-	if (arg2 != NULL) {
-		/* A binary operator: append the second argument */
-		que_node_list_add_last(arg1, arg2);
-	}
-
-	return(pars_func_low(func, arg1));
-}
-
-/*************************************************************************
-Parses an ORDER BY clause. Order by a single column only is supported. The
-direction is recognized from the address of the reserved word object; any
-other object than &pars_asc_token or &pars_desc_token fails an assertion. */
-
-order_node_t*
-pars_order_by(
-/*==========*/
-				/* out, own: order-by node in a query tree */
-	sym_node_t*	column,	/* in: column name */
-	pars_res_word_t* asc)	/* in: &pars_asc_token or &pars_desc_token */
-{
-	order_node_t*	order;
-
-	order = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(*order));
-
-	order->common.type = QUE_NODE_ORDER;
-	order->column = column;
-
-	if (asc != &pars_asc_token) {
-		ut_a(asc == &pars_desc_token);
-		order->asc = FALSE;
-	} else {
-		order->asc = TRUE;
-	}
-
-	return(order);
-}
-
-/*************************************************************************
-Resolves the data type of a function in an expression. The argument data
-types must already be resolved. Only the first argument of the function is
-inspected. An unknown function code, or an argument of an unexpected main
-type, fails an assertion. */
-static
-void
-pars_resolve_func_data_type(
-/*========================*/
-	func_node_t*	node)	/* in: function node */
-{
-	que_node_t*	arg;
-	ulint		func;
-
-	ut_a(que_node_get_type(node) == QUE_NODE_FUNC);
-
-	arg = node->args;
-	func = node->func;
-
-	switch (func) {
-	case PARS_SUM_TOKEN:
-	case '+':
-	case '-':
-	case '*':
-	case '/':
-		/* Inherit the data type from the first argument (which must
-		not be the SQL null literal whose type is DATA_ERROR) */
-
-		dtype_copy(que_node_get_data_type(node),
-			   que_node_get_data_type(arg));
-
-		ut_a(dtype_get_mtype(que_node_get_data_type(node))
-		     == DATA_INT);
-		break;
-
-	case PARS_COUNT_TOKEN:
-		ut_a(arg);
-		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
-		break;
-
-	case PARS_TO_CHAR_TOKEN:
-	case PARS_RND_STR_TOKEN:
-		/* Integer argument, character string result */
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
-		dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
-			  DATA_ENGLISH, 0);
-		break;
-
-	case PARS_TO_BINARY_TOKEN:
-		/* The result type depends on the argument type */
-		if (dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT) {
-			dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
-				  DATA_ENGLISH, 0);
-		} else {
-			dtype_set(que_node_get_data_type(node), DATA_BINARY,
-				  0, 0);
-		}
-		break;
-
-	case PARS_TO_NUMBER_TOKEN:
-	case PARS_BINARY_TO_NUMBER_TOKEN:
-	case PARS_LENGTH_TOKEN:
-	case PARS_INSTR_TOKEN:
-		/* Character string argument, 4-byte integer result */
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
-		     == DATA_VARCHAR);
-		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
-		break;
-
-	case PARS_SYSDATE_TOKEN:
-		/* Takes no arguments */
-		ut_a(arg == NULL);
-		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
-		break;
-
-	case PARS_SUBSTR_TOKEN:
-	case PARS_CONCAT_TOKEN:
-		/* Character string argument, character string result */
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg))
-		     == DATA_VARCHAR);
-		dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
-			  DATA_ENGLISH, 0);
-		break;
-
-	case '>':
-	case '<':
-	case '=':
-	case PARS_GE_TOKEN:
-	case PARS_LE_TOKEN:
-	case PARS_NE_TOKEN:
-	case PARS_AND_TOKEN:
-	case PARS_OR_TOKEN:
-	case PARS_NOT_TOKEN:
-	case PARS_NOTFOUND_TOKEN:
-		/* We currently have no iboolean type: use integer type */
-		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
-		break;
-
-	case PARS_RND_TOKEN:
-		ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
-		dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
-		break;
-
-	default:
-		/* Unknown function code */
-		ut_error;
-	}
-}
-
-/*************************************************************************
-Resolves the meaning of variables in an expression and the data types of
-functions. Function nodes are handled recursively: every argument is
-resolved first and then the type of the function itself. A symbol which is
-not yet resolved is matched by name against the resolved variables, cursors
-and functions in the global symbol table; it is an error (assertion
-failure) if no such declaration is found. */
-static
-void
-pars_resolve_exp_variables_and_types(
-/*=================================*/
-	sel_node_t*	select_node,	/* in: select node or NULL; if
-					this is not NULL then the variable
-					sym nodes are added to the
-					copy_variables list of select_node */
-	que_node_t*	exp_node)	/* in: expression */
-{
-	func_node_t*	func_node;
-	que_node_t*	arg;
-	sym_node_t*	sym_node;
-	sym_node_t*	decl;
-
-	ut_a(exp_node);
-
-	if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
-		func_node = exp_node;
-
-		for (arg = func_node->args;
-		     arg != NULL;
-		     arg = que_node_get_next(arg)) {
-
-			pars_resolve_exp_variables_and_types(select_node,
-							     arg);
-		}
-
-		pars_resolve_func_data_type(func_node);
-
-		return;
-	}
-
-	ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL);
-
-	sym_node = exp_node;
-
-	if (sym_node->resolved) {
-		/* Nothing to do, e.g., already resolved as a column */
-
-		return;
-	}
-
-	/* Not resolved yet: scan the symbol table for a declared variable,
-	cursor or function carrying the same name */
-
-	for (decl = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list);
-	     decl != NULL;
-	     decl = UT_LIST_GET_NEXT(sym_list, decl)) {
-
-		if (!decl->resolved) {
-			continue;
-		}
-
-		if ((decl->token_type != SYM_VAR)
-		    && (decl->token_type != SYM_CURSOR)
-		    && (decl->token_type != SYM_FUNCTION)) {
-			continue;
-		}
-
-		if (!decl->name
-		    || (sym_node->name_len != decl->name_len)) {
-			continue;
-		}
-
-		if (ut_memcmp(sym_node->name, decl->name,
-			      decl->name_len) == 0) {
-			/* Found the declaration */
-
-			break;
-		}
-	}
-
-	if (decl == NULL) {
-		fprintf(stderr, "PARSER ERROR: Unresolved identifier %s\n",
-			sym_node->name);
-	}
-
-	ut_a(decl);
-
-	/* Make this occurrence an implicit reference to the declaration */
-	sym_node->resolved = TRUE;
-	sym_node->token_type = SYM_IMPLICIT_VAR;
-	sym_node->alias = decl;
-	sym_node->indirection = decl;
-
-	if (select_node != NULL) {
-		UT_LIST_ADD_LAST(col_var_list, select_node->copy_variables,
-				 sym_node);
-	}
-
-	/* The occurrence takes the data type of its declaration */
-	dfield_set_type(que_node_get_val(sym_node),
-			que_node_get_data_type(decl));
-}
-
-/*************************************************************************
-Applies pars_resolve_exp_variables_and_types() to every expression of a
-list: resolves the variables and the data types of functions. It is an
-error if some identifier cannot be resolved here. */
-static
-void
-pars_resolve_exp_list_variables_and_types(
-/*======================================*/
-	sel_node_t*	select_node,	/* in: select node or NULL */
-	que_node_t*	exp_node)	/* in: expression list first node, or
-					NULL */
-{
-	que_node_t*	cur;
-
-	for (cur = exp_node; cur != NULL; cur = que_node_get_next(cur)) {
-		pars_resolve_exp_variables_and_types(select_node, cur);
-	}
-}
-
-/*************************************************************************
-Resolves the columns in an expression. Function nodes are traversed
-recursively. An unresolved symbol is compared by name against the columns
-of each table in the table list, and the first match wins. NOTE that a
-symbol which matches no column is silently left unresolved. */
-static
-void
-pars_resolve_exp_columns(
-/*=====================*/
-	sym_node_t*	table_node,	/* in: first node in a table list */
-	que_node_t*	exp_node)	/* in: expression */
-{
-	func_node_t*	func_node;
-	que_node_t*	arg;
-	sym_node_t*	sym_node;
-	sym_node_t*	t_node;
-	dict_table_t*	table;
-	ulint		n_cols;
-	ulint		i;
-
-	ut_a(exp_node);
-
-	if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
-		func_node = exp_node;
-
-		for (arg = func_node->args;
-		     arg != NULL;
-		     arg = que_node_get_next(arg)) {
-
-			pars_resolve_exp_columns(table_node, arg);
-		}
-
-		return;
-	}
-
-	ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL);
-
-	sym_node = exp_node;
-
-	if (sym_node->resolved) {
-
-		return;
-	}
-
-	/* Not resolved yet: search every table of the list for a column
-	with the same name */
-
-	for (t_node = table_node;
-	     t_node != NULL;
-	     t_node = que_node_get_next(t_node)) {
-
-		table = t_node->table;
-		n_cols = dict_table_get_n_cols(table);
-
-		for (i = 0; i < n_cols; i++) {
-			const dict_col_t*	col
-				= dict_table_get_nth_col(table, i);
-			const char*		col_name
-				= dict_table_get_col_name(table, i);
-
-			if ((sym_node->name_len != ut_strlen(col_name))
-			    || (0 != ut_memcmp(sym_node->name, col_name,
-					       sym_node->name_len))) {
-				/* Different name: try the next column */
-
-				continue;
-			}
-
-			/* Found: bind the symbol to column i of table */
-			sym_node->resolved = TRUE;
-			sym_node->token_type = SYM_COLUMN;
-			sym_node->table = table;
-			sym_node->col_no = i;
-			sym_node->prefetch_buf = NULL;
-
-			dict_col_copy_type(
-				col,
-				dfield_get_type(&sym_node->common.val));
-
-			return;
-		}
-	}
-}
-
-/*************************************************************************
-Applies pars_resolve_exp_columns() to every expression of a list. */
-static
-void
-pars_resolve_exp_list_columns(
-/*==========================*/
-	sym_node_t*	table_node,	/* in: first node in a table list */
-	que_node_t*	exp_node)	/* in: expression list first node, or
-					NULL */
-{
-	que_node_t*	cur;
-
-	for (cur = exp_node; cur != NULL; cur = que_node_get_next(cur)) {
-		pars_resolve_exp_columns(table_node, cur);
-	}
-}
-
-/*************************************************************************
-Marks a symbol node as a resolved table name and looks up its table
-definition with dict_table_get_low(). The lookup must succeed; otherwise
-an assertion fails. */
-static
-void
-pars_retrieve_table_def(
-/*====================*/
-	sym_node_t*	sym_node)	/* in: table node */
-{
-	ut_a(sym_node);
-	ut_a(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
-
-	sym_node->resolved = TRUE;
-	sym_node->token_type = SYM_TABLE;
-
-	/* The symbol name is used as the table name */
-	sym_node->table = dict_table_get_low((const char*) sym_node->name);
-
-	ut_a(sym_node->table);
-}
-
-/*************************************************************************
-Retrieves the table definitions for a list of table name ids and counts
-the tables. An empty (NULL) list yields 0. */
-static
-ulint
-pars_retrieve_table_list_defs(
-/*==========================*/
-					/* out: number of tables */
-	sym_node_t*	sym_node)	/* in: first table node in list */
-{
-	ulint	n_tables;
-
-	for (n_tables = 0; sym_node != NULL; n_tables++) {
-		pars_retrieve_table_def(sym_node);
-
-		sym_node = que_node_get_next(sym_node);
-	}
-
-	return(n_tables);
-}
-
-/*************************************************************************
-Expands SELECT * FROM ...: replaces the select list by one freshly
-allocated symbol per user column of every table in the table list, in
-table list order. */
-static
-void
-pars_select_all_columns(
-/*====================*/
-	sel_node_t*	select_node)	/* in: select node already containing
-					the table list */
-{
-	sym_node_t*	table_node;
-	sym_node_t*	col_node;
-	dict_table_t*	table;
-	ulint		n_user_cols;
-	ulint		i;
-
-	/* Start from an empty list and append to it below */
-	select_node->select_list = NULL;
-
-	for (table_node = select_node->table_list;
-	     table_node != NULL;
-	     table_node = que_node_get_next(table_node)) {
-
-		table = table_node->table;
-		n_user_cols = dict_table_get_n_user_cols(table);
-
-		for (i = 0; i < n_user_cols; i++) {
-			const char*	col_name
-				= dict_table_get_col_name(table, i);
-
-			col_node = sym_tab_add_id(pars_sym_tab_global,
-						  (byte*)col_name,
-						  ut_strlen(col_name));
-
-			select_node->select_list = que_node_list_add_last(
-				select_node->select_list, col_node);
-		}
-	}
-}
-
-/*************************************************************************
-Parses a select list; creates a query graph node for the whole SELECT
-statement. Only the variables of the INTO list are resolved here; the
-select list itself is just stored in the node. */
-
-sel_node_t*
-pars_select_list(
-/*=============*/
-					/* out, own: select node in a query
-					tree */
-	que_node_t*	select_list,	/* in: select list */
-	sym_node_t*	into_list)	/* in: variables list or NULL */
-{
-	sel_node_t*	sel;
-
-	sel = sel_node_create(pars_sym_tab_global->heap);
-
-	sel->select_list = select_list;
-	sel->into_list = into_list;
-
-	pars_resolve_exp_list_variables_and_types(NULL, into_list);
-
-	return(sel);
-}
-
-/*************************************************************************
-Checks if the query is an aggregate query, in which case the select list
-must contain only aggregate function items (a mixed list fails an
-assertion). Sets select_node->is_aggregate accordingly. */
-static
-void
-pars_check_aggregate(
-/*=================*/
-	sel_node_t*	select_node)	/* in: select node already containing
-					the select list */
-{
-	que_node_t*	item;
-	func_node_t*	func_node;
-	ulint		n_items		= 0;
-	ulint		n_aggregates	= 0;
-
-	for (item = select_node->select_list;
-	     item != NULL;
-	     item = que_node_get_next(item)) {
-
-		n_items++;
-
-		if (que_node_get_type(item) != QUE_NODE_FUNC) {
-
-			continue;
-		}
-
-		func_node = item;
-
-		if (func_node->class == PARS_FUNC_AGGREGATE) {
-			n_aggregates++;
-		}
-	}
-
-	if (n_aggregates == 0) {
-		select_node->is_aggregate = FALSE;
-
-		return;
-	}
-
-	/* Aggregate and non-aggregate items must not be mixed */
-	ut_a(n_items == n_aggregates);
-
-	select_node->is_aggregate = TRUE;
-}
-
-/*************************************************************************
-Parses a select statement: completes a select node created by
-pars_select_list(). Retrieves the table definitions, expands SELECT *,
-resolves columns and then variables in the select list and in the search
-condition, decides the locking mode, resolves the ORDER BY column, and
-finally calls opt_search_plan() on the node. */
-
-sel_node_t*
-pars_select_statement(
-/*==================*/
- /* out, own: select node in a query
- tree */
- sel_node_t* select_node, /* in: select node already containing
- the select list */
- sym_node_t* table_list, /* in: table list */
- que_node_t* search_cond, /* in: search condition or NULL */
- pars_res_word_t* for_update, /* in: NULL or &pars_update_token */
- pars_res_word_t* lock_shared, /* in: NULL or &pars_share_token */
- order_node_t* order_by) /* in: NULL or an order-by node */
-{
- select_node->state = SEL_NODE_OPEN;
-
- select_node->table_list = table_list;
- select_node->n_tables = pars_retrieve_table_list_defs(table_list);
-
- if (select_node->select_list == &pars_star_denoter) {
-
- /* SELECT * FROM ... */
- pars_select_all_columns(select_node);
- }
-
- /* With an INTO list there must be exactly one target variable per
- selected item */
- if (select_node->into_list) {
- ut_a(que_node_list_get_len(select_node->into_list)
- == que_node_list_get_len(select_node->select_list));
- }
-
- UT_LIST_INIT(select_node->copy_variables);
-
- /* Columns are resolved before variables: a symbol which matches a
- column is marked resolved and is then skipped by the variable
- resolution */
- pars_resolve_exp_list_columns(table_list, select_node->select_list);
- pars_resolve_exp_list_variables_and_types(select_node,
- select_node->select_list);
- pars_check_aggregate(select_node);
-
- select_node->search_cond = search_cond;
-
- if (search_cond) {
- pars_resolve_exp_columns(table_list, search_cond);
- pars_resolve_exp_variables_and_types(select_node, search_cond);
- }
-
- /* Locking mode: FOR UPDATE and LOCK IN SHARE MODE are mutually
- exclusive and both turn off the consistent read; a plain select is
- a consistent read */
- if (for_update) {
- ut_a(!lock_shared);
-
- select_node->set_x_locks = TRUE;
- select_node->row_lock_mode = LOCK_X;
-
- select_node->consistent_read = FALSE;
- select_node->read_view = NULL;
- } else if (lock_shared){
- select_node->set_x_locks = FALSE;
- select_node->row_lock_mode = LOCK_S;
-
- select_node->consistent_read = FALSE;
- select_node->read_view = NULL;
- } else {
- /* NOTE: read_view is not assigned in this branch */
- select_node->set_x_locks = FALSE;
- select_node->row_lock_mode = LOCK_S;
-
- select_node->consistent_read = TRUE;
- }
-
- select_node->order_by = order_by;
-
- if (order_by) {
- pars_resolve_exp_columns(table_list, order_by->column);
- }
-
- /* The final value of the following fields depend on the environment
- where the select statement appears: */
-
- select_node->can_get_updated = FALSE;
- select_node->explicit_cursor = NULL;
-
- opt_search_plan(select_node);
-
- return(select_node);
-}
-
-/*************************************************************************
-Parses a cursor declaration: binds the cursor symbol and its select node
-to each other and puts the select node to the closed state. */
-
-que_node_t*
-pars_cursor_declaration(
-/*====================*/
-					/* out: sym_node */
-	sym_node_t*	sym_node,	/* in: cursor id node in the symbol
-					table */
-	sel_node_t*	select_node)	/* in: select node */
-{
-	/* The select node belongs to an explicit cursor which has not been
-	opened yet */
-	select_node->explicit_cursor = sym_node;
-	select_node->state = SEL_NODE_CLOSED;
-
-	/* The symbol is now a resolved cursor name */
-	sym_node->cursor_def = select_node;
-	sym_node->token_type = SYM_CURSOR;
-	sym_node->resolved = TRUE;
-
-	return(sym_node);
-}
-
-/*************************************************************************
-Parses a function declaration: marks the symbol as a resolved function
-name. A user function with this name must be found in the info struct of
-the global symbol table; otherwise an assertion fails. */
-
-que_node_t*
-pars_function_declaration(
-/*======================*/
-					/* out: sym_node */
-	sym_node_t*	sym_node)	/* in: function id node in the symbol
-					table */
-{
-	sym_node->token_type = SYM_FUNCTION;
-	sym_node->resolved = TRUE;
-
-	/* The declared function has to exist */
-	ut_a(pars_info_get_user_func(pars_sym_tab_global->info,
-				     sym_node->name));
-
-	return(sym_node);
-}
-
-/*************************************************************************
-Parses a delete or update statement start: creates the update node and
-stores the statement kind, the table symbol and the assignment list in
-it. */
-
-upd_node_t*
-pars_update_statement_start(
-/*========================*/
-					/* out, own: update node in a query
-					tree */
-	ibool		is_delete,	/* in: TRUE if delete */
-	sym_node_t*	table_sym,	/* in: table name node */
-	col_assign_node_t* col_assign_list)/* in: column assignment list, NULL
-					if delete */
-{
-	upd_node_t*	upd;
-
-	upd = upd_node_create(pars_sym_tab_global->heap);
-
-	upd->table_sym = table_sym;
-	upd->col_assign_list = col_assign_list;
-	upd->is_delete = is_delete;
-
-	return(upd);
-}
-
-/*************************************************************************
-Parses a column assignment in an update: allocates an assignment node
-which pairs the column with the expression to assign. */
-
-col_assign_node_t*
-pars_column_assignment(
-/*===================*/
-				/* out: column assignment node */
-	sym_node_t*	column,	/* in: column to assign */
-	que_node_t*	exp)	/* in: value to assign */
-{
-	col_assign_node_t*	assignment;
-
-	assignment = mem_heap_alloc(pars_sym_tab_global->heap,
-				    sizeof(col_assign_node_t));
-
-	assignment->common.type = QUE_NODE_COL_ASSIGNMENT;
-	assignment->col = column;
-	assignment->val = exp;
-
-	return(assignment);
-}
-
-/*************************************************************************
-Processes an update node assignment list. The first pass resolves the
-column and the value of every assignment and counts the assignments; the
-second pass fills in the update vector node->update. Finally cmpl_info
-is computed from whether a field size or an ordering field can change. */
-static
-void
-pars_process_assign_list(
-/*=====================*/
- upd_node_t* node) /* in: update node */
-{
- col_assign_node_t* col_assign_list;
- sym_node_t* table_sym;
- col_assign_node_t* assign_node;
- upd_field_t* upd_field;
- dict_index_t* clust_index;
- sym_node_t* col_sym;
- ulint changes_ord_field;
- ulint changes_field_size;
- ulint n_assigns;
- ulint i;
-
- table_sym = node->table_sym;
- col_assign_list = node->col_assign_list;
- clust_index = dict_table_get_first_index(node->table);
-
- assign_node = col_assign_list;
- n_assigns = 0;
-
- /* Pass 1: resolve the names in each assignment and count them */
- while (assign_node) {
- /* NOTE(review): pars_resolve_exp_columns() leaves a symbol
- unresolved if no column matches; the target column is not
- checked here for being resolved - confirm that this cannot
- happen */
- pars_resolve_exp_columns(table_sym, assign_node->col);
- pars_resolve_exp_columns(table_sym, assign_node->val);
- pars_resolve_exp_variables_and_types(NULL, assign_node->val);
-#if 0
- ut_a(dtype_get_mtype(
- dfield_get_type(que_node_get_val(
- assign_node->col)))
- == dtype_get_mtype(
- dfield_get_type(que_node_get_val(
- assign_node->val))));
-#endif
-
- /* Add to the update node all the columns found in assignment
- values as columns to copy: therefore, TRUE */
-
- opt_find_all_cols(TRUE, clust_index, &(node->columns), NULL,
- assign_node->val);
- n_assigns++;
-
- assign_node = que_node_get_next(assign_node);
- }
-
- node->update = upd_create(n_assigns, pars_sym_tab_global->heap);
-
- assign_node = col_assign_list;
-
- /* Assume that no field size changes until a column without a fixed
- size is seen below */
- changes_field_size = UPD_NODE_NO_SIZE_CHANGE;
-
- /* Pass 2: the i'th update field corresponds to the i'th assignment */
- for (i = 0; i < n_assigns; i++) {
- upd_field = upd_get_nth_field(node->update, i);
-
- col_sym = assign_node->col;
-
- /* The field number is the position of the column in the
- clustered index */
- upd_field_set_field_no(upd_field, dict_index_get_nth_col_pos(
- clust_index, col_sym->col_no),
- clust_index, NULL);
- upd_field->exp = assign_node->val;
-
- /* A zero fixed size clears the no-size-change flag */
- if (!dict_col_get_fixed_size(
- dict_index_get_nth_col(clust_index,
- upd_field->field_no))) {
- changes_field_size = 0;
- }
-
- assign_node = que_node_get_next(assign_node);
- }
-
- /* Find out if the update can modify an ordering field in any index */
-
- changes_ord_field = UPD_NODE_NO_ORD_CHANGE;
-
- if (row_upd_changes_some_index_ord_field_binary(node->table,
- node->update)) {
- changes_ord_field = 0;
- }
-
- /* Each flag is either set or 0: combine them */
- node->cmpl_info = changes_ord_field | changes_field_size;
-}
-
-/*************************************************************************
-Parses an update or delete statement. The rows to modify come either from
-an explicit cursor (positioned update, cursor_sym != NULL) or from a
-select node built here from search_cond (searched update). The function
-also processes the assignment list of an update, sets the locking fields
-and chooses the persistent cursor to use from the select plan. */
-
-upd_node_t*
-pars_update_statement(
-/*==================*/
- /* out, own: update node in a query
- tree */
- upd_node_t* node, /* in: update node */
- sym_node_t* cursor_sym, /* in: pointer to a cursor entry in
- the symbol table or NULL */
- que_node_t* search_cond) /* in: search condition or NULL */
-{
- sym_node_t* table_sym;
- sel_node_t* sel_node;
- plan_t* plan;
-
- table_sym = node->table_sym;
-
- pars_retrieve_table_def(table_sym);
- node->table = table_sym->table;
-
- UT_LIST_INIT(node->columns);
-
- /* Make the single table node into a list of table nodes of length 1 */
-
- que_node_list_add_last(NULL, table_sym);
-
- if (cursor_sym) {
- /* Positioned update: use the select node of the declared
- cursor which cursor_sym is resolved to */
- pars_resolve_exp_variables_and_types(NULL, cursor_sym);
-
- sel_node = cursor_sym->alias->cursor_def;
-
- node->searched_update = FALSE;
- } else {
- /* Searched update: build a select with an empty select list
- over the table; it is parsed like a LOCK IN SHARE MODE select
- (no consistent read) and switched to X locks further below */
- sel_node = pars_select_list(NULL, NULL);
-
- pars_select_statement(sel_node, table_sym, search_cond, NULL,
- &pars_share_token, NULL);
- node->searched_update = TRUE;
- sel_node->common.parent = node;
- }
-
- node->select = sel_node;
-
- /* A delete has no assignment list and an update must have one */
- ut_a(!node->is_delete || (node->col_assign_list == NULL));
- ut_a(node->is_delete || (node->col_assign_list != NULL));
-
- if (node->is_delete) {
- node->cmpl_info = 0;
- } else {
- pars_process_assign_list(node);
- }
-
- if (node->searched_update) {
- node->has_clust_rec_x_lock = TRUE;
- sel_node->set_x_locks = TRUE;
- sel_node->row_lock_mode = LOCK_X;
- } else {
- /* For a cursor this depends on how the cursor's select
- was declared */
- node->has_clust_rec_x_lock = sel_node->set_x_locks;
- }
-
- /* Requirements on the select which feeds the update */
- ut_a(sel_node->n_tables == 1);
- ut_a(sel_node->consistent_read == FALSE);
- ut_a(sel_node->order_by == NULL);
- ut_a(sel_node->is_aggregate == FALSE);
-
- sel_node->can_get_updated = TRUE;
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- plan = sel_node_get_nth_plan(sel_node, 0);
-
- plan->no_prefetch = TRUE;
-
- /* If the plan does not use the clustered index, the clustered index
- record must be fetched too, and the update uses the cursor on it */
- if (!((plan->index)->type & DICT_CLUSTERED)) {
-
- plan->must_get_clust = TRUE;
-
- node->pcur = &(plan->clust_pcur);
- } else {
- node->pcur = &(plan->pcur);
- }
-
- /* Only a searched update which changes neither a field size nor an
- ordering field is handed over to the select node */
- if (!node->is_delete && node->searched_update
- && (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE)
- && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
-
- /* The select node can perform the update in-place */
-
- ut_a(plan->asc);
-
- node->select_will_do_update = TRUE;
- sel_node->select_will_do_update = TRUE;
- sel_node->latch_mode = BTR_MODIFY_LEAF;
- }
-
- return(node);
-}
-
-/*************************************************************************
-Parses an insert statement. Exactly one of values_list and select must be
-given. The number of values, or of items in the select list, must equal
-the number of user columns of the table. */
-
-ins_node_t*
-pars_insert_statement(
-/*==================*/
-					/* out, own: insert node in a query
-					tree */
-	sym_node_t*	table_sym,	/* in: table name node */
-	que_node_t*	values_list,	/* in: value expression list or NULL */
-	sel_node_t*	select)		/* in: select condition or NULL */
-{
-	ins_node_t*	ins;
-	dtuple_t*	new_row;
-	ulint		ins_type;
-
-	/* One row source, and only one */
-	ut_a(values_list || select);
-	ut_a(!values_list || !select);
-
-	ins_type = values_list ? INS_VALUES : INS_SEARCHED;
-
-	pars_retrieve_table_def(table_sym);
-
-	ins = ins_node_create(ins_type, table_sym->table,
-			      pars_sym_tab_global->heap);
-
-	/* Build a row tuple with one field per table column, typed like
-	the table */
-	new_row = dtuple_create(pars_sym_tab_global->heap,
-				dict_table_get_n_cols(ins->table));
-
-	dict_table_copy_types(new_row, table_sym->table);
-
-	ins_node_set_new_row(ins, new_row);
-
-	ins->select = select;
-
-	if (select != NULL) {
-		select->common.parent = ins;
-
-		ut_a(que_node_list_get_len(select->select_list)
-		     == dict_table_get_n_user_cols(table_sym->table));
-	}
-
-	ins->values_list = values_list;
-
-	if (ins->values_list != NULL) {
-		pars_resolve_exp_list_variables_and_types(NULL, values_list);
-
-		ut_a(que_node_list_get_len(values_list)
-		     == dict_table_get_n_user_cols(table_sym->table));
-	}
-
-	return(ins);
-}
-
-/*************************************************************************
-Set the type of a dfield according to a type token. Only the BINARY type
-takes (and requires) a length; for INT, CHAR and BLOB the length must be
-0. An unknown type token is an error. */
-static
-void
-pars_set_dfield_type(
-/*=================*/
-	dfield_t*		dfield,		/* in: dfield */
-	pars_res_word_t*	type,		/* in: pointer to a type
-						token */
-	ulint			len,		/* in: length, or 0 */
-	ibool			is_unsigned,	/* in: if TRUE, column is
-						UNSIGNED. */
-	ibool			is_not_null)	/* in: if TRUE, column is
-						NOT NULL. */
-{
-	ulint	prtype_flags;
-
-	prtype_flags = is_not_null ? DATA_NOT_NULL : 0;
-
-	if (is_unsigned) {
-		prtype_flags |= DATA_UNSIGNED;
-	}
-
-	if (type == &pars_int_token) {
-		ut_a(len == 0);
-
-		dtype_set(dfield_get_type(dfield), DATA_INT,
-			  prtype_flags, 4);
-
-		return;
-	}
-
-	if (type == &pars_char_token) {
-		ut_a(len == 0);
-
-		dtype_set(dfield_get_type(dfield), DATA_VARCHAR,
-			  DATA_ENGLISH | prtype_flags, 0);
-
-		return;
-	}
-
-	if (type == &pars_binary_token) {
-		/* The only type with an explicit length */
-		ut_a(len != 0);
-
-		dtype_set(dfield_get_type(dfield), DATA_FIXBINARY,
-			  DATA_BINARY_TYPE | prtype_flags, len);
-
-		return;
-	}
-
-	if (type == &pars_blob_token) {
-		ut_a(len == 0);
-
-		dtype_set(dfield_get_type(dfield), DATA_BLOB,
-			  DATA_BINARY_TYPE | prtype_flags, 0);
-
-		return;
-	}
-
-	/* Unknown type token */
-	ut_error;
-}
-
-/*************************************************************************
-Parses a variable declaration: marks the symbol as a resolved variable
-which is not a procedure parameter, and sets its data type from the type
-token (no length, not UNSIGNED, not NOT NULL). */
-
-sym_node_t*
-pars_variable_declaration(
-/*======================*/
-				/* out, own: symbol table node of type
-				SYM_VAR */
-	sym_node_t*	node,	/* in: symbol table node allocated for the
-				id of the variable */
-	pars_res_word_t* type)	/* in: pointer to a type token */
-{
-	dfield_t*	val;
-
-	node->resolved = TRUE;
-	node->token_type = SYM_VAR;
-	node->param_type = PARS_NOT_PARAM;
-
-	val = que_node_get_val(node);
-
-	pars_set_dfield_type(val, type, 0, FALSE, FALSE);
-
-	return(node);
-}
-
-/*************************************************************************
-Parses a procedure parameter declaration: declares the symbol like an
-ordinary variable and then records whether it is an input or an output
-parameter. */
-
-sym_node_t*
-pars_parameter_declaration(
-/*=======================*/
-				/* out, own: symbol table node of type
-				SYM_VAR */
-	sym_node_t*	node,	/* in: symbol table node allocated for the
-				id of the parameter */
-	ulint		param_type,
-				/* in: PARS_INPUT or PARS_OUTPUT */
-	pars_res_word_t* type)	/* in: pointer to a type token */
-{
-	ut_a((param_type == PARS_OUTPUT) || (param_type == PARS_INPUT));
-
-	/* The call below sets param_type to PARS_NOT_PARAM: overwrite it
-	afterwards */
-	pars_variable_declaration(node, type);
-
-	node->param_type = param_type;
-
-	return(node);
-}
-
-/*************************************************************************
-Sets the parent field in every node of a query node list. A NULL list is
-allowed and nothing is done then. */
-static
-void
-pars_set_parent_in_list(
-/*====================*/
-	que_node_t*	node_list,	/* in: first node in a list */
-	que_node_t*	parent)		/* in: parent value to set in all
-					nodes of the list */
-{
-	que_common_t*	cur;
-
-	for (cur = node_list; cur != NULL; cur = que_node_get_next(cur)) {
-		cur->parent = parent;
-	}
-}
-
-/*************************************************************************
-Parses an elsif element: allocates the elsif node, resolves the variables
-of its condition and attaches the statement list. */
-
-elsif_node_t*
-pars_elsif_element(
-/*===============*/
-					/* out: elsif node */
-	que_node_t*	cond,		/* in: if-condition */
-	que_node_t*	stat_list)	/* in: statement list */
-{
-	elsif_node_t*	elsif;
-
-	elsif = mem_heap_alloc(pars_sym_tab_global->heap,
-			       sizeof(elsif_node_t));
-
-	elsif->common.type = QUE_NODE_ELSIF;
-	elsif->cond = cond;
-
-	pars_resolve_exp_variables_and_types(NULL, cond);
-
-	elsif->stat_list = stat_list;
-
-	return(elsif);
-}
-
-/*************************************************************************
-Parses an if-statement. The else part is either a plain statement list or
-a list of elsif elements; the two are told apart by the type of the first
-node. The if node is set as the parent of the statements of every
-branch. */
-
-if_node_t*
-pars_if_statement(
-/*==============*/
-					/* out: if-statement node */
-	que_node_t*	cond,		/* in: if-condition */
-	que_node_t*	stat_list,	/* in: statement list */
-	que_node_t*	else_part)	/* in: else-part statement list
-					or elsif element list */
-{
-	if_node_t*	if_node;
-	elsif_node_t*	elsif;
-
-	if_node = mem_heap_alloc(pars_sym_tab_global->heap,
-				 sizeof(if_node_t));
-
-	if_node->common.type = QUE_NODE_IF;
-	if_node->cond = cond;
-
-	pars_resolve_exp_variables_and_types(NULL, cond);
-
-	if_node->stat_list = stat_list;
-
-	if (!else_part
-	    || (que_node_get_type(else_part) != QUE_NODE_ELSIF)) {
-
-		/* No else part at all, or a plain else statement list */
-
-		if_node->else_part = else_part;
-		if_node->elsif_list = NULL;
-
-		pars_set_parent_in_list(else_part, if_node);
-	} else {
-		/* There is a list of elsif conditions */
-
-		if_node->else_part = NULL;
-		if_node->elsif_list = else_part;
-
-		for (elsif = else_part;
-		     elsif != NULL;
-		     elsif = que_node_get_next(elsif)) {
-
-			pars_set_parent_in_list(elsif->stat_list, if_node);
-		}
-	}
-
-	pars_set_parent_in_list(stat_list, if_node);
-
-	return(if_node);
-}
-
-/*************************************************************************
-Parses a while-statement: allocates the while node, resolves the variables
-of the loop condition and makes the node the parent of the statements in
-the loop body. */
-
-while_node_t*
-pars_while_statement(
-/*=================*/
-					/* out: while-statement node */
-	que_node_t*	cond,		/* in: while-condition */
-	que_node_t*	stat_list)	/* in: statement list */
-{
-	while_node_t*	loop;
-
-	loop = mem_heap_alloc(pars_sym_tab_global->heap,
-			      sizeof(while_node_t));
-
-	loop->common.type = QUE_NODE_WHILE;
-	loop->cond = cond;
-
-	pars_resolve_exp_variables_and_types(NULL, cond);
-
-	loop->stat_list = stat_list;
-
-	pars_set_parent_in_list(stat_list, loop);
-
-	return(loop);
-}
-
-/*************************************************************************
-Parses a for-loop-statement. The loop variable and both limit expressions
-are resolved here; the node stores the declaration which the loop variable
-occurrence refers to, not the occurrence itself. */
-
-for_node_t*
-pars_for_statement(
-/*===============*/
-					/* out: for-statement node */
-	sym_node_t*	loop_var,	/* in: loop variable */
-	que_node_t*	loop_start_limit,/* in: loop start expression */
-	que_node_t*	loop_end_limit,	/* in: loop end expression */
-	que_node_t*	stat_list)	/* in: statement list */
-{
-	for_node_t*	loop;
-
-	loop = mem_heap_alloc(pars_sym_tab_global->heap,
-			      sizeof(for_node_t));
-
-	loop->common.type = QUE_NODE_FOR;
-
-	pars_resolve_exp_variables_and_types(NULL, loop_var);
-	pars_resolve_exp_variables_and_types(NULL, loop_start_limit);
-	pars_resolve_exp_variables_and_types(NULL, loop_end_limit);
-
-	/* The resolution above must have linked the occurrence to a
-	declaration */
-	ut_a(loop_var->indirection);
-
-	loop->loop_var = loop_var->indirection;
-	loop->loop_start_limit = loop_start_limit;
-	loop->loop_end_limit = loop_end_limit;
-	loop->stat_list = stat_list;
-
-	pars_set_parent_in_list(stat_list, loop);
-
-	return(loop);
-}
-
-/*************************************************************************
-Parses an exit statement: allocates a node of type QUE_NODE_EXIT. */
-
-exit_node_t*
-pars_exit_statement(void)
-/*=====================*/
-				/* out: exit statement node */
-{
-	exit_node_t*	exit_stmt;
-
-	exit_stmt = mem_heap_alloc(pars_sym_tab_global->heap,
-				   sizeof(exit_node_t));
-
-	exit_stmt->common.type = QUE_NODE_EXIT;
-
-	return(exit_stmt);
-}
-
-/*************************************************************************
-Parses a return-statement: allocates a node of type QUE_NODE_RETURN. */
-
-return_node_t*
-pars_return_statement(void)
-/*=======================*/
-				/* out: return-statement node */
-{
-	return_node_t*	ret_stmt;
-
-	ret_stmt = mem_heap_alloc(pars_sym_tab_global->heap,
-				  sizeof(return_node_t));
-
-	ret_stmt->common.type = QUE_NODE_RETURN;
-
-	return(ret_stmt);
-}
-
-/*************************************************************************
-Parses an assignment statement. Both sides are resolved here and their
-main data types must agree; otherwise an assertion fails. */
-
-assign_node_t*
-pars_assignment_statement(
-/*======================*/
-				/* out: assignment statement node */
-	sym_node_t*	var,	/* in: variable to assign */
-	que_node_t*	val)	/* in: value to assign */
-{
-	assign_node_t*	assignment;
-	ulint		var_mtype;
-	ulint		val_mtype;
-
-	assignment = mem_heap_alloc(pars_sym_tab_global->heap,
-				    sizeof(assign_node_t));
-
-	assignment->common.type = QUE_NODE_ASSIGNMENT;
-	assignment->var = var;
-	assignment->val = val;
-
-	pars_resolve_exp_variables_and_types(NULL, var);
-	pars_resolve_exp_variables_and_types(NULL, val);
-
-	/* Compare the main types of the target and of the value */
-	var_mtype = dtype_get_mtype(dfield_get_type(que_node_get_val(var)));
-	val_mtype = dtype_get_mtype(dfield_get_type(que_node_get_val(val)));
-
-	ut_a(var_mtype == val_mtype);
-
-	return(assignment);
-}
-
-/*************************************************************************
-Parses a procedure call: builds the function node with pars_func() and
-then resolves the variables in the argument list. */
-
-func_node_t*
-pars_procedure_call(
-/*================*/
-				/* out: function node */
-	que_node_t*	res_word,/* in: procedure name reserved word */
-	que_node_t*	args)	/* in: argument list */
-{
-	func_node_t*	call;
-
-	call = pars_func(res_word, args);
-
-	pars_resolve_exp_list_variables_and_types(NULL, args);
-
-	return(call);
-}
-
-/*************************************************************************
-Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL. The cursor must resolve to a declared cursor, and an INTO list
-must have as many variables as the select list of the cursor has items. */
-
-fetch_node_t*
-pars_fetch_statement(
-/*=================*/
-					/* out: fetch statement node */
-	sym_node_t*	cursor,		/* in: cursor node */
-	sym_node_t*	into_list,	/* in: variables to set, or NULL */
-	sym_node_t*	user_func)	/* in: user function name, or NULL */
-{
-	fetch_node_t*	fetch;
-	sym_node_t*	cursor_decl;
-
-	/* Logical XOR. */
-	ut_a(!into_list != !user_func);
-
-	fetch = mem_heap_alloc(pars_sym_tab_global->heap,
-			       sizeof(fetch_node_t));
-
-	fetch->common.type = QUE_NODE_FETCH;
-
-	pars_resolve_exp_variables_and_types(NULL, cursor);
-
-	if (into_list == NULL) {
-		/* The fetched rows go to a user function */
-		pars_resolve_exp_variables_and_types(NULL, user_func);
-
-		fetch->func = pars_info_get_user_func(
-			pars_sym_tab_global->info, user_func->name);
-		ut_a(fetch->func);
-
-		fetch->into_list = NULL;
-	} else {
-		/* The fetched rows go to variables */
-		pars_resolve_exp_list_variables_and_types(NULL, into_list);
-
-		fetch->into_list = into_list;
-		fetch->func = NULL;
-	}
-
-	/* The cursor occurrence is an alias of the cursor declaration */
-	cursor_decl = cursor->alias;
-
-	ut_a(cursor_decl->token_type == SYM_CURSOR);
-
-	fetch->cursor_def = cursor_decl->cursor_def;
-
-	if (into_list != NULL) {
-		ut_a(que_node_list_get_len(into_list)
-		     == que_node_list_get_len(
-			     fetch->cursor_def->select_list));
-	}
-
-	return(fetch);
-}
-
-/*************************************************************************
-Parses an open or close cursor statement. The cursor symbol must resolve
-to a declared cursor. */
-
-open_node_t*
-pars_open_statement(
-/*================*/
-				/* out: open or close statement node */
-	ulint		type,	/* in: ROW_SEL_OPEN_CURSOR
-				or ROW_SEL_CLOSE_CURSOR */
-	sym_node_t*	cursor)	/* in: cursor node */
-{
-	open_node_t*	open_stmt;
-	sym_node_t*	cursor_decl;
-
-	open_stmt = mem_heap_alloc(pars_sym_tab_global->heap,
-				   sizeof(open_node_t));
-
-	open_stmt->common.type = QUE_NODE_OPEN;
-
-	pars_resolve_exp_variables_and_types(NULL, cursor);
-
-	/* The cursor occurrence is an alias of the cursor declaration */
-	cursor_decl = cursor->alias;
-
-	ut_a(cursor_decl->token_type == SYM_CURSOR);
-
-	open_stmt->op_type = type;
-	open_stmt->cursor_def = cursor_decl->cursor_def;
-
-	return(open_stmt);
-}
-
-/*************************************************************************
-Parses a row_printf-statement: allocates the node and links it and the
-select node to each other. */
-
-row_printf_node_t*
-pars_row_printf_statement(
-/*======================*/
-					/* out: row_printf-statement node */
-	sel_node_t*	sel_node)	/* in: select node */
-{
-	row_printf_node_t*	printf_node;
-
-	printf_node = mem_heap_alloc(pars_sym_tab_global->heap,
-				     sizeof(row_printf_node_t));
-
-	printf_node->common.type = QUE_NODE_ROW_PRINTF;
-	printf_node->sel_node = sel_node;
-
-	/* The new node becomes the parent of the select */
-	sel_node->common.parent = printf_node;
-
-	return(printf_node);
-}
-
-/*************************************************************************
-Parses a commit statement: creates a commit node from the heap of the
-global symbol table. */
-
-commit_node_t*
-pars_commit_statement(void)
-/*=======================*/
-{
-	commit_node_t*	commit;
-
-	commit = commit_node_create(pars_sym_tab_global->heap);
-
-	return(commit);
-}
-
-/*************************************************************************
-Parses a rollback statement. */
-
-roll_node_t*
-pars_rollback_statement(void)
-/*=========================*/
-{
- return(roll_node_create(pars_sym_tab_global->heap));
-}
-
-/*************************************************************************
-Parses a column definition at a table creation. */
-
-sym_node_t*
-pars_column_def(
-/*============*/
- /* out: column sym table
- node */
- sym_node_t* sym_node, /* in: column node in the
- symbol table */
- pars_res_word_t* type, /* in: data type */
- sym_node_t* len, /* in: length of column, or
- NULL */
- void* is_unsigned, /* in: if not NULL, column
- is of type UNSIGNED. */
- void* is_not_null) /* in: if not NULL, column
- is of type NOT NULL. */
-{
- ulint len2;
-
- if (len) {
- len2 = eval_node_get_int_val(len);
- } else {
- len2 = 0;
- }
-
- pars_set_dfield_type(que_node_get_val(sym_node), type, len2,
- is_unsigned != NULL, is_not_null != NULL);
-
- return(sym_node);
-}
-
-/*************************************************************************
-Parses a table creation operation. */
-
-tab_node_t*
-pars_create_table(
-/*==============*/
- /* out: table create subgraph */
- sym_node_t* table_sym, /* in: table name node in the symbol
- table */
- sym_node_t* column_defs, /* in: list of column names */
- void* not_fit_in_memory __attribute__((unused)))
- /* in: a non-NULL pointer means that
- this is a table which in simulations
- should be simulated as not fitting
- in memory; thread is put to sleep
- to simulate disk accesses; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about non-NULL value if
- it has to reload the table definition
- from disk */
-{
- dict_table_t* table;
- sym_node_t* column;
- tab_node_t* node;
- dtype_t* dtype;
- ulint n_cols;
-
- n_cols = que_node_list_get_len(column_defs);
-
- /* As the InnoDB SQL parser is for internal use only,
- for creating some system tables, this function will only
- create tables in the old (not compact) record format. */
- table = dict_mem_table_create(table_sym->name, 0, n_cols, 0);
-
-#ifdef UNIV_DEBUG
- if (not_fit_in_memory != NULL) {
- table->does_not_fit_in_memory = TRUE;
- }
-#endif /* UNIV_DEBUG */
- column = column_defs;
-
- while (column) {
- dtype = dfield_get_type(que_node_get_val(column));
-
- dict_mem_table_add_col(table, table->heap,
- column->name, dtype->mtype,
- dtype->prtype, dtype->len);
- column->resolved = TRUE;
- column->token_type = SYM_COLUMN;
-
- column = que_node_get_next(column);
- }
-
- node = tab_create_graph_create(table, pars_sym_tab_global->heap);
-
- table_sym->resolved = TRUE;
- table_sym->token_type = SYM_TABLE;
-
- return(node);
-}
-
-/*************************************************************************
-Parses an index creation operation. */
-
-ind_node_t*
-pars_create_index(
-/*==============*/
- /* out: index create subgraph */
- pars_res_word_t* unique_def, /* in: not NULL if a unique index */
- pars_res_word_t* clustered_def, /* in: not NULL if a clustered index */
- sym_node_t* index_sym, /* in: index name node in the symbol
- table */
- sym_node_t* table_sym, /* in: table name node in the symbol
- table */
- sym_node_t* column_list) /* in: list of column names */
-{
- dict_index_t* index;
- sym_node_t* column;
- ind_node_t* node;
- ulint n_fields;
- ulint ind_type;
-
- n_fields = que_node_list_get_len(column_list);
-
- ind_type = 0;
-
- if (unique_def) {
- ind_type = ind_type | DICT_UNIQUE;
- }
-
- if (clustered_def) {
- ind_type = ind_type | DICT_CLUSTERED;
- }
-
- index = dict_mem_index_create(table_sym->name, index_sym->name, 0,
- ind_type, n_fields);
- column = column_list;
-
- while (column) {
- dict_mem_index_add_field(index, column->name, 0);
-
- column->resolved = TRUE;
- column->token_type = SYM_COLUMN;
-
- column = que_node_get_next(column);
- }
-
- node = ind_create_graph_create(index, pars_sym_tab_global->heap);
-
- table_sym->resolved = TRUE;
- table_sym->token_type = SYM_TABLE;
-
- index_sym->resolved = TRUE;
- index_sym->token_type = SYM_TABLE;
-
- return(node);
-}
-
-/*************************************************************************
-Parses a procedure definition. */
-
-que_fork_t*
-pars_procedure_definition(
-/*======================*/
- /* out: query fork node */
- sym_node_t* sym_node, /* in: procedure id node in the symbol
- table */
- sym_node_t* param_list, /* in: parameter declaration list */
- que_node_t* stat_list) /* in: statement list */
-{
- proc_node_t* node;
- que_fork_t* fork;
- que_thr_t* thr;
- mem_heap_t* heap;
-
- heap = pars_sym_tab_global->heap;
-
- fork = que_fork_create(NULL, NULL, QUE_FORK_PROCEDURE, heap);
- fork->trx = NULL;
-
- thr = que_thr_create(fork, heap);
-
- node = mem_heap_alloc(heap, sizeof(proc_node_t));
-
- node->common.type = QUE_NODE_PROC;
- node->common.parent = thr;
-
- sym_node->token_type = SYM_PROCEDURE_NAME;
- sym_node->resolved = TRUE;
-
- node->proc_id = sym_node;
- node->param_list = param_list;
- node->stat_list = stat_list;
-
- pars_set_parent_in_list(stat_list, node);
-
- node->sym_tab = pars_sym_tab_global;
-
- thr->child = node;
-
- pars_sym_tab_global->query_graph = fork;
-
- return(fork);
-}
-
-/*****************************************************************
-Parses a stored procedure call, when this is not within another stored
-procedure, that is, the client issues a procedure call directly.
-In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used. */
-
-que_fork_t*
-pars_stored_procedure_call(
-/*=======================*/
- /* out: query graph */
- sym_node_t* sym_node __attribute__((unused)))
- /* in: stored procedure name */
-{
- ut_error;
- return(NULL);
-}
-
-/*****************************************************************
-Retrieves characters to the lexical analyzer. */
-
-void
-pars_get_lex_chars(
-/*===============*/
- char* buf, /* in/out: buffer where to copy */
- int* result, /* out: number of characters copied or EOF */
- int max_size) /* in: maximum number of characters which fit
- in the buffer */
-{
- int len;
-
- len = pars_sym_tab_global->string_len
- - pars_sym_tab_global->next_char_pos;
- if (len == 0) {
-#ifdef YYDEBUG
- /* fputs("SQL string ends\n", stderr); */
-#endif
- *result = 0;
-
- return;
- }
-
- if (len > max_size) {
- len = max_size;
- }
-
-#ifdef UNIV_SQL_DEBUG
- if (pars_print_lexed) {
-
- if (len >= 5) {
- len = 5;
- }
-
- fwrite(pars_sym_tab_global->sql_string
- + pars_sym_tab_global->next_char_pos,
- 1, len, stderr);
- }
-#endif /* UNIV_SQL_DEBUG */
-
- ut_memcpy(buf, pars_sym_tab_global->sql_string
- + pars_sym_tab_global->next_char_pos, len);
- *result = len;
-
- pars_sym_tab_global->next_char_pos += len;
-}
-
-/*****************************************************************
-Called by yyparse on error. */
-
-void
-yyerror(
-/*====*/
- const char* s __attribute__((unused)))
- /* in: error message string */
-{
- ut_ad(s);
-
- fputs("PARSER ERROR: Syntax error in SQL string\n", stderr);
-
- ut_error;
-}
-
-/*****************************************************************
-Parses an SQL string returning the query graph. */
-
-que_t*
-pars_sql(
-/*=====*/
- /* out, own: the query graph */
- pars_info_t* info, /* in: extra information, or NULL */
- const char* str) /* in: SQL string */
-{
- sym_node_t* sym_node;
- mem_heap_t* heap;
- que_t* graph;
-
- ut_ad(str);
-
- heap = mem_heap_create(256);
-
- /* Currently, the parser is not reentrant: */
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- pars_sym_tab_global = sym_tab_create(heap);
-
- pars_sym_tab_global->string_len = strlen(str);
- pars_sym_tab_global->sql_string = mem_heap_dup(
- heap, str, pars_sym_tab_global->string_len + 1);
- pars_sym_tab_global->next_char_pos = 0;
- pars_sym_tab_global->info = info;
-
- yyparse();
-
- sym_node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list);
-
- while (sym_node) {
- ut_a(sym_node->resolved);
-
- sym_node = UT_LIST_GET_NEXT(sym_list, sym_node);
- }
-
- graph = pars_sym_tab_global->query_graph;
-
- graph->sym_tab = pars_sym_tab_global;
- graph->info = info;
-
- /* fprintf(stderr, "SQL graph size %lu\n", mem_heap_get_size(heap)); */
-
- return(graph);
-}
-
-/**********************************************************************
-Completes a query graph by adding query thread and fork nodes
-above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE. */
-
-que_thr_t*
-pars_complete_graph_for_exec(
-/*=========================*/
- /* out: query thread node to run */
- que_node_t* node, /* in: root node for an incomplete
- query graph */
- trx_t* trx, /* in: transaction handle */
- mem_heap_t* heap) /* in: memory heap from which allocated */
-{
- que_fork_t* fork;
- que_thr_t* thr;
-
- fork = que_fork_create(NULL, NULL, QUE_FORK_MYSQL_INTERFACE, heap);
- fork->trx = trx;
-
- thr = que_thr_create(fork, heap);
-
- thr->child = node;
-
- que_node_set_parent(node, thr);
-
- trx->graph = NULL;
-
- return(thr);
-}
-
-/********************************************************************
-Create parser info struct.*/
-
-pars_info_t*
-pars_info_create(void)
-/*==================*/
- /* out, own: info struct */
-{
- pars_info_t* info;
- mem_heap_t* heap;
-
- heap = mem_heap_create(512);
-
- info = mem_heap_alloc(heap, sizeof(*info));
-
- info->heap = heap;
- info->funcs = NULL;
- info->bound_lits = NULL;
- info->bound_ids = NULL;
- info->graph_owns_us = TRUE;
-
- return(info);
-}
-
-/********************************************************************
-Free info struct and everything it contains.*/
-
-void
-pars_info_free(
-/*===========*/
- pars_info_t* info) /* in: info struct */
-{
- mem_heap_free(info->heap);
-}
-
-/********************************************************************
-Add bound literal. */
-
-void
-pars_info_add_literal(
-/*==================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const void* address, /* in: address */
- ulint length, /* in: length of data */
- ulint type, /* in: type, e.g. DATA_FIXBINARY */
- ulint prtype) /* in: precise type, e.g.
- DATA_UNSIGNED */
-{
- pars_bound_lit_t* pbl;
-
- ut_ad(!pars_info_get_bound_lit(info, name));
-
- pbl = mem_heap_alloc(info->heap, sizeof(*pbl));
-
- pbl->name = name;
- pbl->address = address;
- pbl->length = length;
- pbl->type = type;
- pbl->prtype = prtype;
-
- if (!info->bound_lits) {
- info->bound_lits = ib_vector_create(info->heap, 8);
- }
-
- ib_vector_push(info->bound_lits, pbl);
-}
-
-/********************************************************************
-Equivalent to pars_info_add_literal(info, name, str, strlen(str),
-DATA_VARCHAR, DATA_ENGLISH). */
-
-void
-pars_info_add_str_literal(
-/*======================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const char* str) /* in: string */
-{
- pars_info_add_literal(info, name, str, strlen(str),
- DATA_VARCHAR, DATA_ENGLISH);
-}
-
-/********************************************************************
-Equivalent to:
-
-char buf[4];
-mach_write_to_4(buf, val);
-pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-
-void
-pars_info_add_int4_literal(
-/*=======================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- lint val) /* in: value */
-{
- byte* buf = mem_heap_alloc(info->heap, 4);
-
- mach_write_to_4(buf, val);
- pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-}
-
-/********************************************************************
-Equivalent to:
-
-char buf[8];
-mach_write_to_8(buf, val);
-pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-
-void
-pars_info_add_dulint_literal(
-/*=========================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- dulint val) /* in: value */
-{
- byte* buf = mem_heap_alloc(info->heap, 8);
-
- mach_write_to_8(buf, val);
-
- pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
-}
-
-/********************************************************************
-Add user function. */
-
-void
-pars_info_add_function(
-/*===================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: function name */
- pars_user_func_cb_t func, /* in: function address */
- void* arg) /* in: user-supplied argument */
-{
- pars_user_func_t* puf;
-
- ut_ad(!pars_info_get_user_func(info, name));
-
- puf = mem_heap_alloc(info->heap, sizeof(*puf));
-
- puf->name = name;
- puf->func = func;
- puf->arg = arg;
-
- if (!info->funcs) {
- info->funcs = ib_vector_create(info->heap, 8);
- }
-
- ib_vector_push(info->funcs, puf);
-}
-
-/********************************************************************
-Add bound id. */
-
-void
-pars_info_add_id(
-/*=============*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const char* id) /* in: id */
-{
- pars_bound_id_t* bid;
-
- ut_ad(!pars_info_get_bound_id(info, name));
-
- bid = mem_heap_alloc(info->heap, sizeof(*bid));
-
- bid->name = name;
- bid->id = id;
-
- if (!info->bound_ids) {
- info->bound_ids = ib_vector_create(info->heap, 8);
- }
-
- ib_vector_push(info->bound_ids, bid);
-}
-
-/********************************************************************
-Get user function with the given name.*/
-
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
- /* out: user func, or NULL if not
- found */
- pars_info_t* info, /* in: info struct */
- const char* name) /* in: function name to find*/
-{
- ulint i;
- ib_vector_t* vec;
-
- if (!info || !info->funcs) {
- return(NULL);
- }
-
- vec = info->funcs;
-
- for (i = 0; i < ib_vector_size(vec); i++) {
- pars_user_func_t* puf = ib_vector_get(vec, i);
-
- if (strcmp(puf->name, name) == 0) {
- return(puf);
- }
- }
-
- return(NULL);
-}
-
-/********************************************************************
-Get bound literal with the given name.*/
-
-pars_bound_lit_t*
-pars_info_get_bound_lit(
-/*====================*/
- /* out: bound literal, or NULL if
- not found */
- pars_info_t* info, /* in: info struct */
- const char* name) /* in: bound literal name to find */
-{
- ulint i;
- ib_vector_t* vec;
-
- if (!info || !info->bound_lits) {
- return(NULL);
- }
-
- vec = info->bound_lits;
-
- for (i = 0; i < ib_vector_size(vec); i++) {
- pars_bound_lit_t* pbl = ib_vector_get(vec, i);
-
- if (strcmp(pbl->name, name) == 0) {
- return(pbl);
- }
- }
-
- return(NULL);
-}
-
-/********************************************************************
-Get bound id with the given name.*/
-
-pars_bound_id_t*
-pars_info_get_bound_id(
-/*===================*/
- /* out: bound id, or NULL if not
- found */
- pars_info_t* info, /* in: info struct */
- const char* name) /* in: bound id name to find */
-{
- ulint i;
- ib_vector_t* vec;
-
- if (!info || !info->bound_ids) {
- return(NULL);
- }
-
- vec = info->bound_ids;
-
- for (i = 0; i < ib_vector_size(vec); i++) {
- pars_bound_id_t* bid = ib_vector_get(vec, i);
-
- if (strcmp(bid->name, name) == 0) {
- return(bid);
- }
- }
-
- return(NULL);
-}
diff --git a/storage/innobase/pars/pars0sym.c b/storage/innobase/pars/pars0sym.c
deleted file mode 100644
index 2d56fff2d42..00000000000
--- a/storage/innobase/pars/pars0sym.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/******************************************************
-SQL parser symbol table
-
-(c) 1997 Innobase Oy
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
-
-#include "pars0sym.h"
-
-#ifdef UNIV_NONINL
-#include "pars0sym.ic"
-#endif
-
-#include "mem0mem.h"
-#include "data0type.h"
-#include "data0data.h"
-#include "pars0grm.h"
-#include "pars0pars.h"
-#include "que0que.h"
-#include "eval0eval.h"
-#include "row0sel.h"
-
-/**********************************************************************
-Creates a symbol table for a single stored procedure or query. */
-
-sym_tab_t*
-sym_tab_create(
-/*===========*/
- /* out, own: symbol table */
- mem_heap_t* heap) /* in: memory heap where to create */
-{
- sym_tab_t* sym_tab;
-
- sym_tab = mem_heap_alloc(heap, sizeof(sym_tab_t));
-
- UT_LIST_INIT(sym_tab->sym_list);
- UT_LIST_INIT(sym_tab->func_node_list);
-
- sym_tab->heap = heap;
-
- return(sym_tab);
-}
-
-/**********************************************************************
-Frees the memory allocated dynamically AFTER parsing phase for variables
-etc. in the symbol table. Does not free the mem heap where the table was
-originally created. Frees also SQL explicit cursor definitions. */
-
-void
-sym_tab_free_private(
-/*=================*/
- sym_tab_t* sym_tab) /* in, own: symbol table */
-{
- sym_node_t* sym;
- func_node_t* func;
-
- sym = UT_LIST_GET_FIRST(sym_tab->sym_list);
-
- while (sym) {
- eval_node_free_val_buf(sym);
-
- if (sym->prefetch_buf) {
- sel_col_prefetch_buf_free(sym->prefetch_buf);
- }
-
- if (sym->cursor_def) {
- que_graph_free_recursive(sym->cursor_def);
- }
-
- sym = UT_LIST_GET_NEXT(sym_list, sym);
- }
-
- func = UT_LIST_GET_FIRST(sym_tab->func_node_list);
-
- while (func) {
- eval_node_free_val_buf(func);
-
- func = UT_LIST_GET_NEXT(func_node_list, func);
- }
-}
-
-/**********************************************************************
-Adds an integer literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_int_lit(
-/*================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- ulint val) /* in: integer value */
-{
- sym_node_t* node;
- byte* data;
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = TRUE;
- node->token_type = SYM_LIT;
-
- node->indirection = NULL;
-
- dtype_set(&(node->common.val.type), DATA_INT, 0, 4);
-
- data = mem_heap_alloc(sym_tab->heap, 4);
- mach_write_to_4(data, val);
-
- dfield_set_data(&(node->common.val), data, 4);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/**********************************************************************
-Adds a string literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_str_lit(
-/*================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- byte* str, /* in: string with no quotes around
- it */
- ulint len) /* in: string length */
-{
- sym_node_t* node;
- byte* data;
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = TRUE;
- node->token_type = SYM_LIT;
-
- node->indirection = NULL;
-
- dtype_set(&(node->common.val.type), DATA_VARCHAR, DATA_ENGLISH, 0);
-
- if (len) {
- data = mem_heap_alloc(sym_tab->heap, len);
- ut_memcpy(data, str, len);
- } else {
- data = NULL;
- }
-
- dfield_set_data(&(node->common.val), data, len);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/**********************************************************************
-Add a bound literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_bound_lit(
-/*==================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- const char* name, /* in: name of bound literal */
- ulint* lit_type) /* out: type of literal (PARS_*_LIT) */
-{
- sym_node_t* node;
- pars_bound_lit_t* blit;
- ulint len = 0;
-
- blit = pars_info_get_bound_lit(sym_tab->info, name);
- ut_a(blit);
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = TRUE;
- node->token_type = SYM_LIT;
-
- node->indirection = NULL;
-
- switch (blit->type) {
- case DATA_FIXBINARY:
- len = blit->length;
- *lit_type = PARS_FIXBINARY_LIT;
- break;
-
- case DATA_BLOB:
- *lit_type = PARS_BLOB_LIT;
- break;
-
- case DATA_VARCHAR:
- *lit_type = PARS_STR_LIT;
- break;
-
- case DATA_CHAR:
- ut_a(blit->length > 0);
-
- len = blit->length;
- *lit_type = PARS_STR_LIT;
- break;
-
- case DATA_INT:
- ut_a(blit->length > 0);
- ut_a(blit->length <= 8);
-
- len = blit->length;
- *lit_type = PARS_INT_LIT;
- break;
-
- default:
- ut_error;
- }
-
- dtype_set(&(node->common.val.type), blit->type, blit->prtype, len);
-
- dfield_set_data(&(node->common.val), blit->address, blit->length);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/**********************************************************************
-Adds an SQL null literal to a symbol table. */
-
-sym_node_t*
-sym_tab_add_null_lit(
-/*=================*/
- /* out: symbol table node */
- sym_tab_t* sym_tab) /* in: symbol table */
-{
- sym_node_t* node;
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = TRUE;
- node->token_type = SYM_LIT;
-
- node->indirection = NULL;
-
- node->common.val.type.mtype = DATA_ERROR;
-
- dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/**********************************************************************
-Adds an identifier to a symbol table. */
-
-sym_node_t*
-sym_tab_add_id(
-/*===========*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- byte* name, /* in: identifier name */
- ulint len) /* in: identifier length */
-{
- sym_node_t* node;
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = FALSE;
- node->indirection = NULL;
-
- node->name = mem_heap_strdupl(sym_tab->heap, (char*) name, len);
- node->name_len = len;
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- node->sym_table = sym_tab;
-
- return(node);
-}
-
-/**********************************************************************
-Add a bound identifier to a symbol table. */
-
-sym_node_t*
-sym_tab_add_bound_id(
-/*===========*/
- /* out: symbol table node */
- sym_tab_t* sym_tab, /* in: symbol table */
- const char* name) /* in: name of bound id */
-{
- sym_node_t* node;
- pars_bound_id_t* bid;
-
- bid = pars_info_get_bound_id(sym_tab->info, name);
- ut_a(bid);
-
- node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t));
-
- node->common.type = QUE_NODE_SYMBOL;
-
- node->resolved = FALSE;
- node->indirection = NULL;
-
- node->name = mem_heap_strdup(sym_tab->heap, bid->id);
- node->name_len = strlen(node->name);
-
- UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
- dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL);
-
- node->common.val_buf_size = 0;
- node->prefetch_buf = NULL;
- node->cursor_def = NULL;
-
- node->sym_table = sym_tab;
-
- return(node);
-}
diff --git a/storage/innobase/plug.in b/storage/innobase/plug.in
deleted file mode 100644
index f7d2abed751..00000000000
--- a/storage/innobase/plug.in
+++ /dev/null
@@ -1,44 +0,0 @@
-MYSQL_STORAGE_ENGINE(innobase, innodb, [InnoDB Storage Engine],
- [Transactional Tables using InnoDB], [max,max-no-ndb])
-MYSQL_PLUGIN_DIRECTORY(innobase, [storage/innobase])
-MYSQL_PLUGIN_STATIC(innobase, [libinnobase.a])
-MYSQL_PLUGIN_DYNAMIC(innobase, [ha_innodb.la])
-MYSQL_PLUGIN_ACTIONS(innobase, [
- AC_CHECK_LIB(rt, aio_read, [innodb_system_libs="-lrt"])
- AC_SUBST(innodb_system_libs)
- AC_CHECK_HEADERS(aio.h sched.h)
- AC_CHECK_SIZEOF(int, 4)
- AC_CHECK_SIZEOF(long, 4)
- AC_CHECK_SIZEOF(void*, 4)
- AC_CHECK_FUNCS(sched_yield)
- AC_CHECK_FUNCS(fdatasync)
- AC_CHECK_FUNCS(localtime_r)
- AC_C_BIGENDIAN
- case "$target_os" in
- lin*)
- CFLAGS="$CFLAGS -DUNIV_LINUX";;
- hpux10*)
- CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";;
- hp*)
- CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX";;
- aix*)
- CFLAGS="$CFLAGS -DUNIV_AIX";;
- irix*|osf*|sysv5uw7*|openbsd*)
- CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
- *solaris*|*SunOS*)
- CFLAGS="$CFLAGS -DUNIV_SOLARIS";;
- esac
- INNODB_DYNAMIC_CFLAGS="-DMYSQL_DYNAMIC_PLUGIN"
- case "$target_cpu" in
- x86_64)
- # The AMD64 ABI forbids absolute addresses in shared libraries
- ;;
- *86)
- # Use absolute addresses on IA-32
- INNODB_DYNAMIC_CFLAGS="$INNODB_DYNAMIC_CFLAGS -prefer-non-pic"
- ;;
- esac
- AC_SUBST(INNODB_DYNAMIC_CFLAGS)
- ])
-
-# vim: set ft=config:
diff --git a/storage/innobase/que/que0que.c b/storage/innobase/que/que0que.c
deleted file mode 100644
index bf83f28f04e..00000000000
--- a/storage/innobase/que/que0que.c
+++ /dev/null
@@ -1,1443 +0,0 @@
-/******************************************************
-Query graph
-
-(c) 1996 Innobase Oy
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "que0que.h"
-
-#ifdef UNIV_NONINL
-#include "que0que.ic"
-#endif
-
-#include "srv0que.h"
-#include "usr0sess.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "row0undo.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "row0sel.h"
-#include "row0purge.h"
-#include "dict0crea.h"
-#include "log0log.h"
-#include "eval0proc.h"
-#include "eval0eval.h"
-#include "pars0types.h"
-
-#define QUE_PARALLELIZE_LIMIT (64 * 256 * 256 * 256)
-#define QUE_ROUND_ROBIN_LIMIT (64 * 256 * 256 * 256)
-#define QUE_MAX_LOOPS_WITHOUT_CHECK 16
-
-/* If the following flag is set TRUE, the module will print trace info
-of SQL execution in the UNIV_SQL_DEBUG version */
-ibool que_trace_on = FALSE;
-
-ibool que_always_false = FALSE;
-
-/* Short introduction to query graphs
- ==================================
-
-A query graph consists of nodes linked to each other in various ways. The
-execution starts at que_run_threads() which takes a que_thr_t parameter.
-que_thr_t contains two fields that control query graph execution: run_node
-and prev_node. run_node is the next node to execute and prev_node is the
-last node executed.
-
-Each node has a pointer to a 'next' statement, i.e., its brother, and a
-pointer to its parent node. The next pointer is NULL in the last statement
-of a block.
-
-Loop nodes contain a link to the first statement of the enclosed statement
-list. While the loop runs, que_thr_step() checks if execution to the loop
-node came from its parent or from one of the statement nodes in the loop. If
-it came from the parent of the loop node it starts executing the first
-statement node in the loop. If it came from one of the statement nodes in
-the loop, then it checks if the statement node has another statement node
-following it, and runs it if so.
-
-To signify loop ending, the loop statements (see e.g. while_step()) set
-que_thr_t->run_node to the loop node's parent node. This is noticed on the
-next call of que_thr_step() and execution proceeds to the node pointed to by
-the loop node's 'next' pointer.
-
-For example, the code:
-
-X := 1;
-WHILE X < 5 LOOP
- X := X + 1;
- X := X + 1;
-X := 5
-
-will result in the following node hierarchy, with the X-axis indicating
-'next' links and the Y-axis indicating parent/child links:
-
-A - W - A
- |
- |
- A - A
-
-A = assign_node_t, W = while_node_t. */
-
-/* How a stored procedure containing COMMIT or ROLLBACK commands
-is executed?
-
-The commit or rollback can be seen as a subprocedure call.
-The problem is that if there are several query threads
-currently running within the transaction, their action could
-mess the commit or rollback operation. Or, at the least, the
-operation would be difficult to visualize and keep in control.
-
-Therefore the query thread requesting a commit or a rollback
-sends to the transaction a signal, which moves the transaction
-to TRX_QUE_SIGNALED state. All running query threads of the
-transaction will eventually notice that the transaction is now in
-this state and voluntarily suspend themselves. Only the last
-query thread which suspends itself will trigger handling of
-the signal.
-
-When the transaction starts to handle a rollback or commit
-signal, it builds a query graph which, when executed, will
-roll back or commit the incomplete transaction. The transaction
-is moved to the TRX_QUE_ROLLING_BACK or TRX_QUE_COMMITTING state.
-If specified, the SQL cursors opened by the transaction are closed.
-When the execution of the graph completes, it is like returning
-from a subprocedure: the query thread which requested the operation
-starts running again. */
-
-/**************************************************************************
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction.
-***NOTE***: This is the only function in which such a transition is allowed
-to happen! */
-static
-void
-que_thr_move_to_run_state(
-/*======================*/
- que_thr_t* thr); /* in: an query thread */
-
-/***************************************************************************
-Adds a query graph to the session's list of graphs. */
-
-void
-que_graph_publish(
-/*==============*/
- que_t* graph, /* in: graph */
- sess_t* sess) /* in: session */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- UT_LIST_ADD_LAST(graphs, sess->graphs, graph);
-}
-
-/***************************************************************************
-Creates a query graph fork node. */
-
-que_fork_t*
-que_fork_create(
-/*============*/
- /* out, own: fork node */
- que_t* graph, /* in: graph, if NULL then this
- fork node is assumed to be the
- graph root */
- que_node_t* parent, /* in: parent node */
- ulint fork_type, /* in: fork type */
- mem_heap_t* heap) /* in: memory heap where created */
-{
- que_fork_t* fork;
-
- ut_ad(heap);
-
- fork = mem_heap_alloc(heap, sizeof(que_fork_t));
-
- fork->common.type = QUE_NODE_FORK;
- fork->n_active_thrs = 0;
-
- fork->state = QUE_FORK_COMMAND_WAIT;
-
- if (graph != NULL) {
- fork->graph = graph;
- } else {
- fork->graph = fork;
- }
-
- fork->common.parent = parent;
- fork->fork_type = fork_type;
-
- fork->caller = NULL;
-
- UT_LIST_INIT(fork->thrs);
-
- fork->sym_tab = NULL;
- fork->info = NULL;
-
- fork->heap = heap;
-
- return(fork);
-}
-
-/***************************************************************************
-Creates a query graph thread node. */
-
-que_thr_t*
-que_thr_create(
-/*===========*/
- /* out, own: query thread node */
- que_fork_t* parent, /* in: parent node, i.e., a fork node */
- mem_heap_t* heap) /* in: memory heap where created */
-{
- que_thr_t* thr;
-
- ut_ad(parent && heap);
-
- thr = mem_heap_alloc(heap, sizeof(que_thr_t));
-
- thr->common.type = QUE_NODE_THR;
- thr->common.parent = parent;
-
- thr->magic_n = QUE_THR_MAGIC_N;
-
- thr->graph = parent->graph;
-
- thr->state = QUE_THR_COMMAND_WAIT;
-
- thr->is_active = FALSE;
-
- thr->run_node = NULL;
- thr->resource = 0;
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
-
- UT_LIST_ADD_LAST(thrs, parent->thrs, thr);
-
- return(thr);
-}
-
-/**************************************************************************
-Moves a suspended query thread to the QUE_THR_RUNNING state and may release
-a single worker thread to execute it. This function should be used to end
-the wait state of a query thread waiting for a lock or a stored procedure
-completion. */
-
-void
-que_thr_end_wait(
-/*=============*/
- que_thr_t* thr, /* in: query thread in the
- QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
- que_thr_t** next_thr) /* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if NULL is passed
- as the parameter, it is ignored */
-{
- ibool was_active;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(thr);
- ut_ad((thr->state == QUE_THR_LOCK_WAIT)
- || (thr->state == QUE_THR_PROCEDURE_WAIT)
- || (thr->state == QUE_THR_SIG_REPLY_WAIT));
- ut_ad(thr->run_node);
-
- thr->prev_node = thr->run_node;
-
- was_active = thr->is_active;
-
- que_thr_move_to_run_state(thr);
-
- if (was_active) {
-
- return;
- }
-
- if (next_thr && *next_thr == NULL) {
- *next_thr = thr;
- } else {
- ut_a(0);
- srv_que_task_enqueue_low(thr);
- }
-}
-
-/**************************************************************************
-Same as que_thr_end_wait, but no parameter next_thr available. */
-
-void
-que_thr_end_wait_no_next_thr(
-/*=========================*/
- que_thr_t* thr) /* in: query thread in the QUE_THR_LOCK_WAIT,
- or QUE_THR_PROCEDURE_WAIT, or
- QUE_THR_SIG_REPLY_WAIT state */
-{
- ibool was_active;
-
- ut_a(thr->state == QUE_THR_LOCK_WAIT); /* In MySQL this is the
- only possible state here */
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(thr);
- ut_ad((thr->state == QUE_THR_LOCK_WAIT)
- || (thr->state == QUE_THR_PROCEDURE_WAIT)
- || (thr->state == QUE_THR_SIG_REPLY_WAIT));
-
- was_active = thr->is_active;
-
- que_thr_move_to_run_state(thr);
-
- if (was_active) {
-
- return;
- }
-
- /* In MySQL we let the OS thread (not just the query thread) to wait
- for the lock to be released: */
-
- srv_release_mysql_thread_if_suspended(thr);
-
- /* srv_que_task_enqueue_low(thr); */
-}
-
-/**************************************************************************
-Inits a query thread for a command. */
-UNIV_INLINE
-void
-que_thr_init_command(
-/*=================*/
- que_thr_t* thr) /* in: query thread */
-{
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- que_thr_move_to_run_state(thr);
-}
-
-/**************************************************************************
-Starts execution of a command in a query fork. Picks a query thread which
-is not in the QUE_THR_RUNNING state and moves it to that state. If none
-can be chosen, a situation which may arise in parallelized fetches, NULL
-is returned. */
-
-que_thr_t*
-que_fork_start_command(
-/*===================*/
- /* out: a query thread of the graph moved to
- QUE_THR_RUNNING state, or NULL; the query
- thread should be executed by que_run_threads
- by the caller */
- que_fork_t* fork) /* in: a query fork */
-{
- que_thr_t* thr;
- que_thr_t* suspended_thr = NULL;
- que_thr_t* completed_thr = NULL;
-
- fork->state = QUE_FORK_ACTIVE;
-
- fork->last_sel_node = NULL;
-
- /* Choose the query thread to run: usually there is just one thread,
- but in a parallelized select, which necessarily is non-scrollable,
- there may be several to choose from */
-
- /* First we try to find a query thread in the QUE_THR_COMMAND_WAIT
- state. Then we try to find a query thread in the QUE_THR_SUSPENDED
- state, finally we try to find a query thread in the QUE_THR_COMPLETED
- state */
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- /* We make a single pass over the thr list within which we note which
- threads are ready to run. */
- while (thr) {
- switch (thr->state) {
- case QUE_THR_COMMAND_WAIT:
-
- /* We have to send the initial message to query thread
- to start it */
-
- que_thr_init_command(thr);
-
- return(thr);
-
- case QUE_THR_SUSPENDED:
- /* In this case the execution of the thread was
- suspended: no initial message is needed because
- execution can continue from where it was left */
- if (!suspended_thr) {
- suspended_thr = thr;
- }
-
- break;
-
- case QUE_THR_COMPLETED:
- if (!completed_thr) {
- completed_thr = thr;
- }
-
- break;
-
- case QUE_THR_LOCK_WAIT:
- ut_error;
-
- }
-
- thr = UT_LIST_GET_NEXT(thrs, thr);
- }
-
- if (suspended_thr) {
-
- thr = suspended_thr;
- que_thr_move_to_run_state(thr);
-
- } else if (completed_thr) {
-
- thr = completed_thr;
- que_thr_init_command(thr);
- }
-
- return(thr);
-}
-
-/**************************************************************************
-After signal handling is finished, returns control to a query graph error
-handling routine. (Currently, just returns the control to the root of the
-graph so that the graph can communicate an error message to the client.) */
-
-void
-que_fork_error_handle(
-/*==================*/
- trx_t* trx __attribute__((unused)), /* in: trx */
- que_t* fork) /* in: query graph which was run before signal
- handling started, NULL not allowed */
-{
- que_thr_t* thr;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->sess->state == SESS_ERROR);
- ut_ad(UT_LIST_GET_LEN(trx->reply_signals) == 0);
- ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- while (thr != NULL) {
- ut_ad(!thr->is_active);
- ut_ad(thr->state != QUE_THR_SIG_REPLY_WAIT);
- ut_ad(thr->state != QUE_THR_LOCK_WAIT);
-
- thr->run_node = thr;
- thr->prev_node = thr->child;
- thr->state = QUE_THR_COMPLETED;
-
- thr = UT_LIST_GET_NEXT(thrs, thr);
- }
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- que_thr_move_to_run_state(thr);
-
- ut_a(0);
- srv_que_task_enqueue_low(thr);
-}
-
-/********************************************************************
-Tests if all the query threads in the same fork have a given state. */
-UNIV_INLINE
-ibool
-que_fork_all_thrs_in_state(
-/*=======================*/
- /* out: TRUE if all the query threads in the
- same fork were in the given state */
- que_fork_t* fork, /* in: query fork */
- ulint state) /* in: state */
-{
- que_thr_t* thr_node;
-
- thr_node = UT_LIST_GET_FIRST(fork->thrs);
-
- while (thr_node != NULL) {
- if (thr_node->state != state) {
-
- return(FALSE);
- }
-
- thr_node = UT_LIST_GET_NEXT(thrs, thr_node);
- }
-
- return(TRUE);
-}
-
-/**************************************************************************
-Calls que_graph_free_recursive for statements in a statement list. */
-static
-void
-que_graph_free_stat_list(
-/*=====================*/
- que_node_t* node) /* in: first query graph node in the list */
-{
- while (node) {
- que_graph_free_recursive(node);
-
- node = que_node_get_next(node);
- }
-}
-
-/**************************************************************************
-Frees a query graph, but not the heap where it was created. Does not free
-explicit cursor declarations, they are freed in que_graph_free. */
-
-void
-que_graph_free_recursive(
-/*=====================*/
- que_node_t* node) /* in: query graph node */
-{
- que_fork_t* fork;
- que_thr_t* thr;
- undo_node_t* undo;
- sel_node_t* sel;
- ins_node_t* ins;
- upd_node_t* upd;
- tab_node_t* cre_tab;
- ind_node_t* cre_ind;
-
- if (node == NULL) {
-
- return;
- }
-
- switch (que_node_get_type(node)) {
-
- case QUE_NODE_FORK:
- fork = node;
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- while (thr) {
- que_graph_free_recursive(thr);
-
- thr = UT_LIST_GET_NEXT(thrs, thr);
- }
-
- break;
- case QUE_NODE_THR:
-
- thr = node;
-
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt;"
- " magic n %lu\n",
- (unsigned long) thr->magic_n);
- mem_analyze_corruption(thr);
- ut_error;
- }
-
- thr->magic_n = QUE_THR_MAGIC_FREED;
-
- que_graph_free_recursive(thr->child);
-
- break;
- case QUE_NODE_UNDO:
-
- undo = node;
-
- mem_heap_free(undo->heap);
-
- break;
- case QUE_NODE_SELECT:
-
- sel = node;
-
- sel_node_free_private(sel);
-
- break;
- case QUE_NODE_INSERT:
-
- ins = node;
-
- que_graph_free_recursive(ins->select);
-
- mem_heap_free(ins->entry_sys_heap);
-
- break;
- case QUE_NODE_UPDATE:
-
- upd = node;
-
- if (upd->in_mysql_interface) {
-
- btr_pcur_free_for_mysql(upd->pcur);
- }
-
- que_graph_free_recursive(upd->cascade_node);
-
- if (upd->cascade_heap) {
- mem_heap_free(upd->cascade_heap);
- }
-
- que_graph_free_recursive(upd->select);
-
- mem_heap_free(upd->heap);
-
- break;
- case QUE_NODE_CREATE_TABLE:
- cre_tab = node;
-
- que_graph_free_recursive(cre_tab->tab_def);
- que_graph_free_recursive(cre_tab->col_def);
- que_graph_free_recursive(cre_tab->commit_node);
-
- mem_heap_free(cre_tab->heap);
-
- break;
- case QUE_NODE_CREATE_INDEX:
- cre_ind = node;
-
- que_graph_free_recursive(cre_ind->ind_def);
- que_graph_free_recursive(cre_ind->field_def);
- que_graph_free_recursive(cre_ind->commit_node);
-
- mem_heap_free(cre_ind->heap);
-
- break;
- case QUE_NODE_PROC:
- que_graph_free_stat_list(((proc_node_t*)node)->stat_list);
-
- break;
- case QUE_NODE_IF:
- que_graph_free_stat_list(((if_node_t*)node)->stat_list);
- que_graph_free_stat_list(((if_node_t*)node)->else_part);
- que_graph_free_stat_list(((if_node_t*)node)->elsif_list);
-
- break;
- case QUE_NODE_ELSIF:
- que_graph_free_stat_list(((elsif_node_t*)node)->stat_list);
-
- break;
- case QUE_NODE_WHILE:
- que_graph_free_stat_list(((while_node_t*)node)->stat_list);
-
- break;
- case QUE_NODE_FOR:
- que_graph_free_stat_list(((for_node_t*)node)->stat_list);
-
- break;
-
- case QUE_NODE_ASSIGNMENT:
- case QUE_NODE_EXIT:
- case QUE_NODE_RETURN:
- case QUE_NODE_COMMIT:
- case QUE_NODE_ROLLBACK:
- case QUE_NODE_LOCK:
- case QUE_NODE_FUNC:
- case QUE_NODE_ORDER:
- case QUE_NODE_ROW_PRINTF:
- case QUE_NODE_OPEN:
- case QUE_NODE_FETCH:
- /* No need to do anything */
-
- break;
- default:
- fprintf(stderr,
- "que_node struct appears corrupt; type %lu\n",
- (unsigned long) que_node_get_type(node));
- mem_analyze_corruption(node);
- ut_error;
- }
-}
-
-/**************************************************************************
-Frees a query graph. */
-
-void
-que_graph_free(
-/*===========*/
- que_t* graph) /* in: query graph; we assume that the memory
- heap where this graph was created is private
- to this graph: if not, then use
- que_graph_free_recursive and free the heap
- afterwards! */
-{
- ut_ad(graph);
-
- if (graph->sym_tab) {
- /* The following call frees dynamic memory allocated
- for variables etc. during execution. Frees also explicit
- cursor definitions. */
-
- sym_tab_free_private(graph->sym_tab);
- }
-
- if (graph->info && graph->info->graph_owns_us) {
- pars_info_free(graph->info);
- }
-
- que_graph_free_recursive(graph);
-
- mem_heap_free(graph->heap);
-}
-
-/**************************************************************************
-Checks if the query graph is in a state where it should be freed, and
-frees it in that case. If the session is in a state where it should be
-closed, also this is done. */
-
-ibool
-que_graph_try_free(
-/*===============*/
- /* out: TRUE if freed */
- que_t* graph) /* in: query graph */
-{
- sess_t* sess;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sess = (graph->trx)->sess;
-
- if ((graph->state == QUE_FORK_BEING_FREED)
- && (graph->n_active_thrs == 0)) {
-
- UT_LIST_REMOVE(graphs, sess->graphs, graph);
- que_graph_free(graph);
-
- sess_try_close(sess);
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/********************************************************************
-Performs an execution step on a thr node. */
-static
-que_thr_t*
-que_thr_node_step(
-/*==============*/
- /* out: query thread to run next, or NULL
- if none */
- que_thr_t* thr) /* in: query thread where run_node must
- be the thread node itself */
-{
- ut_ad(thr->run_node == thr);
-
- if (thr->prev_node == thr->common.parent) {
- /* If control to the node came from above, it is just passed
- on */
-
- thr->run_node = thr->child;
-
- return(thr);
- }
-
- mutex_enter(&kernel_mutex);
-
- if (que_thr_peek_stop(thr)) {
-
- mutex_exit(&kernel_mutex);
-
- return(thr);
- }
-
- /* Thread execution completed */
-
- thr->state = QUE_THR_COMPLETED;
-
- mutex_exit(&kernel_mutex);
-
- return(NULL);
-}
-
-/**************************************************************************
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction if thr was
-not active.
-***NOTE***: This and ..._mysql are the only functions in which such a
-transition is allowed to happen! */
-static
-void
-que_thr_move_to_run_state(
-/*======================*/
- que_thr_t* thr) /* in: an query thread */
-{
- trx_t* trx;
-
- ut_ad(thr->state != QUE_THR_RUNNING);
-
- trx = thr_get_trx(thr);
-
- if (!thr->is_active) {
-
- (thr->graph)->n_active_thrs++;
-
- trx->n_active_thrs++;
-
- thr->is_active = TRUE;
-
- ut_ad((thr->graph)->n_active_thrs == 1);
- ut_ad(trx->n_active_thrs == 1);
- }
-
- thr->state = QUE_THR_RUNNING;
-}
-
-/**************************************************************************
-Decrements the query thread reference counts in the query graph and the
-transaction. May start signal handling, e.g., a rollback.
-*** NOTE ***:
-This and que_thr_stop_for_mysql are the only functions where the reference
-count can be decremented and this function may only be called from inside
-que_run_threads or que_thr_check_if_switch! These restrictions exist to make
-the rollback code easier to maintain. */
-static
-void
-que_thr_dec_refer_count(
-/*====================*/
- que_thr_t* thr, /* in: query thread */
- que_thr_t** next_thr) /* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-{
- que_fork_t* fork;
- trx_t* trx;
- sess_t* sess;
- ulint fork_type;
- ibool stopped;
-
- fork = thr->common.parent;
- trx = thr_get_trx(thr);
- sess = trx->sess;
-
- mutex_enter(&kernel_mutex);
-
- ut_a(thr->is_active);
-
- if (thr->state == QUE_THR_RUNNING) {
-
- stopped = que_thr_stop(thr);
-
- if (!stopped) {
- /* The reason for the thr suspension or wait was
- already canceled before we came here: continue
- running the thread */
-
- /* fputs("!!!!!!!! Wait already ended: continue thr\n",
- stderr); */
-
- if (next_thr && *next_thr == NULL) {
- /* Normally srv_suspend_mysql_thread resets
- the state to DB_SUCCESS before waiting, but
- in this case we have to do it here,
- otherwise nobody does it. */
- trx->error_state = DB_SUCCESS;
-
- *next_thr = thr;
- } else {
- ut_a(0);
- srv_que_task_enqueue_low(thr);
- }
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
- }
-
- ut_ad(fork->n_active_thrs == 1);
- ut_ad(trx->n_active_thrs == 1);
-
- fork->n_active_thrs--;
- trx->n_active_thrs--;
-
- thr->is_active = FALSE;
-
- if (trx->n_active_thrs > 0) {
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- fork_type = fork->fork_type;
-
- /* Check if all query threads in the same fork are completed */
-
- if (que_fork_all_thrs_in_state(fork, QUE_THR_COMPLETED)) {
-
- if (fork_type == QUE_FORK_ROLLBACK) {
- /* This is really the undo graph used in rollback,
- no roll_node in this graph */
-
- ut_ad(UT_LIST_GET_LEN(trx->signals) > 0);
- ut_ad(trx->handling_signals == TRUE);
-
- trx_finish_rollback_off_kernel(fork, trx, next_thr);
-
- } else if (fork_type == QUE_FORK_PURGE) {
-
- /* Do nothing */
- } else if (fork_type == QUE_FORK_RECOVERY) {
-
- /* Do nothing */
- } else if (fork_type == QUE_FORK_MYSQL_INTERFACE) {
-
- /* Do nothing */
- } else {
- ut_error; /* not used in MySQL */
- }
- }
-
- if (UT_LIST_GET_LEN(trx->signals) > 0 && trx->n_active_thrs == 0) {
-
- /* If the trx is signaled and its query thread count drops to
- zero, then we start processing a signal; from it we may get
- a new query thread to run */
-
- trx_sig_start_handle(trx, next_thr);
- }
-
- if (trx->handling_signals && UT_LIST_GET_LEN(trx->signals) == 0) {
-
- trx_end_signal_handling(trx);
- }
-
- mutex_exit(&kernel_mutex);
-}
-
-/**************************************************************************
-Stops a query thread if graph or trx is in a state requiring it. The
-conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
-to be reserved. */
-
-ibool
-que_thr_stop(
-/*=========*/
- /* out: TRUE if stopped */
- que_thr_t* thr) /* in: query thread */
-{
- trx_t* trx;
- que_t* graph;
- ibool ret = TRUE;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- graph = thr->graph;
- trx = graph->trx;
-
- if (graph->state == QUE_FORK_COMMAND_WAIT) {
- thr->state = QUE_THR_SUSPENDED;
-
- } else if (trx->que_state == TRX_QUE_LOCK_WAIT) {
-
- UT_LIST_ADD_FIRST(trx_thrs, trx->wait_thrs, thr);
- thr->state = QUE_THR_LOCK_WAIT;
-
- } else if (trx->error_state != DB_SUCCESS
- && trx->error_state != DB_LOCK_WAIT) {
-
- /* Error handling built for the MySQL interface */
- thr->state = QUE_THR_COMPLETED;
-
- } else if (UT_LIST_GET_LEN(trx->signals) > 0
- && graph->fork_type != QUE_FORK_ROLLBACK) {
-
- thr->state = QUE_THR_SUSPENDED;
- } else {
- ut_ad(graph->state == QUE_FORK_ACTIVE);
-
- ret = FALSE;
- }
-
- return(ret);
-}
-
-/**************************************************************************
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
-query thread is stopped and made inactive, except in the case where
-it was put to the lock wait state in lock0lock.c, but the lock has already
-been granted or the transaction chosen as a victim in deadlock resolution. */
-
-void
-que_thr_stop_for_mysql(
-/*===================*/
- que_thr_t* thr) /* in: query thread */
-{
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- mutex_enter(&kernel_mutex);
-
- if (thr->state == QUE_THR_RUNNING) {
-
- if (trx->error_state != DB_SUCCESS
- && trx->error_state != DB_LOCK_WAIT) {
-
- /* Error handling built for the MySQL interface */
- thr->state = QUE_THR_COMPLETED;
- } else {
- /* It must have been a lock wait but the lock was
- already released, or this transaction was chosen
- as a victim in selective deadlock resolution */
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
- }
-
- ut_ad(thr->is_active == TRUE);
- ut_ad(trx->n_active_thrs == 1);
- ut_ad(thr->graph->n_active_thrs == 1);
-
- thr->is_active = FALSE;
- (thr->graph)->n_active_thrs--;
-
- trx->n_active_thrs--;
-
- mutex_exit(&kernel_mutex);
-}
-
-/**************************************************************************
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction if thr was
-not active. */
-
-void
-que_thr_move_to_run_state_for_mysql(
-/*================================*/
- que_thr_t* thr, /* in: an query thread */
- trx_t* trx) /* in: transaction */
-{
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt; magic n %lu\n",
- (unsigned long) thr->magic_n);
-
- mem_analyze_corruption(thr);
-
- ut_error;
- }
-
- if (!thr->is_active) {
-
- thr->graph->n_active_thrs++;
-
- trx->n_active_thrs++;
-
- thr->is_active = TRUE;
- }
-
- thr->state = QUE_THR_RUNNING;
-}
-
-/**************************************************************************
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL
-select, when there is no error or lock wait. */
-
-void
-que_thr_stop_for_mysql_no_error(
-/*============================*/
- que_thr_t* thr, /* in: query thread */
- trx_t* trx) /* in: transaction */
-{
- ut_ad(thr->state == QUE_THR_RUNNING);
- ut_ad(thr->is_active == TRUE);
- ut_ad(trx->n_active_thrs == 1);
- ut_ad(thr->graph->n_active_thrs == 1);
-
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt; magic n %lu\n",
- (unsigned long) thr->magic_n);
-
- mem_analyze_corruption(thr);
-
- ut_error;
- }
-
- thr->state = QUE_THR_COMPLETED;
-
- thr->is_active = FALSE;
- (thr->graph)->n_active_thrs--;
-
- trx->n_active_thrs--;
-}
-
-/********************************************************************
-Get the first containing loop node (e.g. while_node_t or for_node_t) for the
-given node, or NULL if the node is not within a loop. */
-
-que_node_t*
-que_node_get_containing_loop_node(
-/*==============================*/
- /* out: containing loop node, or NULL. */
- que_node_t* node) /* in: node */
-{
- ut_ad(node);
-
- for (;;) {
- ulint type;
-
- node = que_node_get_parent(node);
-
- if (!node) {
- break;
- }
-
- type = que_node_get_type(node);
-
- if ((type == QUE_NODE_FOR) || (type == QUE_NODE_WHILE)) {
- break;
- }
- }
-
- return(node);
-}
-
-/**************************************************************************
-Prints info of an SQL query graph node. */
-
-void
-que_node_print_info(
-/*================*/
- que_node_t* node) /* in: query graph node */
-{
- ulint type;
- const char* str;
-
- type = que_node_get_type(node);
-
- if (type == QUE_NODE_SELECT) {
- str = "SELECT";
- } else if (type == QUE_NODE_INSERT) {
- str = "INSERT";
- } else if (type == QUE_NODE_UPDATE) {
- str = "UPDATE";
- } else if (type == QUE_NODE_WHILE) {
- str = "WHILE";
- } else if (type == QUE_NODE_ASSIGNMENT) {
- str = "ASSIGNMENT";
- } else if (type == QUE_NODE_IF) {
- str = "IF";
- } else if (type == QUE_NODE_FETCH) {
- str = "FETCH";
- } else if (type == QUE_NODE_OPEN) {
- str = "OPEN";
- } else if (type == QUE_NODE_PROC) {
- str = "STORED PROCEDURE";
- } else if (type == QUE_NODE_FUNC) {
- str = "FUNCTION";
- } else if (type == QUE_NODE_LOCK) {
- str = "LOCK";
- } else if (type == QUE_NODE_THR) {
- str = "QUERY THREAD";
- } else if (type == QUE_NODE_COMMIT) {
- str = "COMMIT";
- } else if (type == QUE_NODE_UNDO) {
- str = "UNDO ROW";
- } else if (type == QUE_NODE_PURGE) {
- str = "PURGE ROW";
- } else if (type == QUE_NODE_ROLLBACK) {
- str = "ROLLBACK";
- } else if (type == QUE_NODE_CREATE_TABLE) {
- str = "CREATE TABLE";
- } else if (type == QUE_NODE_CREATE_INDEX) {
- str = "CREATE INDEX";
- } else if (type == QUE_NODE_FOR) {
- str = "FOR LOOP";
- } else if (type == QUE_NODE_RETURN) {
- str = "RETURN";
- } else if (type == QUE_NODE_EXIT) {
- str = "EXIT";
- } else {
- str = "UNKNOWN NODE TYPE";
- }
-
- fprintf(stderr, "Node type %lu: %s, address %p\n",
- (ulong) type, str, (void*) node);
-}
-
-/**************************************************************************
-Performs an execution step on a query thread. */
-UNIV_INLINE
-que_thr_t*
-que_thr_step(
-/*=========*/
- /* out: query thread to run next: it may
- differ from the input parameter if, e.g., a
- subprocedure call is made */
- que_thr_t* thr) /* in: query thread */
-{
- que_node_t* node;
- que_thr_t* old_thr;
- trx_t* trx;
- ulint type;
-
- trx = thr_get_trx(thr);
-
- ut_ad(thr->state == QUE_THR_RUNNING);
- ut_a(trx->error_state == DB_SUCCESS);
-
- thr->resource++;
-
- node = thr->run_node;
- type = que_node_get_type(node);
-
- old_thr = thr;
-
-#ifdef UNIV_DEBUG
- if (que_trace_on) {
- fputs("To execute: ", stderr);
- que_node_print_info(node);
- }
-#endif
- if (type & QUE_NODE_CONTROL_STAT) {
- if ((thr->prev_node != que_node_get_parent(node))
- && que_node_get_next(thr->prev_node)) {
-
- /* The control statements, like WHILE, always pass the
- control to the next child statement if there is any
- child left */
-
- thr->run_node = que_node_get_next(thr->prev_node);
-
- } else if (type == QUE_NODE_IF) {
- if_step(thr);
- } else if (type == QUE_NODE_FOR) {
- for_step(thr);
- } else if (type == QUE_NODE_PROC) {
-
- /* We can access trx->undo_no without reserving
- trx->undo_mutex, because there cannot be active query
- threads doing updating or inserting at the moment! */
-
- if (thr->prev_node == que_node_get_parent(node)) {
- trx->last_sql_stat_start.least_undo_no
- = trx->undo_no;
- }
-
- proc_step(thr);
- } else if (type == QUE_NODE_WHILE) {
- while_step(thr);
- } else {
- ut_error;
- }
- } else if (type == QUE_NODE_ASSIGNMENT) {
- assign_step(thr);
- } else if (type == QUE_NODE_SELECT) {
- thr = row_sel_step(thr);
- } else if (type == QUE_NODE_INSERT) {
- thr = row_ins_step(thr);
- } else if (type == QUE_NODE_UPDATE) {
- thr = row_upd_step(thr);
- } else if (type == QUE_NODE_FETCH) {
- thr = fetch_step(thr);
- } else if (type == QUE_NODE_OPEN) {
- thr = open_step(thr);
- } else if (type == QUE_NODE_FUNC) {
- proc_eval_step(thr);
-
- } else if (type == QUE_NODE_LOCK) {
-
- ut_error;
- /*
- thr = que_lock_step(thr);
- */
- } else if (type == QUE_NODE_THR) {
- thr = que_thr_node_step(thr);
- } else if (type == QUE_NODE_COMMIT) {
- thr = trx_commit_step(thr);
- } else if (type == QUE_NODE_UNDO) {
- thr = row_undo_step(thr);
- } else if (type == QUE_NODE_PURGE) {
- thr = row_purge_step(thr);
- } else if (type == QUE_NODE_RETURN) {
- thr = return_step(thr);
- } else if (type == QUE_NODE_EXIT) {
- thr = exit_step(thr);
- } else if (type == QUE_NODE_ROLLBACK) {
- thr = trx_rollback_step(thr);
- } else if (type == QUE_NODE_CREATE_TABLE) {
- thr = dict_create_table_step(thr);
- } else if (type == QUE_NODE_CREATE_INDEX) {
- thr = dict_create_index_step(thr);
- } else if (type == QUE_NODE_ROW_PRINTF) {
- thr = row_printf_step(thr);
- } else {
- ut_error;
- }
-
- if (type == QUE_NODE_EXIT) {
- old_thr->prev_node = que_node_get_containing_loop_node(node);
- } else {
- old_thr->prev_node = node;
- }
-
- if (thr) {
- ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
- }
-
- return(thr);
-}
-
-/**************************************************************************
-Run a query thread until it finishes or encounters e.g. a lock wait. */
-static
-void
-que_run_threads_low(
-/*================*/
- que_thr_t* thr) /* in: query thread */
-{
- que_thr_t* next_thr;
- ulint cumul_resource;
- ulint loop_count;
-
- ut_ad(thr->state == QUE_THR_RUNNING);
- ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
- ut_ad(!mutex_own(&kernel_mutex));
-
- /* cumul_resource counts how much resources the OS thread (NOT the
- query thread) has spent in this function */
-
- loop_count = QUE_MAX_LOOPS_WITHOUT_CHECK;
- cumul_resource = 0;
-loop:
- /* Check that there is enough space in the log to accommodate
- possible log entries by this query step; if the operation can touch
- more than about 4 pages, checks must be made also within the query
- step! */
-
- log_free_check();
-
- /* Perform the actual query step: note that the query thread
- may change if, e.g., a subprocedure call is made */
-
- /*-------------------------*/
- next_thr = que_thr_step(thr);
- /*-------------------------*/
-
- ut_a(!next_thr || (thr_get_trx(next_thr)->error_state == DB_SUCCESS));
-
- loop_count++;
-
- if (next_thr != thr) {
- ut_a(next_thr == NULL);
-
- /* This can change next_thr to a non-NULL value if there was
- a lock wait that already completed. */
- que_thr_dec_refer_count(thr, &next_thr);
-
- if (next_thr == NULL) {
-
- return;
- }
-
- loop_count = QUE_MAX_LOOPS_WITHOUT_CHECK;
-
- thr = next_thr;
- }
-
- goto loop;
-}
-
-/**************************************************************************
-Run a query thread. Handles lock waits. */
-void
-que_run_threads(
-/*============*/
- que_thr_t* thr) /* in: query thread */
-{
-loop:
- ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
- que_run_threads_low(thr);
-
- mutex_enter(&kernel_mutex);
-
- switch (thr->state) {
-
- case QUE_THR_RUNNING:
- /* There probably was a lock wait, but it already ended
- before we came here: continue running thr */
-
- mutex_exit(&kernel_mutex);
-
- goto loop;
-
- case QUE_THR_LOCK_WAIT:
- mutex_exit(&kernel_mutex);
-
- /* The ..._mysql_... function works also for InnoDB's
- internal threads. Let us wait that the lock wait ends. */
-
- srv_suspend_mysql_thread(thr);
-
- if (thr_get_trx(thr)->error_state != DB_SUCCESS) {
- /* thr was chosen as a deadlock victim or there was
- a lock wait timeout */
-
- que_thr_dec_refer_count(thr, NULL);
-
- return;
- }
-
- goto loop;
-
- case QUE_THR_COMPLETED:
- case QUE_THR_COMMAND_WAIT:
- /* Do nothing */
- break;
-
- default:
- ut_error;
- }
-
- mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************************
-Evaluate the given SQL. */
-
-ulint
-que_eval_sql(
-/*=========*/
- /* out: error code or DB_SUCCESS */
- pars_info_t* info, /* in: info struct, or NULL */
- const char* sql, /* in: SQL string */
- ibool reserve_dict_mutex,
- /* in: if TRUE, acquire/release
- dict_sys->mutex around call to pars_sql. */
- trx_t* trx) /* in: trx */
-{
- que_thr_t* thr;
- que_t* graph;
-
- ut_a(trx->error_state == DB_SUCCESS);
-
- if (reserve_dict_mutex) {
- mutex_enter(&dict_sys->mutex);
- }
-
- graph = pars_sql(info, sql);
-
- if (reserve_dict_mutex) {
- mutex_exit(&dict_sys->mutex);
- }
-
- ut_a(graph);
-
- graph->trx = trx;
- trx->graph = NULL;
-
- graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
-
- ut_a(thr = que_fork_start_command(graph));
-
- que_run_threads(thr);
-
- que_graph_free(graph);
-
- return(trx->error_state);
-}
diff --git a/storage/innobase/read/read0read.c b/storage/innobase/read/read0read.c
deleted file mode 100644
index 4068cf4fa69..00000000000
--- a/storage/innobase/read/read0read.c
+++ /dev/null
@@ -1,527 +0,0 @@
-/******************************************************
-Cursor read
-
-(c) 1997 Innobase Oy
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#include "read0read.h"
-
-#ifdef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
-#include "srv0srv.h"
-#include "trx0sys.h"
-
-/*
--------------------------------------------------------------------------------
-FACT A: Cursor read view on a secondary index sees only committed versions
--------
-of the records in the secondary index or those versions of rows created
-by transaction which created a cursor before cursor was created even
-if transaction which created the cursor has changed that clustered index page.
-
-PROOF: We must show that read goes always to the clustered index record
-to see that record is visible in the cursor read view. Consider e.g.
-following table and SQL-clauses:
-
-create table t1(a int not null, b int, primary key(a), index(b));
-insert into t1 values (1,1),(2,2);
-commit;
-
-Now consider that we have a cursor for a query
-
-select b from t1 where b >= 1;
-
-This query will use secondary key on the table t1. Now after the first fetch
-on this cursor if we do a update:
-
-update t1 set b = 5 where b = 2;
-
-Now the second fetch of the cursor should not see record (2,5); instead it
-should see record (2,2).
-
-We should also show that if we execute delete from t1 where b = 5; we can
-still see record (2,2).
-
-When we access a secondary key record, the maximum transaction id is fetched
-from this record and this trx_id is compared to up_limit_id in the view.
-If the trx_id in the record is greater than or equal to up_limit_id in the
-view, the clustered record is accessed. Because the trx_id of the creating
-transaction was stored, when this view was created, in the list of
-trx_ids not seen by this read view, a previous version of the
-record is requested to be built. This is built using the clustered record.
-If the secondary key record is delete marked, its corresponding
-clustered record can already have been purged only if the record's
-trx_id < low_limit_no. Purge can't remove any record deleted by a
-transaction which was active when the cursor was created. We still
-may have a deleted secondary key record but no clustered record, but
-this is not a problem because this case is handled in the
-row_sel_get_clust_rec() function, which is called
-whenever we note that this read view does not see the trx_id in the
-record. Thus, we see the correct version. Q. E. D.
-
--------------------------------------------------------------------------------
-FACT B: Cursor read view on a clustered index sees only committed versions
--------
-of the records in the clustered index or those versions of rows created
-by transaction which created a cursor before cursor was created even
-if transaction which created the cursor has changed that clustered index page.
-
-PROOF: Consider e.g. the following table and SQL-clauses:
-
-create table t1(a int not null, b int, primary key(a));
-insert into t1 (a) values (1),(2);
-commit;
-
-Now consider that we have a cursor for a query
-
-select a from t1 where a >= 1;
-
-This query will use clustered key on the table t1. Now after the first fetch
-on this cursor if we do a update:
-
-update t1 set a = 5 where a = 2;
-
-Now the second fetch of the cursor should not see record (5); instead it
-should see record (2).
-
-We should also show that if we execute delete from t1 where a = 5; after
-the cursor is opened, we can still see record (2).
-
-When accessing a clustered record we always check if this read view sees
-the trx_id stored in the clustered record. By default we don't see any changes
-if the record trx_id >= low_limit_id, i.e. the change was made by a transaction
-which started after the transaction which created the cursor. If the row
-was changed by such a future transaction, a previous version of the
-clustered record is created. Thus we see only a committed version in
-this case. We see all changes made by committed transactions, i.e.
-record trx_id < up_limit_id. In this case we don't need to do anything;
-we already see the correct version of the record. We don't see any changes
-made by an active transaction except the creating transaction. We have stored
-the trx_id of the creating transaction in the list of trx_ids when this view
-was created. Thus we can easily see if this record was changed by the
-creating transaction. Because we already have the clustered record we can
-access roll_ptr. Using this roll_ptr we can fetch the undo record.
-We can now check that undo_no of the undo record is less than undo_no of the
-transaction which created the view when the cursor was created. We see this
-clustered record only in the case when the record undo_no is less than undo_no
-in the view. If this is not true, we build, based on undo_rec, a previous
-version of the record. This record is found because purge can't remove
-records accessed by an active transaction. Thus we see the correct version. Q. E. D.
--------------------------------------------------------------------------------
-FACT C: Purge does not remove any delete marked row that is visible
--------
-to cursor view.
-
-TODO: prove this
-
-*/
-
-/*************************************************************************
-Creates a read view object. Allocates the read_view_t struct and its
-trx_ids array (n cells of dulint) from heap. Only n_trx_ids and trx_ids
-are assigned here; every other field of the view is left for the caller
-to fill in. */
-UNIV_INLINE
-read_view_t*
-read_view_create_low(
-/*=================*/
- /* out, own: read view struct */
- ulint n, /* in: number of cells in the trx_ids array */
- mem_heap_t* heap) /* in: memory heap from which allocated */
-{
- read_view_t* view;
-
- view = mem_heap_alloc(heap, sizeof(read_view_t));
-
- view->n_trx_ids = n;
- view->trx_ids = mem_heap_alloc(heap, n * sizeof(dulint));
-
- return(view);
-}
-
-/*************************************************************************
-Makes a copy of the oldest existing read view, with the exception that also
-the creating trx of the oldest view is set as not visible in the 'copied'
-view. Opens a new view if no views currently exist. The view must be closed
-with ..._close. This is used in purge.
-
-The oldest view is taken to be the last element of trx_sys->view_list, and
-the copy is appended to the end of that list. The caller must hold
-kernel_mutex (debug assertion).
-
-NOTE(review): on the copy path the fields type and undo_no of the new view
-are not assigned in this function or in read_view_create_low(); confirm
-that no reader depends on them for a view created here. */
-
-read_view_t*
-read_view_oldest_copy_or_open_new(
-/*==============================*/
- /* out, own: read view struct */
- dulint cr_trx_id, /* in: trx_id of creating
- transaction, or (0, 0) used in purge*/
- mem_heap_t* heap) /* in: memory heap from which
- allocated */
-{
- read_view_t* old_view;
- read_view_t* view_copy;
- ibool needs_insert = TRUE; /* TRUE until the old creator id has
- been placed in the copy */
- ulint insert_done = 0; /* 1 once the creator id is inserted;
- used as the index shift into old_view */
- ulint n;
- ulint i;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- old_view = UT_LIST_GET_LAST(trx_sys->view_list);
-
- if (old_view == NULL) {
-
- return(read_view_open_now(cr_trx_id, heap));
- }
-
- n = old_view->n_trx_ids;
-
- if (ut_dulint_cmp(old_view->creator_trx_id,
- ut_dulint_create(0,0)) != 0) {
- n++; /* one extra cell for the old view's creator id */
- } else {
- needs_insert = FALSE; /* creator id is (0, 0): nothing to add */
- }
-
- view_copy = read_view_create_low(n, heap);
-
- /* Insert the id of the creator in the right place of the descending
- array of ids, if needs_insert is TRUE: the creator id goes in front of
- the first old id that is smaller than it, or at the end of the array
- if there is no such id */
-
- i = 0;
- while (i < n) {
- if (needs_insert
- && (i >= old_view->n_trx_ids
- || ut_dulint_cmp(old_view->creator_trx_id,
- read_view_get_nth_trx_id(old_view, i))
- > 0)) {
-
- read_view_set_nth_trx_id(view_copy, i,
- old_view->creator_trx_id);
- needs_insert = FALSE;
- insert_done = 1;
- } else {
- read_view_set_nth_trx_id(view_copy, i,
- read_view_get_nth_trx_id(
- old_view,
- i - insert_done));
- }
-
- i++;
- }
-
- view_copy->creator_trx_id = cr_trx_id;
-
- view_copy->low_limit_no = old_view->low_limit_no;
- view_copy->low_limit_id = old_view->low_limit_id;
-
-
- if (n > 0) {
- /* The last active transaction has the smallest id: */
- view_copy->up_limit_id = read_view_get_nth_trx_id(
- view_copy, n - 1);
- } else {
- view_copy->up_limit_id = old_view->up_limit_id;
- }
-
- UT_LIST_ADD_LAST(view_list, trx_sys->view_list, view_copy);
-
- return(view_copy);
-}
-
-/*************************************************************************
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view. The ids of all transactions in
-trx_sys->trx_list that are in state TRX_ACTIVE or TRX_PREPARED, other than
-cr_trx_id itself, are stored in the view as not visible. The new view is
-added to the front of trx_sys->view_list. The caller must hold kernel_mutex
-(debug assertion). */
-
-read_view_t*
-read_view_open_now(
-/*===============*/
- /* out, own: read view struct */
- dulint cr_trx_id, /* in: trx_id of creating
- transaction, or (0, 0) used in
- purge */
- mem_heap_t* heap) /* in: memory heap from which
- allocated */
-{
- read_view_t* view;
- trx_t* trx;
- ulint n;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- view = read_view_create_low(UT_LIST_GET_LEN(trx_sys->trx_list), heap); /* one
- cell per listed trx: an upper bound, n_trx_ids is
- corrected to the real count below */
-
- view->creator_trx_id = cr_trx_id;
- view->type = VIEW_NORMAL;
- view->undo_no = ut_dulint_create(0, 0);
-
- /* No future transactions should be visible in the view */
-
- view->low_limit_no = trx_sys->max_trx_id;
- view->low_limit_id = view->low_limit_no;
-
- n = 0;
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- /* No active transaction should be visible, except cr_trx */
-
- while (trx) {
- if (ut_dulint_cmp(trx->id, cr_trx_id) != 0
- && (trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED)) {
-
- read_view_set_nth_trx_id(view, n, trx->id);
-
- n++;
-
- /* NOTE that a transaction whose trx number is <
- trx_sys->max_trx_id can still be active, if it is
- in the middle of its commit! Note that when a
- transaction starts, we initialize trx->no to
- ut_dulint_max. */
-
- if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {
-
- view->low_limit_no = trx->no; /* keep the minimum */
- }
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- view->n_trx_ids = n;
-
- if (n > 0) {
- /* The last active transaction has the smallest id: */
- view->up_limit_id = read_view_get_nth_trx_id(view, n - 1);
- } else {
- view->up_limit_id = view->low_limit_id;
- }
-
-
- UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
-
- return(view);
-}
-
-/*************************************************************************
-Closes a read view: removes it from trx_sys->view_list. No memory is
-freed here. The caller must hold kernel_mutex (debug assertion). */
-
-void
-read_view_close(
-/*============*/
- read_view_t* view) /* in: read view */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
-}
-
-/*************************************************************************
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED.
-Acquires kernel_mutex itself, removes trx->global_read_view from the view
-list, empties trx->global_read_view_heap and resets both trx->read_view and
-trx->global_read_view to NULL. trx->global_read_view must be non-NULL
-(ut_a assertion). */
-
-void
-read_view_close_for_mysql(
-/*======================*/
- trx_t* trx) /* in: trx which has a read view */
-{
- ut_a(trx->global_read_view);
-
- mutex_enter(&kernel_mutex);
-
- read_view_close(trx->global_read_view);
-
- mem_heap_empty(trx->global_read_view_heap);
-
- trx->read_view = NULL;
- trx->global_read_view = NULL;
-
- mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************************
-Prints a read view to stderr: the view type (with undo_no for a
-high-granularity view), the three limits low_limit_no, up_limit_id and
-low_limit_id, and then each stored trx id. Every dulint is printed as its
-high word followed by its low word. */
-
-void
-read_view_print(
-/*============*/
- read_view_t* view) /* in: read view */
-{
- ulint n_ids;
- ulint i;
-
- if (view->type == VIEW_HIGH_GRANULARITY) {
- fprintf(stderr,
- "High-granularity read view undo_n:o %lu %lu\n",
- (ulong) ut_dulint_get_high(view->undo_no),
- (ulong) ut_dulint_get_low(view->undo_no));
- } else {
- fprintf(stderr, "Normal read view\n");
- }
-
- fprintf(stderr, "Read view low limit trx n:o %lu %lu\n",
- (ulong) ut_dulint_get_high(view->low_limit_no),
- (ulong) ut_dulint_get_low(view->low_limit_no));
-
- fprintf(stderr, "Read view up limit trx id %lu %lu\n",
- (ulong) ut_dulint_get_high(view->up_limit_id),
- (ulong) ut_dulint_get_low(view->up_limit_id));
-
- fprintf(stderr, "Read view low limit trx id %lu %lu\n",
- (ulong) ut_dulint_get_high(view->low_limit_id),
- (ulong) ut_dulint_get_low(view->low_limit_id));
-
- fprintf(stderr, "Read view individually stored trx ids:\n");
-
- n_ids = view->n_trx_ids;
-
- for (i = 0; i < n_ids; i++) {
- fprintf(stderr, "Read view trx id %lu %lu\n",
- (ulong) ut_dulint_get_high(
- read_view_get_nth_trx_id(view, i)),
- (ulong) ut_dulint_get_low(
- read_view_get_nth_trx_id(view, i)));
- }
-}
-
-/*************************************************************************
-Create a high-granularity consistent cursor view for mysql to be used
-in cursors. In this consistent read view modifications done by the
-creating transaction after the cursor is created or future transactions
-are not visible.
-
-The cursor view and its read view are both allocated from a private heap
-created here and stored in curview->heap. Unlike read_view_open_now(), the
-creating transaction's own id is not skipped when the active trx ids are
-collected, and the view records cr_trx->undo_no. The function also moves
-cr_trx->n_mysql_tables_in_use into the cursor view and zeroes it in the
-trx. kernel_mutex is acquired and released inside. */
-
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
- /* out, own: cursor view; released with
- read_cursor_view_close_for_mysql() */
- trx_t* cr_trx) /* in: trx where cursor view is created */
-{
- cursor_view_t* curview;
- read_view_t* view;
- mem_heap_t* heap;
- trx_t* trx;
- ulint n;
-
- ut_a(cr_trx);
-
- /* Use larger heap than in trx_create when creating a read_view
- because cursors are quite long. */
-
- heap = mem_heap_create(512);
-
- curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(cursor_view_t));
- curview->heap = heap;
-
- /* Drop cursor tables from consideration when evaluating the need of
- auto-commit */
- curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use;
- cr_trx->n_mysql_tables_in_use = 0;
-
- mutex_enter(&kernel_mutex);
-
- curview->read_view = read_view_create_low(
- UT_LIST_GET_LEN(trx_sys->trx_list), curview->heap);
-
- view = curview->read_view;
- view->creator_trx_id = cr_trx->id;
- view->type = VIEW_HIGH_GRANULARITY;
- view->undo_no = cr_trx->undo_no;
-
- /* No future transactions should be visible in the view */
-
- view->low_limit_no = trx_sys->max_trx_id;
- view->low_limit_id = view->low_limit_no;
-
- n = 0;
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- /* No active transaction should be visible */
-
- while (trx) {
-
- if (trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED) {
-
- read_view_set_nth_trx_id(view, n, trx->id);
-
- n++;
-
- /* NOTE that a transaction whose trx number is <
- trx_sys->max_trx_id can still be active, if it is
- in the middle of its commit! Note that when a
- transaction starts, we initialize trx->no to
- ut_dulint_max. */
-
- if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {
-
- view->low_limit_no = trx->no; /* keep the minimum */
- }
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- view->n_trx_ids = n; /* real count; the array was sized for the
- whole trx list */
-
- if (n > 0) {
- /* The last active transaction has the smallest id: */
- view->up_limit_id = read_view_get_nth_trx_id(view, n - 1);
- } else {
- view->up_limit_id = view->low_limit_id;
- }
-
- UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
-
- mutex_exit(&kernel_mutex);
-
- return(curview);
-}
-
-/*************************************************************************
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. The table count saved in the cursor view
-is added back to trx->n_mysql_tables_in_use, the cursor's read view is
-removed from the view list under kernel_mutex, and finally the cursor
-view's heap is freed, which releases curview itself; curview must not be
-used after this call. */
-
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
- trx_t* trx, /* in: trx */
- cursor_view_t* curview)/* in: cursor view to be closed */
-{
- ut_a(curview);
- ut_a(curview->read_view);
- ut_a(curview->heap);
-
- /* Add cursor's tables to the global count of active tables that
- belong to this transaction */
- trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use;
-
- mutex_enter(&kernel_mutex);
-
- read_view_close(curview->read_view);
- trx->read_view = trx->global_read_view;
-
- mutex_exit(&kernel_mutex);
-
- mem_heap_free(curview->heap);
-}
-
-/*************************************************************************
-This function sets a given consistent cursor view to a transaction
-read view if given consistent cursor view is not NULL. Otherwise, function
-restores a global read view to a transaction read view. The assignment to
-trx->read_view is made while holding kernel_mutex. */
-
-void
-read_cursor_set_for_mysql(
-/*======================*/
- trx_t* trx, /* in: transaction where cursor is set */
- cursor_view_t* curview)/* in: consistent cursor view to be set,
- or NULL to switch back to
- trx->global_read_view */
-{
- ut_a(trx);
-
- mutex_enter(&kernel_mutex);
-
- if (UNIV_LIKELY(curview != NULL)) {
- trx->read_view = curview->read_view;
- } else {
- trx->read_view = trx->global_read_view;
- }
-
- mutex_exit(&kernel_mutex);
-}
diff --git a/storage/innobase/rem/rem0cmp.c b/storage/innobase/rem/rem0cmp.c
deleted file mode 100644
index ca0ec663548..00000000000
--- a/storage/innobase/rem/rem0cmp.c
+++ /dev/null
@@ -1,1064 +0,0 @@
-/***********************************************************************
-Comparison services for records
-
-(c) 1994-1996 Innobase Oy
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-#include "rem0cmp.h"
-
-#ifdef UNIV_NONINL
-#include "rem0cmp.ic"
-#endif
-
-#include "srv0srv.h"
-
-/* ALPHABETICAL ORDER
- ==================
-
-The records are put into alphabetical order in the following
-way: let F be the first field where two records disagree.
-If there is a character in some position n where the
-records disagree, the order is determined by comparison of
-the characters at position n, possibly after
-collating transformation. If there is no such character,
-but the corresponding fields have different lengths, then
-if the data type of the fields is paddable,
-shorter field is padded with a padding character. If the
-data type is not paddable, longer field is considered greater.
-Finally, the SQL null is smaller than any other value.
-
-At the present, the comparison functions return 0 in the case,
-where two records disagree only in the way that one
-has more fields than the other. */
-
-#ifdef UNIV_DEBUG
-/*****************************************************************
-Used in debug checking of cmp_dtuple_... .
-This function is used to compare a data tuple to a physical record. If
-dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has.
-This is only the forward declaration, compiled in UNIV_DEBUG builds; the
-result is cross-checked against cmp_dtuple_rec_with_match() in a debug
-assertion there. */
-static
-int
-cmp_debug_dtuple_rec_with_match(
-/*============================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields);/* in/out: number of already
- completely matched fields; when function
- returns, contains the value for current
- comparison */
-#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************
-This function is used to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. The prototype here
-must be a copy of the one in ha_innobase.cc! It is declared extern and
-is not compiled in UNIV_HOTBACKUP builds. */
-extern
-int
-innobase_mysql_cmp(
-/*===============*/
- /* out: 1, 0, -1, if a is greater,
- equal, less than b, respectively */
- int mysql_type, /* in: MySQL type */
- uint charset_number, /* in: number of the charset */
- unsigned char* a, /* in: data field */
- unsigned int a_length, /* in: data field length,
- not UNIV_SQL_NULL */
- unsigned char* b, /* in: data field */
- unsigned int b_length); /* in: data field length,
- not UNIV_SQL_NULL */
-#endif /* !UNIV_HOTBACKUP */
-/*************************************************************************
-Transforms the character code so that it is ordered appropriately for the
-language. This is only used for the latin1 char set. MySQL does the
-comparisons for other char sets. The transformation is a plain lookup in
-the srv_latin1_ordering table; code is used as the index without any
-range check, so the caller must pass a value that is valid for that
-table. */
-UNIV_INLINE
-ulint
-cmp_collate(
-/*========*/
- /* out: collation order position */
- ulint code) /* in: code of a character stored in database record */
-{
- return((ulint) srv_latin1_ordering[code]);
-}
-
-/*****************************************************************
-Returns TRUE if two columns are equal for comparison purposes. The rules
-are checked in this order: two non-binary string columns are equal if
-their charset-collations match (or unconditionally when check_charsets is
-FALSE); two binary string columns are always equal; otherwise the main
-types must be identical, and for DATA_INT the signedness and the length
-must match as well. */
-
-ibool
-cmp_cols_are_equal(
-/*===============*/
- /* out: TRUE if the columns are
- considered equal in comparisons */
- const dict_col_t* col1, /* in: column 1 */
- const dict_col_t* col2, /* in: column 2 */
- ibool check_charsets)
- /* in: whether to check charsets */
-{
- if (dtype_is_non_binary_string_type(col1->mtype, col1->prtype)
- && dtype_is_non_binary_string_type(col2->mtype, col2->prtype)) {
-
- /* Both are non-binary string types: they can be compared if
- and only if the charset-collation is the same */
-
- if (check_charsets) {
- return(dtype_get_charset_coll(col1->prtype)
- == dtype_get_charset_coll(col2->prtype));
- } else {
- return(TRUE);
- }
- }
-
- if (dtype_is_binary_string_type(col1->mtype, col1->prtype)
- && dtype_is_binary_string_type(col2->mtype, col2->prtype)) {
-
- /* Both are binary string types: they can be compared */
-
- return(TRUE);
- }
-
- if (col1->mtype != col2->mtype) {
-
- return(FALSE);
- }
-
- if (col1->mtype == DATA_INT
- && (col1->prtype & DATA_UNSIGNED)
- != (col2->prtype & DATA_UNSIGNED)) {
-
- /* The storage format of an unsigned integer is different
- from a signed integer: in a signed integer we OR
- 0x8000... to the value of positive integers. */
-
- return(FALSE);
- }
-
- return(col1->mtype != DATA_INT || col1->len == col2->len); /* same main
- type: integers must also have the same length */
-}
-
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************
-Innobase uses this function to compare two data fields for which the data type
-is such that we must compare whole fields or call MySQL to do the comparison.
-DATA_DECIMAL is compared here as a character string of digits, DATA_DOUBLE
-and DATA_FLOAT are decoded with mach_double_read()/mach_float_read() and
-compared numerically, and DATA_BLOB, DATA_VARMYSQL and DATA_MYSQL are
-handed to innobase_mysql_cmp(). Any other main type is a fatal error
-(ut_error).
-
-NOTE(review): in the DATA_DECIMAL case the sign tests read *a and *b
-without checking that a_length / b_length are still non-zero after the
-leading spaces have been skipped; confirm that callers never pass an empty
-or all-space decimal field. */
-static
-int
-cmp_whole_field(
-/*============*/
- /* out: 1, 0, -1, if a is greater,
- equal, less than b, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- unsigned char* a, /* in: data field */
- unsigned int a_length, /* in: data field length,
- not UNIV_SQL_NULL */
- unsigned char* b, /* in: data field */
- unsigned int b_length) /* in: data field length,
- not UNIV_SQL_NULL */
-{
- float f_1;
- float f_2;
- double d_1;
- double d_2;
- int swap_flag = 1; /* becomes -1 when both values are negative, which
- inverts the result of the magnitude comparison */
-
- switch (mtype) {
-
- case DATA_DECIMAL:
- /* Remove preceding spaces */
- for (; a_length && *a == ' '; a++, a_length--);
- for (; b_length && *b == ' '; b++, b_length--);
-
- if (*a == '-') {
- if (*b != '-') {
- return(-1);
- }
-
- a++; b++;
- a_length--;
- b_length--;
-
- swap_flag = -1;
-
- } else if (*b == '-') {
-
- return(1);
- }
-
- while (a_length > 0 && (*a == '+' || *a == '0')) {
- a++; a_length--;
- }
-
- while (b_length > 0 && (*b == '+' || *b == '0')) {
- b++; b_length--;
- }
-
- if (a_length != b_length) { /* the longer remaining string is
- taken to have the larger magnitude */
- if (a_length < b_length) {
- return(-swap_flag);
- }
-
- return(swap_flag);
- }
-
- while (a_length > 0 && *a == *b) {
-
- a++; b++; a_length--;
- }
-
- if (a_length == 0) {
-
- return(0);
- }
-
- if (*a > *b) {
- return(swap_flag);
- }
-
- return(-swap_flag);
- case DATA_DOUBLE:
- d_1 = mach_double_read(a);
- d_2 = mach_double_read(b);
-
- if (d_1 > d_2) {
- return(1);
- } else if (d_2 > d_1) {
- return(-1);
- }
-
- return(0);
-
- case DATA_FLOAT:
- f_1 = mach_float_read(a);
- f_2 = mach_float_read(b);
-
- if (f_1 > f_2) {
- return(1);
- } else if (f_2 > f_1) {
- return(-1);
- }
-
- return(0);
- case DATA_BLOB:
- if (prtype & DATA_BINARY_TYPE) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: comparing a binary BLOB"
- " with a character set sensitive\n"
- "InnoDB: comparison!\n");
- }
- /* fall through */
- case DATA_VARMYSQL:
- case DATA_MYSQL:
- return(innobase_mysql_cmp(
- (int)(prtype & DATA_MYSQL_TYPE_MASK),
- (uint)dtype_get_charset_coll(prtype),
- a, a_length, b, b_length));
- default:
- fprintf(stderr,
- "InnoDB: unknown type number %lu\n",
- (ulong) mtype);
- ut_error;
- }
-
- return(0);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. SQL NULL compares equal to SQL NULL and smaller than any
-non-NULL value. Types with mtype >= DATA_FLOAT, and non-binary BLOBs whose
-charset-collation is not latin1_swedish_ci, are passed to
-cmp_whole_field(). Everything else is compared byte by byte; when one
-field is shorter it is extended with the pad character of the type, or,
-if the type has no pad character, the shorter field is the smaller one.
-In UNIV_HOTBACKUP builds the function only raises ut_error. */
-
-int
-cmp_data_data_slow(
-/*===============*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- ulint mtype, /* in: main type */
- ulint prtype, /* in: precise type */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
-{
-#ifndef UNIV_HOTBACKUP
- ulint data1_byte;
- ulint data2_byte;
- ulint cur_bytes;
-
- if (len1 == UNIV_SQL_NULL || len2 == UNIV_SQL_NULL) {
-
- if (len1 == len2) {
-
- return(0);
- }
-
- if (len1 == UNIV_SQL_NULL) {
- /* We define the SQL null to be the smallest possible
- value of a field in the alphabetical order */
-
- return(-1);
- }
-
- return(1);
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- return(cmp_whole_field(mtype, prtype,
- data1, (unsigned) len1,
- data2, (unsigned) len2));
- }
-
- /* Compare then the fields */
-
- cur_bytes = 0;
-
- for (;;) {
- if (len1 <= cur_bytes) {
- if (len2 <= cur_bytes) {
-
- return(0); /* both fields exhausted: equal */
- }
-
- data1_byte = dtype_get_pad_char(mtype, prtype);
-
- if (data1_byte == ULINT_UNDEFINED) {
-
- return(-1); /* no padding: shorter data1 is less */
- }
- } else {
- data1_byte = *data1;
- }
-
- if (len2 <= cur_bytes) {
- data2_byte = dtype_get_pad_char(mtype, prtype);
-
- if (data2_byte == ULINT_UNDEFINED) {
-
- return(1); /* no padding: shorter data2 is less */
- }
- } else {
- data2_byte = *data2;
- }
-
- if (data1_byte == data2_byte) {
- /* If the bytes are equal, they will remain such even
- after the collation transformation below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE))) {
-
- data1_byte = cmp_collate(data1_byte);
- data2_byte = cmp_collate(data2_byte);
- }
-
- if (data1_byte > data2_byte) {
-
- return(1);
- } else if (data1_byte < data2_byte) {
-
- return(-1);
- }
-next_byte:
- /* Next byte */
- cur_bytes++;
- data1++;
- data2++;
- }
-#else /* !UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* !UNIV_HOTBACKUP */
-
- return(0); /* Not reached */
-}
-
-/*****************************************************************
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made.
-
-The comparison resumes from the position given in *matched_fields and
-*matched_bytes and writes the position reached back to them on return.
-A record or tuple carrying REC_INFO_MIN_REC_FLAG is ordered before
-everything else; this is checked only when the comparison starts from
-field 0, byte 0. In UNIV_HOTBACKUP builds the function only raises
-ut_error. */
-
-int
-cmp_dtuple_rec_with_match(
-/*======================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared, or
- until the first externally stored field in
- rec */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields, /* in/out: number of already completely
- matched fields; when function returns,
- contains the value for current comparison */
- ulint* matched_bytes) /* in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns, contains the
- value for current comparison */
-{
-#ifndef UNIV_HOTBACKUP
- dfield_t* dtuple_field; /* current field in logical record */
- ulint dtuple_f_len; /* the length of the current field
- in the logical record */
- byte* dtuple_b_ptr; /* pointer to the current byte in
- logical field data */
- ulint dtuple_byte; /* value of current byte to be compared
- in dtuple*/
- ulint rec_f_len; /* length of current field in rec */
- byte* rec_b_ptr; /* pointer to the current byte in
- rec field */
- ulint rec_byte; /* value of current byte to be
- compared in rec */
- ulint cur_field; /* current field number */
- ulint cur_bytes; /* number of already matched bytes
- in current field */
- int ret = 3333; /* return value; 3333 is a placeholder
- that every path overwrites before
- order_resolved, where the range of ret
- is asserted in debug builds */
-
- ut_ad(dtuple && rec && matched_fields && matched_bytes);
- ut_ad(dtuple_check_typed(dtuple));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- cur_field = *matched_fields;
- cur_bytes = *matched_bytes;
-
- ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple));
- ut_ad(cur_field <= rec_offs_n_fields(offsets));
-
- if (cur_bytes == 0 && cur_field == 0) {
- ulint rec_info = rec_get_info_bits(rec,
- rec_offs_comp(offsets));
- ulint tup_info = dtuple_get_info_bits(dtuple);
-
- if (rec_info & REC_INFO_MIN_REC_FLAG) {
- ret = !(tup_info & REC_INFO_MIN_REC_FLAG); /* 0 if both are
- minimum records, otherwise dtuple is greater */
- goto order_resolved;
- } else if (tup_info & REC_INFO_MIN_REC_FLAG) {
- ret = -1;
- goto order_resolved;
- }
- }
-
- /* Match fields in a loop; stop if we run out of fields in dtuple
- or find an externally stored field */
-
- while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
-
- ulint mtype;
- ulint prtype;
-
- dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
- {
- const dtype_t* type
- = dfield_get_type(dtuple_field);
-
- mtype = type->mtype;
- prtype = type->prtype;
- }
-
- dtuple_f_len = dfield_get_len(dtuple_field);
-
- rec_b_ptr = rec_get_nth_field(rec, offsets,
- cur_field, &rec_f_len);
-
- /* If we have matched yet 0 bytes, it may be that one or
- both the fields are SQL null, or the record or dtuple may be
- the predefined minimum record, or the field is externally
- stored */
-
- if (UNIV_LIKELY(cur_bytes == 0)) {
- if (rec_offs_nth_extern(offsets, cur_field)) {
- /* We do not compare to an externally
- stored field */
-
- ret = 0;
-
- goto order_resolved;
- }
-
- if (dtuple_f_len == UNIV_SQL_NULL) {
- if (rec_f_len == UNIV_SQL_NULL) {
-
- goto next_field;
- }
-
- ret = -1;
- goto order_resolved;
- } else if (rec_f_len == UNIV_SQL_NULL) {
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
-
- ret = 1;
- goto order_resolved;
- }
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- ret = cmp_whole_field(mtype, prtype,
- dfield_get_data(dtuple_field),
- (unsigned) dtuple_f_len,
- rec_b_ptr, (unsigned) rec_f_len);
-
- if (ret != 0) {
- cur_bytes = 0; /* whole-field comparison gives no
- matched-byte position to report */
-
- goto order_resolved;
- } else {
- goto next_field;
- }
- }
-
- /* Set the pointers at the current byte */
-
- rec_b_ptr = rec_b_ptr + cur_bytes;
- dtuple_b_ptr = (byte*)dfield_get_data(dtuple_field)
- + cur_bytes;
- /* Compare then the fields */
-
- for (;;) {
- if (UNIV_UNLIKELY(rec_f_len <= cur_bytes)) {
- if (dtuple_f_len <= cur_bytes) {
-
- goto next_field;
- }
-
- rec_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec_byte == ULINT_UNDEFINED) {
- ret = 1; /* no padding: the longer dtuple
- field is greater */
-
- goto order_resolved;
- }
- } else {
- rec_byte = *rec_b_ptr;
- }
-
- if (UNIV_UNLIKELY(dtuple_f_len <= cur_bytes)) {
- dtuple_byte = dtype_get_pad_char(mtype,
- prtype);
-
- if (dtuple_byte == ULINT_UNDEFINED) {
- ret = -1; /* no padding: the shorter
- dtuple field is less */
-
- goto order_resolved;
- }
- } else {
- dtuple_byte = *dtuple_b_ptr;
- }
-
- if (dtuple_byte == rec_byte) {
- /* If the bytes are equal, they will
- remain such even after the collation
- transformation below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
-
- rec_byte = cmp_collate(rec_byte);
- dtuple_byte = cmp_collate(dtuple_byte);
- }
-
- ret = (int) (dtuple_byte - rec_byte);
- if (UNIV_UNLIKELY(ret)) {
- if (ret < 0) {
- ret = -1;
- goto order_resolved;
- } else {
- ret = 1;
- goto order_resolved;
- }
- }
-next_byte:
- /* Next byte */
- cur_bytes++;
- rec_b_ptr++;
- dtuple_b_ptr++;
- }
-
-next_field:
- cur_field++;
- cur_bytes = 0;
- }
-
- ut_ad(cur_bytes == 0);
-
- ret = 0; /* If we ran out of fields, dtuple was equal to rec
- up to the common fields */
-order_resolved:
- ut_ad((ret >= - 1) && (ret <= 1));
- ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets,
- matched_fields));
- ut_ad(*matched_fields == cur_field); /* In the debug version, the
- above cmp_debug_... sets
- *matched_fields to a value */
- *matched_fields = cur_field;
- *matched_bytes = cur_bytes;
-
- return(ret);
-#else /* !UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(0);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/******************************************************************
-Compares a data tuple to a physical record. */
-
-int
-cmp_dtuple_rec(
-/*===========*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively; see the comments
- for cmp_dtuple_rec_with_match */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- ulint matched_fields = 0;
- ulint matched_bytes = 0;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
- &matched_fields, &matched_bytes));
-}
-
-/******************************************************************
-Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record. */
-
-ibool
-cmp_dtuple_is_prefix_of_rec(
-/*========================*/
- /* out: TRUE if prefix */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- ulint n_fields;
- ulint matched_fields = 0;
- ulint matched_bytes = 0;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- n_fields = dtuple_get_n_fields(dtuple);
-
- if (n_fields > rec_offs_n_fields(offsets)) {
-
- return(FALSE);
- }
-
- cmp_dtuple_rec_with_match(dtuple, rec, offsets,
- &matched_fields, &matched_bytes);
- if (matched_fields == n_fields) {
-
- return(TRUE);
- }
-
- if (matched_fields == n_fields - 1
- && matched_bytes == dfield_get_len(
- dtuple_get_nth_field(dtuple, n_fields - 1))) {
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*****************************************************************
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned. */
-
-int
-cmp_rec_rec_with_match(
-/*===================*/
- /* out: 1, 0, -1 if rec1 is greater, equal,
- less, respectively, than rec2; only the common
- first fields are compared */
- rec_t* rec1, /* in: physical record */
- rec_t* rec2, /* in: physical record */
- const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /* in: data dictionary index */
- ulint* matched_fields, /* in/out: number of already completely
- matched fields; when the function returns,
- contains the value for the current
- comparison */
- ulint* matched_bytes) /* in/out: number of already matched
- bytes within the first field not completely
- matched; when the function returns, contains
- the value for the current comparison */
-{
-#ifndef UNIV_HOTBACKUP
- ulint rec1_n_fields; /* the number of fields in rec1 */
- ulint rec1_f_len; /* length of current field in rec1 */
- byte* rec1_b_ptr; /* pointer to the current byte in rec1 field */
- ulint rec1_byte; /* value of current byte to be compared in
- rec1 */
- ulint rec2_n_fields; /* the number of fields in rec2 */
- ulint rec2_f_len; /* length of current field in rec2 */
- byte* rec2_b_ptr; /* pointer to the current byte in rec2 field */
- ulint rec2_byte; /* value of current byte to be compared in
- rec2 */
- ulint cur_field; /* current field number */
- ulint cur_bytes; /* number of already matched bytes in current
- field */
- int ret = 3333; /* return value */
- ulint comp;
-
- ut_ad(rec1 && rec2 && index);
- ut_ad(rec_offs_validate(rec1, index, offsets1));
- ut_ad(rec_offs_validate(rec2, index, offsets2));
- ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
-
- comp = rec_offs_comp(offsets1);
- rec1_n_fields = rec_offs_n_fields(offsets1);
- rec2_n_fields = rec_offs_n_fields(offsets2);
-
- cur_field = *matched_fields;
- cur_bytes = *matched_bytes;
-
- /* Match fields in a loop */
-
- while ((cur_field < rec1_n_fields) && (cur_field < rec2_n_fields)) {
-
- ulint mtype;
- ulint prtype;
-
- if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
- /* This is for the insert buffer B-tree. */
- mtype = DATA_BINARY;
- prtype = 0;
- } else {
- const dict_col_t* col
- = dict_index_get_nth_col(index, cur_field);
-
- mtype = col->mtype;
- prtype = col->prtype;
- }
-
- rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
- cur_field, &rec1_f_len);
- rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
- cur_field, &rec2_f_len);
-
- if (cur_bytes == 0) {
- if (cur_field == 0) {
- /* Test if rec is the predefined minimum
- record */
- if (rec_get_info_bits(rec1, comp)
- & REC_INFO_MIN_REC_FLAG) {
-
- if (rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG) {
- ret = 0;
- } else {
- ret = -1;
- }
-
- goto order_resolved;
-
- } else if (rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG) {
-
- ret = 1;
-
- goto order_resolved;
- }
- }
-
- if (rec_offs_nth_extern(offsets1, cur_field)
- || rec_offs_nth_extern(offsets2, cur_field)) {
- /* We do not compare to an externally
- stored field */
-
- ret = 0;
-
- goto order_resolved;
- }
-
- if (rec1_f_len == UNIV_SQL_NULL
- || rec2_f_len == UNIV_SQL_NULL) {
-
- if (rec1_f_len == rec2_f_len) {
-
- goto next_field;
-
- } else if (rec2_f_len == UNIV_SQL_NULL) {
-
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
-
- ret = 1;
- } else {
- ret = -1;
- }
-
- goto order_resolved;
- }
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- ret = cmp_whole_field(mtype, prtype,
- rec1_b_ptr,
- (unsigned) rec1_f_len,
- rec2_b_ptr,
- (unsigned) rec2_f_len);
- if (ret != 0) {
- cur_bytes = 0;
-
- goto order_resolved;
- } else {
- goto next_field;
- }
- }
-
- /* Set the pointers at the current byte */
- rec1_b_ptr = rec1_b_ptr + cur_bytes;
- rec2_b_ptr = rec2_b_ptr + cur_bytes;
-
- /* Compare then the fields */
- for (;;) {
- if (rec2_f_len <= cur_bytes) {
-
- if (rec1_f_len <= cur_bytes) {
-
- goto next_field;
- }
-
- rec2_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec2_byte == ULINT_UNDEFINED) {
- ret = 1;
-
- goto order_resolved;
- }
- } else {
- rec2_byte = *rec2_b_ptr;
- }
-
- if (rec1_f_len <= cur_bytes) {
- rec1_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec1_byte == ULINT_UNDEFINED) {
- ret = -1;
-
- goto order_resolved;
- }
- } else {
- rec1_byte = *rec1_b_ptr;
- }
-
- if (rec1_byte == rec2_byte) {
- /* If the bytes are equal, they will remain
- such even after the collation transformation
- below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
-
- rec1_byte = cmp_collate(rec1_byte);
- rec2_byte = cmp_collate(rec2_byte);
- }
-
- if (rec1_byte < rec2_byte) {
- ret = -1;
- goto order_resolved;
- } else if (rec1_byte > rec2_byte) {
- ret = 1;
- goto order_resolved;
- }
-next_byte:
- /* Next byte */
-
- cur_bytes++;
- rec1_b_ptr++;
- rec2_b_ptr++;
- }
-
-next_field:
- cur_field++;
- cur_bytes = 0;
- }
-
- ut_ad(cur_bytes == 0);
-
- ret = 0; /* If we ran out of fields, rec1 was equal to rec2 up
- to the common fields */
-order_resolved:
-
- ut_ad((ret >= - 1) && (ret <= 1));
-
- *matched_fields = cur_field;
- *matched_bytes = cur_bytes;
-
- return(ret);
-#else /* !UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(0);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-#ifdef UNIV_DEBUG
-/*****************************************************************
-Used in debug checking of cmp_dtuple_... .
-This function is used to compare a data tuple to a physical record. If
-dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has. If it encounters an
-externally stored field, it returns 0. */
-static
-int
-cmp_debug_dtuple_rec_with_match(
-/*============================*/
- /* out: 1, 0, -1, if dtuple is greater, equal,
- less than rec, respectively, when only the
- common first fields are compared */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec, /* in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/* in: array returned by rec_get_offsets() */
- ulint* matched_fields) /* in/out: number of already
- completely matched fields; when function
- returns, contains the value for current
- comparison */
-{
- dfield_t* dtuple_field; /* current field in logical record */
- ulint dtuple_f_len; /* the length of the current field
- in the logical record */
- byte* dtuple_f_data; /* pointer to the current logical
- field data */
- ulint rec_f_len; /* length of current field in rec */
- byte* rec_f_data; /* pointer to the current rec field */
- int ret = 3333; /* return value */
- ulint cur_field; /* current field number */
-
- ut_ad(dtuple && rec && matched_fields);
- ut_ad(dtuple_check_typed(dtuple));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- ut_ad(*matched_fields <= dtuple_get_n_fields_cmp(dtuple));
- ut_ad(*matched_fields <= rec_offs_n_fields(offsets));
-
- cur_field = *matched_fields;
-
- if (cur_field == 0) {
- if (rec_get_info_bits(rec, rec_offs_comp(offsets))
- & REC_INFO_MIN_REC_FLAG) {
-
- ret = !(dtuple_get_info_bits(dtuple)
- & REC_INFO_MIN_REC_FLAG);
-
- goto order_resolved;
- }
-
- if (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG) {
- ret = -1;
-
- goto order_resolved;
- }
- }
-
- /* Match fields in a loop; stop if we run out of fields in dtuple */
-
- while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
-
- ulint mtype;
- ulint prtype;
-
- dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
- {
- const dtype_t* type
- = dfield_get_type(dtuple_field);
-
- mtype = type->mtype;
- prtype = type->prtype;
- }
-
- dtuple_f_data = dfield_get_data(dtuple_field);
- dtuple_f_len = dfield_get_len(dtuple_field);
-
- rec_f_data = rec_get_nth_field(rec, offsets,
- cur_field, &rec_f_len);
-
- if (rec_offs_nth_extern(offsets, cur_field)) {
- /* We do not compare to an externally stored field */
-
- ret = 0;
-
- goto order_resolved;
- }
-
- ret = cmp_data_data(mtype, prtype, dtuple_f_data, dtuple_f_len,
- rec_f_data, rec_f_len);
- if (ret != 0) {
- goto order_resolved;
- }
-
- cur_field++;
- }
-
- ret = 0; /* If we ran out of fields, dtuple was equal to rec
- up to the common fields */
-order_resolved:
- ut_ad((ret >= - 1) && (ret <= 1));
-
- *matched_fields = cur_field;
-
- return(ret);
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c
deleted file mode 100644
index 64f8e2d319c..00000000000
--- a/storage/innobase/rem/rem0rec.c
+++ /dev/null
@@ -1,1515 +0,0 @@
-/************************************************************************
-Record manager
-
-(c) 1994-2001 Innobase Oy
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "rem0rec.h"
-
-#ifdef UNIV_NONINL
-#include "rem0rec.ic"
-#endif
-
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-
-/* PHYSICAL RECORD (OLD STYLE)
- ===========================
-
-The physical record, which is the data type of all the records
-found in index pages of the database, has the following format
-(lower addresses and more significant bits inside a byte are below
-represented on a higher text line):
-
-| offset of the end of the last field of data, the most significant
- bit is set to 1 if and only if the field is SQL-null,
- if the offset is 2-byte, then the second most significant
- bit is set to 1 if the field is stored on another page:
- mostly this will occur in the case of big BLOB fields |
-...
-| offset of the end of the first field of data + the SQL-null bit |
-| 4 bits used to delete mark a record, and mark a predefined
- minimum record in alphabetical order |
-| 4 bits giving the number of records owned by this record
- (this term is explained in page0page.h) |
-| 13 bits giving the order number of this record in the
- heap of the index page |
-| 10 bits giving the number of fields in this record |
-| 1 bit which is set to 1 if the offsets above are given in
- one byte format, 0 if in two byte format |
-| two bytes giving an absolute pointer to the next record in the page |
-ORIGIN of the record
-| first field of data |
-...
-| last field of data |
-
-The origin of the record is the start address of the first field
-of data. The offsets are given relative to the origin.
-The offsets of the data fields are stored in an inverted
-order because then the offset of the first fields are near the
-origin, giving maybe a better processor cache hit rate in searches.
-
-The offsets of the data fields are given as one-byte
-(if there are less than 127 bytes of data in the record)
-or two-byte unsigned integers. The most significant bit
-is not part of the offset, instead it indicates the SQL-null
-if the bit is set to 1. */
-
-/* PHYSICAL RECORD (NEW STYLE)
- ===========================
-
-The physical record, which is the data type of all the records
-found in index pages of the database, has the following format
-(lower addresses and more significant bits inside a byte are below
-represented on a higher text line):
-
-| length of the last non-null variable-length field of data:
- if the maximum length is 255, one byte; otherwise,
- 0xxxxxxx (one byte, length=0..127), or 1exxxxxxxxxxxxxx (two bytes,
- length=128..16383, extern storage flag) |
-...
-| length of first variable-length field of data |
-| SQL-null flags (1 bit per nullable field), padded to full bytes |
-| 4 bits used to delete mark a record, and mark a predefined
- minimum record in alphabetical order |
-| 4 bits giving the number of records owned by this record
- (this term is explained in page0page.h) |
-| 13 bits giving the order number of this record in the
- heap of the index page |
-| 3 bits record type: 000=conventional, 001=node pointer (inside B-tree),
- 010=infimum, 011=supremum, 1xx=reserved |
-| two bytes giving a relative pointer to the next record in the page |
-ORIGIN of the record
-| first field of data |
-...
-| last field of data |
-
-The origin of the record is the start address of the first field
-of data. The offsets are given relative to the origin.
-The offsets of the data fields are stored in an inverted
-order because then the offset of the first fields are near the
-origin, giving maybe a better processor cache hit rate in searches.
-
-The offsets of the data fields are given as one-byte
-(if there are less than 127 bytes of data in the record)
-or two-byte unsigned integers. The most significant bit
-is not part of the offset, instead it indicates the SQL-null
-if the bit is set to 1. */
-
-/* CANONICAL COORDINATES. A record can be seen as a single
-string of 'characters' in the following way: catenate the bytes
-in each field, in the order of fields. An SQL-null field
-is taken to be an empty sequence of bytes. Then after
-the position of each field insert in the string
-the 'character' <FIELD-END>, except that after an SQL-null field
-insert <NULL-FIELD-END>. Now the ordinal position of each
-byte in this canonical string is its canonical coordinate.
-So, for the record ("AA", SQL-NULL, "BB", ""), the canonical
-string is "AA<FIELD_END><NULL-FIELD-END>BB<FIELD-END><FIELD-END>".
-We identify prefixes (= initial segments) of a record
-with prefixes of the canonical string. The canonical
-length of the prefix is the length of the corresponding
-prefix of the canonical string. The canonical length of
-a record is the length of its canonical string.
-
-For example, the maximal common prefix of records
-("AA", SQL-NULL, "BB", "C") and ("AA", SQL-NULL, "B", "C")
-is "AA<FIELD-END><NULL-FIELD-END>B", and its canonical
-length is 5.
-
-A complete-field prefix of a record is a prefix which ends at the
-end of some field (containing also <FIELD-END>).
-A record is a complete-field prefix of another record, if
-the corresponding canonical strings have the same property. */
-
-ulint rec_dummy; /* this is used to fool compiler in
- rec_validate */
-
-/*******************************************************************
-Validates the consistency of an old-style physical record. */
-static
-ibool
-rec_validate_old(
-/*=============*/
- /* out: TRUE if ok */
- rec_t* rec); /* in: physical record */
-
-/**********************************************************
-The following function determines the offsets to each field in the
-record. The offsets are written to a previously allocated array of
-ulint, where rec_offs_n_fields(offsets) has been initialized to the
-number of fields in the record. The rest of the array will be
-initialized by this function. rec_offs_base(offsets)[0] will be set
-to the extra size (if REC_OFFS_COMPACT is set, the record is in the
-new format), and rec_offs_base(offsets)[1..n_fields] will be set to
-offsets past the end of fields 0..n_fields, or to the beginning of
-fields 1..n_fields+1. When the high-order bit of the offset at [i+1]
-is set (REC_OFFS_SQL_NULL), the field i is NULL. When the second
-high-order bit of the offset at [i+1] is set (REC_OFFS_EXTERNAL), the
-field i is being stored externally. */
-static
-void
-rec_init_offsets(
-/*=============*/
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets)/* in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-{
- ulint i = 0;
- ulint offs;
-
- rec_offs_make_valid(rec, index, offsets);
-
- if (dict_table_is_comp(index->table)) {
- const byte* nulls;
- const byte* lens;
- dict_field_t* field;
- ulint null_mask;
- ulint status = rec_get_status(rec);
- ulint n_node_ptr_field = ULINT_UNDEFINED;
-
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* the field is 8 bytes long */
- rec_offs_base(offsets)[0]
- = REC_N_NEW_EXTRA_BYTES | REC_OFFS_COMPACT;
- rec_offs_base(offsets)[1] = 8;
- return;
- case REC_STATUS_NODE_PTR:
- n_node_ptr_field
- = dict_index_get_n_unique_in_tree(index);
- break;
- case REC_STATUS_ORDINARY:
- break;
- }
-
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- offs = 0;
- null_mask = 1;
-
- /* read the lengths of fields 0..n */
- do {
- ulint len;
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- len = offs += 4;
- goto resolved;
- }
-
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype
- & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* No length is stored for NULL fields.
- We do not advance offs, and we set
- the length to zero and enable the
- SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
- goto resolved;
- }
- null_mask <<= 1;
- }
-
- if (UNIV_UNLIKELY(!field->fixed_len)) {
- /* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
- len = *lens--;
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype
- == DATA_BLOB)) {
- if (len & 0x80) {
- /* 1exxxxxx xxxxxxxx */
- len <<= 8;
- len |= *lens--;
-
- offs += len & 0x3fff;
- if (UNIV_UNLIKELY(len
- & 0x4000)) {
- len = offs
- | REC_OFFS_EXTERNAL;
- } else {
- len = offs;
- }
-
- goto resolved;
- }
- }
-
- len = offs += len;
- } else {
- len = offs += field->fixed_len;
- }
-resolved:
- rec_offs_base(offsets)[i + 1] = len;
- } while (++i < rec_offs_n_fields(offsets));
-
- *rec_offs_base(offsets)
- = (rec - (lens + 1)) | REC_OFFS_COMPACT;
- } else {
- /* Old-style record: determine extra size and end offsets */
- offs = REC_N_OLD_EXTRA_BYTES;
- if (rec_get_1byte_offs_flag(rec)) {
- offs += rec_offs_n_fields(offsets);
- *rec_offs_base(offsets) = offs;
- /* Determine offsets to fields */
- do {
- offs = rec_1_get_field_end_info(rec, i);
- if (offs & REC_1BYTE_SQL_NULL_MASK) {
- offs &= ~REC_1BYTE_SQL_NULL_MASK;
- offs |= REC_OFFS_SQL_NULL;
- }
- rec_offs_base(offsets)[1 + i] = offs;
- } while (++i < rec_offs_n_fields(offsets));
- } else {
- offs += 2 * rec_offs_n_fields(offsets);
- *rec_offs_base(offsets) = offs;
- /* Determine offsets to fields */
- do {
- offs = rec_2_get_field_end_info(rec, i);
- if (offs & REC_2BYTE_SQL_NULL_MASK) {
- offs &= ~REC_2BYTE_SQL_NULL_MASK;
- offs |= REC_OFFS_SQL_NULL;
- }
- if (offs & REC_2BYTE_EXTERN_MASK) {
- offs &= ~REC_2BYTE_EXTERN_MASK;
- offs |= REC_OFFS_EXTERNAL;
- }
- rec_offs_base(offsets)[1 + i] = offs;
- } while (++i < rec_offs_n_fields(offsets));
- }
- }
-}
-
-/**********************************************************
-The following function determines the offsets to each field
-in the record. It can reuse a previously returned array. */
-
-ulint*
-rec_get_offsets_func(
-/*=================*/
- /* out: the new offsets */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint* offsets,/* in/out: array consisting of offsets[0]
- allocated elements, or an array from
- rec_get_offsets(), or NULL */
- ulint n_fields,/* in: maximum number of initialized fields
- (ULINT_UNDEFINED if all fields) */
- mem_heap_t** heap, /* in/out: memory heap */
- const char* file, /* in: file name where called */
- ulint line) /* in: line number where called */
-{
- ulint n;
- ulint size;
-
- ut_ad(rec);
- ut_ad(index);
- ut_ad(heap);
-
- if (dict_table_is_comp(index->table)) {
- switch (UNIV_EXPECT(rec_get_status(rec),
- REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
- n = dict_index_get_n_fields(index);
- break;
- case REC_STATUS_NODE_PTR:
- n = dict_index_get_n_unique_in_tree(index) + 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record */
- n = 1;
- break;
- default:
- ut_error;
- return(NULL);
- }
- } else {
- n = rec_get_n_fields_old(rec);
- }
-
- if (UNIV_UNLIKELY(n_fields < n)) {
- n = n_fields;
- }
-
- size = n + (1 + REC_OFFS_HEADER_SIZE);
-
- if (UNIV_UNLIKELY(!offsets)
- || UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) {
- if (!*heap) {
- *heap = mem_heap_create_func(size * sizeof(ulint),
- NULL, MEM_HEAP_DYNAMIC,
- file, line);
- }
- offsets = mem_heap_alloc(*heap, size * sizeof(ulint));
- rec_offs_set_n_alloc(offsets, size);
- }
-
- rec_offs_set_n_fields(offsets, n);
- rec_init_offsets(rec, index, offsets);
- return(offsets);
-}
-
-/****************************************************************
-The following function is used to get a pointer to the nth
-data field in an old-style record. */
-
-byte*
-rec_get_nth_field_old(
-/*==================*/
- /* out: pointer to the field */
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- ulint* len) /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
-{
- ulint os;
- ulint next_os;
-
- ut_ad(rec && len);
- ut_ad(n < rec_get_n_fields_old(rec));
-
- if (n > REC_MAX_N_FIELDS) {
- fprintf(stderr, "Error: trying to access field %lu in rec\n",
- (ulong) n);
- ut_error;
- }
-
- if (rec == NULL) {
- fputs("Error: rec is NULL pointer\n", stderr);
- ut_error;
- }
-
- if (rec_get_1byte_offs_flag(rec)) {
- os = rec_1_get_field_start_offs(rec, n);
-
- next_os = rec_1_get_field_end_info(rec, n);
-
- if (next_os & REC_1BYTE_SQL_NULL_MASK) {
- *len = UNIV_SQL_NULL;
-
- return(rec + os);
- }
-
- next_os = next_os & ~REC_1BYTE_SQL_NULL_MASK;
- } else {
- os = rec_2_get_field_start_offs(rec, n);
-
- next_os = rec_2_get_field_end_info(rec, n);
-
- if (next_os & REC_2BYTE_SQL_NULL_MASK) {
- *len = UNIV_SQL_NULL;
-
- return(rec + os);
- }
-
- next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK
- | REC_2BYTE_EXTERN_MASK);
- }
-
- *len = next_os - os;
-
- ut_ad(*len < UNIV_PAGE_SIZE);
-
- return(rec + os);
-}
-
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a new-style physical record. */
-
-ulint
-rec_get_converted_size_new(
-/*=======================*/
- /* out: size */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple) /* in: data tuple */
-{
- ulint size = REC_N_NEW_EXTRA_BYTES
- + UT_BITS_IN_BYTES(index->n_nullable);
- ulint i;
- ulint n_fields;
- ut_ad(index && dtuple);
- ut_ad(dict_table_is_comp(index->table));
-
- switch (dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) {
- case REC_STATUS_ORDINARY:
- n_fields = dict_index_get_n_fields(index);
- ut_ad(n_fields == dtuple_get_n_fields(dtuple));
- break;
- case REC_STATUS_NODE_PTR:
- n_fields = dict_index_get_n_unique_in_tree(index);
- ut_ad(n_fields + 1 == dtuple_get_n_fields(dtuple));
- ut_ad(dtuple_get_nth_field(dtuple, n_fields)->len == 4);
- size += 4; /* child page number */
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record, 8 data bytes */
- return(REC_N_NEW_EXTRA_BYTES + 8);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
- }
-
- /* read the lengths of fields 0..n */
- for (i = 0; i < n_fields; i++) {
- dict_field_t* field;
- ulint len;
- const dict_col_t* col;
-
- field = dict_index_get_nth_field(index, i);
- len = dtuple_get_nth_field(dtuple, i)->len;
- col = dict_field_get_col(field);
-
- ut_ad(dict_col_type_assert_equal(
- col, dfield_get_type(dtuple_get_nth_field(
- dtuple, i))));
-
- if (len == UNIV_SQL_NULL) {
- /* No length is stored for NULL fields. */
- ut_ad(!(col->prtype & DATA_NOT_NULL));
- continue;
- }
-
- ut_ad(len <= col->len || col->mtype == DATA_BLOB);
-
- if (field->fixed_len) {
- ut_ad(len == field->fixed_len);
- /* dict_index_add_col() should guarantee this */
- ut_ad(!field->prefix_len
- || field->fixed_len == field->prefix_len);
- } else if (len < 128
- || (col->len < 256 && col->mtype != DATA_BLOB)) {
- size++;
- } else {
- /* For variable-length columns, we look up the
- maximum length from the column itself. If this
- is a prefix index column shorter than 256 bytes,
- this will waste one byte. */
- size += 2;
- }
- size += len;
- }
-
- return(size);
-}
-
-/***************************************************************
-Sets the value of the ith field SQL null bit of an old-style record. */
-
-void
-rec_set_nth_field_null_bit(
-/*=======================*/
- rec_t* rec, /* in: record */
- ulint i, /* in: ith field */
- ibool val) /* in: value to set */
-{
- ulint info;
-
- if (rec_get_1byte_offs_flag(rec)) {
-
- info = rec_1_get_field_end_info(rec, i);
-
- if (val) {
- info = info | REC_1BYTE_SQL_NULL_MASK;
- } else {
- info = info & ~REC_1BYTE_SQL_NULL_MASK;
- }
-
- rec_1_set_field_end_info(rec, i, info);
-
- return;
- }
-
- info = rec_2_get_field_end_info(rec, i);
-
- if (val) {
- info = info | REC_2BYTE_SQL_NULL_MASK;
- } else {
- info = info & ~REC_2BYTE_SQL_NULL_MASK;
- }
-
- rec_2_set_field_end_info(rec, i, info);
-}
-
-/***************************************************************
-Sets the value of the ith field extern storage bit of an old-style record. */
-
-void
-rec_set_nth_field_extern_bit_old(
-/*=============================*/
- rec_t* rec, /* in: old-style record */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr) /* in: mtr holding an X-latch to the page where
- rec is, or NULL; in the NULL case we do not
- write to log about the change */
-{
- ulint info;
-
- ut_a(!rec_get_1byte_offs_flag(rec));
- ut_a(i < rec_get_n_fields_old(rec));
-
- info = rec_2_get_field_end_info(rec, i);
-
- if (val) {
- info = info | REC_2BYTE_EXTERN_MASK;
- } else {
- info = info & ~REC_2BYTE_EXTERN_MASK;
- }
-
- if (mtr) {
- mlog_write_ulint(rec - REC_N_OLD_EXTRA_BYTES - 2 * (i + 1),
- info, MLOG_2BYTES, mtr);
- } else {
- rec_2_set_field_end_info(rec, i, info);
- }
-}
-
-/***************************************************************
-Sets the value of the ith field extern storage bit of a new-style record. */
-
-void
-rec_set_nth_field_extern_bit_new(
-/*=============================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- ulint ith, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr) /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case
- we do not write to log about the change */
-{
- byte* nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- byte* lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- ulint i;
- ulint n_fields;
- ulint null_mask = 1;
- ut_ad(rec && index);
- ut_ad(dict_table_is_comp(index->table));
- ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
-
- n_fields = dict_index_get_n_fields(index);
-
- ut_ad(ith < n_fields);
-
- /* read the lengths of fields 0..n */
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* field;
- const dict_col_t* col;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
-
- if (!(col->prtype & DATA_NOT_NULL)) {
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* NULL fields cannot be external. */
- ut_ad(i != ith);
- continue;
- }
-
- null_mask <<= 1;
- }
- if (field->fixed_len) {
- /* fixed-length fields cannot be external
- (Fixed-length fields longer than
- DICT_MAX_INDEX_COL_LEN will be treated as
- variable-length ones in dict_index_add_col().) */
- ut_ad(i != ith);
- continue;
- }
- lens--;
- if (col->len > 255 || col->mtype == DATA_BLOB) {
- ulint len = lens[1];
- if (len & 0x80) { /* 1exxxxxx: 2-byte length */
- if (i == ith) {
- if (!val == !(len & 0x40)) {
- return; /* no change */
- }
- /* toggle the extern bit */
- len ^= 0x40;
- if (mtr) {
- mlog_write_ulint(lens + 1,
- len,
- MLOG_1BYTE,
- mtr);
- } else {
- lens[1] = (byte) len;
- }
- return;
- }
- lens--;
- } else {
- /* short fields cannot be external */
- ut_ad(i != ith);
- }
- } else {
- /* short fields cannot be external */
- ut_ad(i != ith);
- }
- }
-}
-
-/***************************************************************
-Sets to TRUE the extern storage bits of the fields listed in an array. */
-
-void
-rec_set_field_extern_bits(
-/*======================*/
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: record descriptor */
- const ulint* vec, /* in: array of field numbers */
- ulint n_fields,/* in: number of field numbers in vec */
- mtr_t* mtr) /* in: mtr holding an X-latch to the
- page where rec is, or NULL;
- in the NULL case we do not write
- to log about the change */
-{
- ulint i;
-
- if (dict_table_is_comp(index->table)) {
- for (i = 0; i < n_fields; i++) {
- rec_set_nth_field_extern_bit_new(rec, index, vec[i],
- TRUE, mtr);
- }
- } else {
- for (i = 0; i < n_fields; i++) {
- rec_set_nth_field_extern_bit_old(rec, vec[i],
- TRUE, mtr);
- }
- }
-}
-
-/***************************************************************
-Sets an old-style record field to SQL null.
-The physical size of the field is not changed. */
-
-void
-rec_set_nth_field_sql_null(
-/*=======================*/
- rec_t* rec, /* in: record */
- ulint n) /* in: index of the field */
-{
- ulint offset;
-
- offset = rec_get_field_start_offs(rec, n);
-
- data_write_sql_null(rec + offset, rec_get_nth_field_size(rec, n));
-
- rec_set_nth_field_null_bit(rec, n, TRUE);
-}
-
-/*************************************************************
-Builds an old-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer. */
-static
-rec_t*
-rec_convert_dtuple_to_rec_old(
-/*==========================*/
- /* out: pointer to the origin of
- physical record */
- byte* buf, /* in: start address of the physical record */
- dtuple_t* dtuple)/* in: data tuple */
-{
- dfield_t* field;
- ulint n_fields;
- ulint data_size;
- rec_t* rec;
- ulint end_offset;
- ulint ored_offset;
- byte* data;
- ulint len;
- ulint i;
-
- ut_ad(buf && dtuple);
- ut_ad(dtuple_validate(dtuple));
- ut_ad(dtuple_check_typed(dtuple));
-
- n_fields = dtuple_get_n_fields(dtuple);
- data_size = dtuple_get_data_size(dtuple);
-
- ut_ad(n_fields > 0);
-
- /* Calculate the offset of the origin in the physical record */
-
- rec = buf + rec_get_converted_extra_size(data_size, n_fields);
-#ifdef UNIV_DEBUG
- /* Suppress Valgrind warnings of ut_ad()
- in mach_write_to_1(), mach_write_to_2() et al. */
- memset(buf, 0xff, rec - buf + data_size);
-#endif /* UNIV_DEBUG */
- /* Store the number of fields */
- rec_set_n_fields_old(rec, n_fields);
-
- /* Set the info bits of the record */
- rec_set_info_bits(rec, FALSE,
- dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
-
- /* Store the data and the offsets */
-
- end_offset = 0;
-
- if (data_size <= REC_1BYTE_OFFS_LIMIT) {
-
- rec_set_1byte_offs_flag(rec, TRUE);
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(dtuple, i);
-
- data = dfield_get_data(field);
- len = dfield_get_len(field);
-
- if (len == UNIV_SQL_NULL) {
- len = dtype_get_sql_null_size(
- dfield_get_type(field));
- data_write_sql_null(rec + end_offset, len);
-
- end_offset += len;
- ored_offset = end_offset
- | REC_1BYTE_SQL_NULL_MASK;
- } else {
- /* If the data is not SQL null, store it */
- ut_memcpy(rec + end_offset, data, len);
-
- end_offset += len;
- ored_offset = end_offset;
- }
-
- rec_1_set_field_end_info(rec, i, ored_offset);
- }
- } else {
- rec_set_1byte_offs_flag(rec, FALSE);
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(dtuple, i);
-
- data = dfield_get_data(field);
- len = dfield_get_len(field);
-
- if (len == UNIV_SQL_NULL) {
- len = dtype_get_sql_null_size(
- dfield_get_type(field));
- data_write_sql_null(rec + end_offset, len);
-
- end_offset += len;
- ored_offset = end_offset
- | REC_2BYTE_SQL_NULL_MASK;
- } else {
- /* If the data is not SQL null, store it */
- ut_memcpy(rec + end_offset, data, len);
-
- end_offset += len;
- ored_offset = end_offset;
- }
-
- rec_2_set_field_end_info(rec, i, ored_offset);
- }
- }
-
- return(rec);
-}
-
-/*************************************************************
-Builds a new-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer. */
-static
-rec_t*
-rec_convert_dtuple_to_rec_new(
-/*==========================*/
- /* out: pointer to the origin
- of physical record */
- byte* buf, /* in: start address of the physical record */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple) /* in: data tuple */
-{
- dfield_t* field;
- dtype_t* type;
- rec_t* rec = buf + REC_N_NEW_EXTRA_BYTES;
- byte* end;
- byte* nulls;
- byte* lens;
- ulint len;
- ulint i;
- ulint n_node_ptr_field;
- ulint fixed_len;
- ulint null_mask = 1;
- const ulint n_fields = dtuple_get_n_fields(dtuple);
- const ulint status = dtuple_get_info_bits(dtuple)
- & REC_NEW_STATUS_MASK;
- ut_ad(dict_table_is_comp(index->table));
- ut_ad(n_fields > 0);
-
- /* Try to ensure that the memset() between the for() loops
- completes fast. The address is not exact, but UNIV_PREFETCH
- should never generate a memory fault. */
- UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields);
- UNIV_PREFETCH_RW(rec);
-
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
- ut_ad(n_fields <= dict_index_get_n_fields(index));
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- case REC_STATUS_NODE_PTR:
- ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1);
- n_node_ptr_field = n_fields - 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- ut_ad(n_fields == 1);
- n_node_ptr_field = ULINT_UNDEFINED;
- goto init;
- default:
- ut_a(0);
- return(0);
- }
-
- /* Calculate the offset of the origin in the physical record.
- We must loop over all fields to do this. */
- rec += UT_BITS_IN_BYTES(index->n_nullable);
-
- for (i = 0; i < n_fields; i++) {
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
-#ifdef UNIV_DEBUG
- field = dtuple_get_nth_field(dtuple, i);
- type = dfield_get_type(field);
- ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
- ut_ad(dfield_get_len(field) == 4);
-#endif /* UNIV_DEBUG */
- goto init;
- }
- field = dtuple_get_nth_field(dtuple, i);
- type = dfield_get_type(field);
- len = dfield_get_len(field);
- fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
-
- ut_ad(dict_col_type_assert_equal(
- dict_field_get_col(dict_index_get_nth_field(
- index, i)),
- dfield_get_type(field)));
-
- if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
- if (len == UNIV_SQL_NULL)
- continue;
- }
- /* only nullable fields can be null */
- ut_ad(len != UNIV_SQL_NULL);
- if (fixed_len) {
- ut_ad(len == fixed_len);
- } else {
- ut_ad(len <= dtype_get_len(type)
- || dtype_get_mtype(type) == DATA_BLOB);
- rec++;
- if (len >= 128
- && (dtype_get_len(type) >= 256
- || dtype_get_mtype(type) == DATA_BLOB)) {
- rec++;
- }
- }
- }
-
-init:
- end = rec;
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- /* clear the SQL-null flags */
- memset (lens + 1, 0, nulls - lens);
-
- /* Set the info bits of the record */
- rec_set_status(rec, status);
-
- rec_set_info_bits(rec, TRUE,
- dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK);
-
- /* Store the data and the offsets */
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(dtuple, i);
- type = dfield_get_type(field);
- len = dfield_get_len(field);
-
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
- ut_ad(len == 4);
- memcpy(end, dfield_get_data(field), len);
- break;
- }
- fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
-
- if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
- /* nullable field */
- ut_ad(index->n_nullable > 0);
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- ut_ad(*nulls < null_mask);
-
- /* set the null flag if necessary */
- if (len == UNIV_SQL_NULL) {
- *nulls |= null_mask;
- null_mask <<= 1;
- continue;
- }
-
- null_mask <<= 1;
- }
- /* only nullable fields can be null */
- ut_ad(len != UNIV_SQL_NULL);
- if (fixed_len) {
- ut_ad(len == fixed_len);
- } else {
- ut_ad(len <= dtype_get_len(type)
- || dtype_get_mtype(type) == DATA_BLOB);
- if (len < 128
- || (dtype_get_len(type) < 256
- && dtype_get_mtype(type) != DATA_BLOB)) {
-
- *lens-- = (byte) len;
- } else {
- /* the extern bits will be set later */
- ut_ad(len < 16384);
- *lens-- = (byte) (len >> 8) | 0x80;
- *lens-- = (byte) len;
- }
- }
-
- memcpy(end, dfield_get_data(field), len);
- end += len;
- }
-
- return(rec);
-}
-
-/*************************************************************
-Builds a physical record out of a data tuple and
-stores it beginning from the start of the given buffer. */
-
-rec_t*
-rec_convert_dtuple_to_rec(
-/*======================*/
- /* out: pointer to the origin
- of physical record */
- byte* buf, /* in: start address of the
- physical record */
- dict_index_t* index, /* in: record descriptor */
- dtuple_t* dtuple) /* in: data tuple */
-{
- rec_t* rec;
-
- ut_ad(buf && index && dtuple);
- ut_ad(dtuple_validate(dtuple));
- ut_ad(dtuple_check_typed(dtuple));
-
- if (dict_table_is_comp(index->table)) {
- rec = rec_convert_dtuple_to_rec_new(buf, index, dtuple);
- } else {
- rec = rec_convert_dtuple_to_rec_old(buf, dtuple);
- }
-
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- offsets = rec_get_offsets(rec, index,
- offsets_, ULINT_UNDEFINED, &heap);
- ut_ad(rec_validate(rec, offsets));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
- return(rec);
-}
-
-/******************************************************************
-Copies the first n fields of a physical record to a data tuple. The fields
-are copied to the memory heap. */
-
-void
-rec_copy_prefix_to_dtuple(
-/*======================*/
- dtuple_t* tuple, /* in: data tuple */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint n_fields, /* in: number of fields to copy */
- mem_heap_t* heap) /* in: memory heap */
-{
- dfield_t* field;
- byte* data;
- ulint len;
- byte* buf = NULL;
- ulint i;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- offsets = rec_get_offsets(rec, index, offsets, n_fields, &heap);
-
- ut_ad(rec_validate(rec, offsets));
- ut_ad(dtuple_check_typed(tuple));
-
- dtuple_set_info_bits(tuple, rec_get_info_bits(
- rec, dict_table_is_comp(index->table)));
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(tuple, i);
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- buf = mem_heap_alloc(heap, len);
-
- ut_memcpy(buf, data, len);
- }
-
- dfield_set_data(field, buf, len);
- }
-}
-
-/******************************************************************
-Copies the first n fields of an old-style physical record
-to a new physical record in a buffer. */
-static
-rec_t*
-rec_copy_prefix_to_buf_old(
-/*=======================*/
- /* out, own: copied record */
- rec_t* rec, /* in: physical record */
- ulint n_fields, /* in: number of fields to copy */
- ulint area_end, /* in: end of the prefix data */
- byte** buf, /* in/out: memory buffer for the copied prefix,
- or NULL */
- ulint* buf_size) /* in/out: buffer size */
-{
- rec_t* copy_rec;
- ulint area_start;
- ulint prefix_len;
-
- if (rec_get_1byte_offs_flag(rec)) {
- area_start = REC_N_OLD_EXTRA_BYTES + n_fields;
- } else {
- area_start = REC_N_OLD_EXTRA_BYTES + 2 * n_fields;
- }
-
- prefix_len = area_start + area_end;
-
- if ((*buf == NULL) || (*buf_size < prefix_len)) {
- if (*buf != NULL) {
- mem_free(*buf);
- }
-
- *buf = mem_alloc(prefix_len);
- *buf_size = prefix_len;
- }
-
- ut_memcpy(*buf, rec - area_start, prefix_len);
-
- copy_rec = *buf + area_start;
-
- rec_set_n_fields_old(copy_rec, n_fields);
-
- return(copy_rec);
-}
-
-/******************************************************************
-Copies the first n fields of a physical record to a new physical record in
-a buffer. */
-
-rec_t*
-rec_copy_prefix_to_buf(
-/*===================*/
- /* out, own: copied record */
- rec_t* rec, /* in: physical record */
- dict_index_t* index, /* in: record descriptor */
- ulint n_fields, /* in: number of fields to copy */
- byte** buf, /* in/out: memory buffer
- for the copied prefix, or NULL */
- ulint* buf_size) /* in/out: buffer size */
-{
- byte* nulls;
- byte* lens;
- ulint i;
- ulint prefix_len;
- ulint null_mask;
- ulint status;
-
- UNIV_PREFETCH_RW(*buf);
-
- if (!dict_table_is_comp(index->table)) {
- ut_ad(rec_validate_old(rec));
- return(rec_copy_prefix_to_buf_old(
- rec, n_fields,
- rec_get_field_start_offs(rec, n_fields),
- buf, buf_size));
- }
-
- status = rec_get_status(rec);
-
- switch (status) {
- case REC_STATUS_ORDINARY:
- ut_ad(n_fields <= dict_index_get_n_fields(index));
- break;
- case REC_STATUS_NODE_PTR:
- /* it doesn't make sense to copy the child page number field */
- ut_ad(n_fields <= dict_index_get_n_unique_in_tree(index));
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record: no sense to copy anything */
- default:
- ut_error;
- return(NULL);
- }
-
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- UNIV_PREFETCH_R(lens);
- prefix_len = 0;
- null_mask = 1;
-
- /* read the lengths of fields 0..n */
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* field;
- const dict_col_t* col;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
-
- if (!(col->prtype & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- continue;
- }
-
- null_mask <<= 1;
- }
-
- if (field->fixed_len) {
- prefix_len += field->fixed_len;
- } else {
- ulint len = *lens--;
- if (col->len > 255 || col->mtype == DATA_BLOB) {
- if (len & 0x80) {
- /* 1exxxxxx */
- len &= 0x3f;
- len <<= 8;
- len |= *lens--;
- UNIV_PREFETCH_R(lens);
- }
- }
- prefix_len += len;
- }
- }
-
- UNIV_PREFETCH_R(rec + prefix_len);
-
- prefix_len += rec - (lens + 1);
-
- if ((*buf == NULL) || (*buf_size < prefix_len)) {
- if (*buf != NULL) {
- mem_free(*buf);
- }
-
- *buf = mem_alloc(prefix_len);
- *buf_size = prefix_len;
- }
-
- memcpy(*buf, lens + 1, prefix_len);
-
- return(*buf + (rec - (lens + 1)));
-}
-
-/*******************************************************************
-Validates the consistency of an old-style physical record. */
-static
-ibool
-rec_validate_old(
-/*=============*/
- /* out: TRUE if ok */
- rec_t* rec) /* in: physical record */
-{
- byte* data;
- ulint len;
- ulint n_fields;
- ulint len_sum = 0;
- ulint sum = 0;
- ulint i;
-
- ut_a(rec);
- n_fields = rec_get_n_fields_old(rec);
-
- if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
- (ulong) n_fields);
- return(FALSE);
- }
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field_old(rec, i, &len);
-
- if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
- fprintf(stderr,
- "InnoDB: Error: record field %lu len %lu\n",
- (ulong) i,
- (ulong) len);
- return(FALSE);
- }
-
- if (len != UNIV_SQL_NULL) {
- len_sum += len;
- sum += *(data + len -1); /* dereference the
- end of the field to
- cause a memory trap
- if possible */
- } else {
- len_sum += rec_get_nth_field_size(rec, i);
- }
- }
-
- if (len_sum != rec_get_data_size_old(rec)) {
- fprintf(stderr,
- "InnoDB: Error: record len should be %lu, len %lu\n",
- (ulong) len_sum,
- rec_get_data_size_old(rec));
- return(FALSE);
- }
-
- rec_dummy = sum; /* This is here only to fool the compiler */
-
- return(TRUE);
-}
-
-/*******************************************************************
-Validates the consistency of a physical record. */
-
-ibool
-rec_validate(
-/*=========*/
- /* out: TRUE if ok */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- const byte* data;
- ulint len;
- ulint n_fields;
- ulint len_sum = 0;
- ulint sum = 0;
- ulint i;
-
- ut_a(rec);
- n_fields = rec_offs_n_fields(offsets);
-
- if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
- (ulong) n_fields);
- return(FALSE);
- }
-
- ut_a(rec_offs_comp(offsets) || n_fields <= rec_get_n_fields_old(rec));
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
- fprintf(stderr,
- "InnoDB: Error: record field %lu len %lu\n",
- (ulong) i,
- (ulong) len);
- return(FALSE);
- }
-
- if (len != UNIV_SQL_NULL) {
- len_sum += len;
- sum += *(data + len -1); /* dereference the
- end of the field to
- cause a memory trap
- if possible */
- } else if (!rec_offs_comp(offsets)) {
- len_sum += rec_get_nth_field_size(rec, i);
- }
- }
-
- if (len_sum != (ulint)(rec_get_end(rec, offsets) - rec)) {
- fprintf(stderr,
- "InnoDB: Error: record len should be %lu, len %lu\n",
- (ulong) len_sum,
- (ulong) (rec_get_end(rec, offsets) - rec));
- return(FALSE);
- }
-
- rec_dummy = sum; /* This is here only to fool the compiler */
-
- if (!rec_offs_comp(offsets)) {
- ut_a(rec_validate_old(rec));
- }
-
- return(TRUE);
-}
-
-/*******************************************************************
-Prints an old-style physical record. */
-
-void
-rec_print_old(
-/*==========*/
- FILE* file, /* in: file where to print */
- rec_t* rec) /* in: physical record */
-{
- const byte* data;
- ulint len;
- ulint n;
- ulint i;
-
- ut_ad(rec);
-
- n = rec_get_n_fields_old(rec);
-
- fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
- " %u-byte offsets; info bits %lu\n",
- (ulong) n,
- rec_get_1byte_offs_flag(rec) ? 1 : 2,
- (ulong) rec_get_info_bits(rec, FALSE));
-
- for (i = 0; i < n; i++) {
-
- data = rec_get_nth_field_old(rec, i, &len);
-
- fprintf(file, " %lu:", (ulong) i);
-
- if (len != UNIV_SQL_NULL) {
- if (len <= 30) {
-
- ut_print_buf(file, data, len);
- } else {
- ut_print_buf(file, data, 30);
-
- fputs("...(truncated)", file);
- }
- } else {
- fprintf(file, " SQL NULL, size %lu ",
- rec_get_nth_field_size(rec, i));
- }
- putc(';', file);
- }
-
- putc('\n', file);
-
- rec_validate_old(rec);
-}
-
-/*******************************************************************
-Prints a physical record. */
-
-void
-rec_print_new(
-/*==========*/
- FILE* file, /* in: file where to print */
- rec_t* rec, /* in: physical record */
- const ulint* offsets)/* in: array returned by rec_get_offsets() */
-{
- const byte* data;
- ulint len;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (!rec_offs_comp(offsets)) {
- rec_print_old(file, rec);
- return;
- }
-
- ut_ad(rec);
-
- fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
- " compact format; info bits %lu\n",
- (ulong) rec_offs_n_fields(offsets),
- (ulong) rec_get_info_bits(rec, TRUE));
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
-
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- fprintf(file, " %lu:", (ulong) i);
-
- if (len != UNIV_SQL_NULL) {
- if (len <= 30) {
-
- ut_print_buf(file, data, len);
- } else {
- ut_print_buf(file, data, 30);
-
- fputs("...(truncated)", file);
- }
- } else {
- fputs(" SQL NULL", file);
- }
- putc(';', file);
- }
-
- putc('\n', file);
-
- rec_validate(rec, offsets);
-}
-
-/*******************************************************************
-Prints a physical record. */
-
-void
-rec_print(
-/*======*/
- FILE* file, /* in: file where to print */
- rec_t* rec, /* in: physical record */
- dict_index_t* index) /* in: record descriptor */
-{
- ut_ad(index);
-
- if (!dict_table_is_comp(index->table)) {
- rec_print_old(file, rec);
- return;
- } else {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- rec_print_new(file, rec,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-}
diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
deleted file mode 100644
index ad14b927170..00000000000
--- a/storage/innobase/row/row0ins.c
+++ /dev/null
@@ -1,2522 +0,0 @@
-/******************************************************
-Insert into a table
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "row0ins.h"
-
-#ifdef UNIV_NONINL
-#include "row0ins.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "trx0undo.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "mach0data.h"
-#include "que0que.h"
-#include "row0upd.h"
-#include "row0sel.h"
-#include "row0row.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "log0log.h"
-#include "eval0eval.h"
-#include "data0data.h"
-#include "usr0sess.h"
-#include "buf0lru.h"
-
-#define ROW_INS_PREV 1
-#define ROW_INS_NEXT 2
-
-
-/*********************************************************************
-This prototype is copied from /mysql/sql/ha_innodb.cc.
-Invalidates the MySQL query cache for the table.
-NOTE that the exact prototype of this function has to be in
-/innobase/row/row0ins.c! */
-extern
-void
-innobase_invalidate_query_cache(
-/*============================*/
- trx_t* trx, /* in: transaction which modifies the table */
- char* full_name, /* in: concatenation of database name, null
- char '\0', table name, null char'\0';
- NOTE that in Windows this is always
- in LOWER CASE! */
- ulint full_name_len); /* in: full name length where also the null
- chars count */
-
-/*************************************************************************
-Creates an insert node struct. */
-
-ins_node_t*
-ins_node_create(
-/*============*/
- /* out, own: insert node struct */
- ulint ins_type, /* in: INS_VALUES, ... */
- dict_table_t* table, /* in: table where to insert */
- mem_heap_t* heap) /* in: mem heap where created */
-{
- ins_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(ins_node_t));
-
- node->common.type = QUE_NODE_INSERT;
-
- node->ins_type = ins_type;
-
- node->state = INS_NODE_SET_IX_LOCK;
- node->table = table;
- node->index = NULL;
- node->entry = NULL;
-
- node->select = NULL;
-
- node->trx_id = ut_dulint_zero;
-
- node->entry_sys_heap = mem_heap_create(128);
-
- node->magic_n = INS_NODE_MAGIC_N;
-
- return(node);
-}
-
-/***************************************************************
-Creates an entry template for each index of a table. */
-static
-void
-ins_node_create_entry_list(
-/*=======================*/
- ins_node_t* node) /* in: row insert node */
-{
- dict_index_t* index;
- dtuple_t* entry;
-
- ut_ad(node->entry_sys_heap);
-
- UT_LIST_INIT(node->entry_list);
-
- index = dict_table_get_first_index(node->table);
-
- while (index != NULL) {
- entry = row_build_index_entry(node->row, index,
- node->entry_sys_heap);
- UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
-
- index = dict_table_get_next_index(index);
- }
-}
-
-/*********************************************************************
-Adds system field buffers to a row. */
-static
-void
-row_ins_alloc_sys_fields(
-/*=====================*/
- ins_node_t* node) /* in: insert node */
-{
- dtuple_t* row;
- dict_table_t* table;
- mem_heap_t* heap;
- const dict_col_t* col;
- dfield_t* dfield;
- byte* ptr;
-
- row = node->row;
- table = node->table;
- heap = node->entry_sys_heap;
-
- ut_ad(row && table && heap);
- ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
-
- /* 1. Allocate buffer for row id */
-
- col = dict_table_get_sys_col(table, DATA_ROW_ID);
-
- dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
-
- ptr = mem_heap_alloc(heap, DATA_ROW_ID_LEN);
-
- dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
-
- node->row_id_buf = ptr;
-
- /* 3. Allocate buffer for trx id */
-
- col = dict_table_get_sys_col(table, DATA_TRX_ID);
-
- dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
-
- dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
-
- node->trx_id_buf = ptr;
-
- /* 4. Allocate buffer for roll ptr */
-
- col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
-
- dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
-
- dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
-}
-
-/*************************************************************************
-Sets a new row to insert for an INS_DIRECT node. This function is only used
-if we have constructed the row separately, which is a rare case; this
-function is quite slow. */
-
-void
-ins_node_set_new_row(
-/*=================*/
- ins_node_t* node, /* in: insert node */
- dtuple_t* row) /* in: new row (or first row) for the node */
-{
- node->state = INS_NODE_SET_IX_LOCK;
- node->index = NULL;
- node->entry = NULL;
-
- node->row = row;
-
- mem_heap_empty(node->entry_sys_heap);
-
- /* Create templates for index entries */
-
- ins_node_create_entry_list(node);
-
- /* Allocate from entry_sys_heap buffers for sys fields */
-
- row_ins_alloc_sys_fields(node);
-
- /* As we allocated a new trx id buf, the trx id should be written
- there again: */
-
- node->trx_id = ut_dulint_zero;
-}
-
-/***********************************************************************
-Does an insert operation by updating a delete-marked existing record
-in the index. This situation can occur if the delete-marked record is
-kept in the index for consistent reads. */
-static
-ulint
-row_ins_sec_index_entry_by_modify(
-/*==============================*/
- /* out: DB_SUCCESS or error code */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether mtr holds just a leaf
- latch or also a tree latch */
- btr_cur_t* cursor, /* in: B-tree cursor */
- dtuple_t* entry, /* in: index entry to insert */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- big_rec_t* dummy_big_rec;
- mem_heap_t* heap;
- upd_t* update;
- rec_t* rec;
- ulint err;
-
- rec = btr_cur_get_rec(cursor);
-
- ut_ad((cursor->index->type & DICT_CLUSTERED) == 0);
- ut_ad(rec_get_deleted_flag(rec,
- dict_table_is_comp(cursor->index->table)));
-
- /* We know that in the alphabetical ordering, entry and rec are
- identified. But in their binary form there may be differences if
- there are char fields in them. Therefore we have to calculate the
- difference. */
-
- heap = mem_heap_create(1024);
-
- update = row_upd_build_sec_rec_difference_binary(
- cursor->index, entry, rec, thr_get_trx(thr), heap);
- if (mode == BTR_MODIFY_LEAF) {
- /* Try an optimistic updating of the record, keeping changes
- within the page */
-
- err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
- update, 0, thr, mtr);
- if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
- err = DB_FAIL;
- }
- } else {
- ut_a(mode == BTR_MODIFY_TREE);
- if (buf_LRU_buf_pool_running_out()) {
-
- err = DB_LOCK_TABLE_FULL;
-
- goto func_exit;
- }
-
- err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
- &dummy_big_rec, update,
- 0, thr, mtr);
- }
-func_exit:
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***********************************************************************
-Does an insert operation by delete unmarking and updating a delete marked
-existing record in the index. This situation can occur if the delete marked
-record is kept in the index for consistent reads. */
-static
-ulint
-row_ins_clust_index_entry_by_modify(
-/*================================*/
- /* out: DB_SUCCESS, DB_FAIL, or error code */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether mtr holds just a leaf
- latch or also a tree latch */
- btr_cur_t* cursor, /* in: B-tree cursor */
- big_rec_t** big_rec,/* out: possible big rec vector of fields
- which have to be stored externally by the
- caller */
- dtuple_t* entry, /* in: index entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- mem_heap_t* heap;
- rec_t* rec;
- upd_t* update;
- ulint err;
-
- ut_ad(cursor->index->type & DICT_CLUSTERED);
-
- *big_rec = NULL;
-
- rec = btr_cur_get_rec(cursor);
-
- ut_ad(rec_get_deleted_flag(rec,
- dict_table_is_comp(cursor->index->table)));
-
- heap = mem_heap_create(1024);
-
- /* Build an update vector containing all the fields to be modified;
- NOTE that this vector may NOT contain system columns trx_id or
- roll_ptr */
-
- update = row_upd_build_difference_binary(cursor->index, entry, ext_vec,
- n_ext_vec, rec,
- thr_get_trx(thr), heap);
- if (mode == BTR_MODIFY_LEAF) {
- /* Try optimistic updating of the record, keeping changes
- within the page */
-
- err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
- mtr);
- if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
- err = DB_FAIL;
- }
- } else {
- ut_a(mode == BTR_MODIFY_TREE);
- if (buf_LRU_buf_pool_running_out()) {
-
- err = DB_LOCK_TABLE_FULL;
-
- goto func_exit;
- }
- err = btr_cur_pessimistic_update(0, cursor, big_rec, update,
- 0, thr, mtr);
- }
-func_exit:
- mem_heap_free(heap);
-
- return(err);
-}
-
-/*************************************************************************
-Returns TRUE if in a cascaded update/delete an ancestor node of node
-updates (not DELETE, but UPDATE) table. */
-static
-ibool
-row_ins_cascade_ancestor_updates_table(
-/*===================================*/
- /* out: TRUE if an ancestor updates table */
- que_node_t* node, /* in: node in a query graph */
- dict_table_t* table) /* in: table */
-{
- que_node_t* parent;
- upd_node_t* upd_node;
-
- parent = que_node_get_parent(node);
-
- while (que_node_get_type(parent) == QUE_NODE_UPDATE) {
-
- upd_node = parent;
-
- if (upd_node->table == table && upd_node->is_delete == FALSE) {
-
- return(TRUE);
- }
-
- parent = que_node_get_parent(parent);
-
- ut_a(parent);
- }
-
- return(FALSE);
-}
-
-/*************************************************************************
-Returns the number of ancestor UPDATE or DELETE nodes of a
-cascaded update/delete node. */
-static
-ulint
-row_ins_cascade_n_ancestors(
-/*========================*/
- /* out: number of ancestors */
- que_node_t* node) /* in: node in a query graph */
-{
- que_node_t* parent;
- ulint n_ancestors = 0;
-
- parent = que_node_get_parent(node);
-
- while (que_node_get_type(parent) == QUE_NODE_UPDATE) {
- n_ancestors++;
-
- parent = que_node_get_parent(parent);
-
- ut_a(parent);
- }
-
- return(n_ancestors);
-}
-
-/**********************************************************************
-Calculates the update vector node->cascade->update for a child table in
-a cascaded update. */
-static
-ulint
-row_ins_cascade_calc_update_vec(
-/*============================*/
- /* out: number of fields in the
- calculated update vector; the value
- can also be 0 if no foreign key
- fields changed; the returned value
- is ULINT_UNDEFINED if the column
- type in the child table is too short
- to fit the new value in the parent
- table: that means the update fails */
- upd_node_t* node, /* in: update node of the parent
- table */
- dict_foreign_t* foreign, /* in: foreign key constraint whose
- type is != 0 */
- mem_heap_t* heap) /* in: memory heap to use as
- temporary storage */
-{
- upd_node_t* cascade = node->cascade_node;
- dict_table_t* table = foreign->foreign_table;
- dict_index_t* index = foreign->foreign_index;
- upd_t* update;
- upd_field_t* ufield;
- dict_table_t* parent_table;
- dict_index_t* parent_index;
- upd_t* parent_update;
- upd_field_t* parent_ufield;
- ulint n_fields_updated;
- ulint parent_field_no;
- ulint i;
- ulint j;
-
- ut_a(node);
- ut_a(foreign);
- ut_a(cascade);
- ut_a(table);
- ut_a(index);
-
- /* Calculate the appropriate update vector which will set the fields
- in the child index record to the same value (possibly padded with
- spaces if the column is a fixed length CHAR or FIXBINARY column) as
- the referenced index record will get in the update. */
-
- parent_table = node->table;
- ut_a(parent_table == foreign->referenced_table);
- parent_index = foreign->referenced_index;
- parent_update = node->update;
-
- update = cascade->update;
-
- update->info_bits = 0;
- update->n_fields = foreign->n_fields;
-
- n_fields_updated = 0;
-
- for (i = 0; i < foreign->n_fields; i++) {
-
- parent_field_no = dict_table_get_nth_col_pos(
- parent_table,
- dict_index_get_nth_col_no(parent_index, i));
-
- for (j = 0; j < parent_update->n_fields; j++) {
- parent_ufield = parent_update->fields + j;
-
- if (parent_ufield->field_no == parent_field_no) {
-
- ulint min_size;
- const dict_col_t* col;
-
- col = dict_index_get_nth_col(index, i);
-
- /* A field in the parent index record is
- updated. Let us make the update vector
- field for the child table. */
-
- ufield = update->fields + n_fields_updated;
-
- ufield->field_no
- = dict_table_get_nth_col_pos(
- table, dict_col_get_no(col));
- ufield->exp = NULL;
-
- ufield->new_val = parent_ufield->new_val;
-
- /* Do not allow a NOT NULL column to be
- updated as NULL */
-
- if (ufield->new_val.len == UNIV_SQL_NULL
- && (col->prtype & DATA_NOT_NULL)) {
-
- return(ULINT_UNDEFINED);
- }
-
- /* If the new value would not fit in the
- column, do not allow the update */
-
- if (ufield->new_val.len != UNIV_SQL_NULL
- && dtype_get_at_most_n_mbchars(
- col->prtype,
- col->mbminlen, col->mbmaxlen,
- col->len,
- ufield->new_val.len,
- ufield->new_val.data)
- < ufield->new_val.len) {
-
- return(ULINT_UNDEFINED);
- }
-
- /* If the parent column type has a different
- length than the child column type, we may
- need to pad with spaces the new value of the
- child column */
-
- min_size = dict_col_get_min_size(col);
-
- if (min_size
- && ufield->new_val.len != UNIV_SQL_NULL
- && ufield->new_val.len < min_size) {
-
- char* pad_start;
- const char* pad_end;
- ufield->new_val.data = mem_heap_alloc(
- heap, min_size);
- pad_start = ((char*) ufield
- ->new_val.data)
- + ufield->new_val.len;
- pad_end = ((char*) ufield
- ->new_val.data)
- + min_size;
- ufield->new_val.len = min_size;
- ut_memcpy(ufield->new_val.data,
- parent_ufield->new_val.data,
- parent_ufield->new_val.len);
-
- switch (UNIV_EXPECT(col->mbminlen,1)) {
- default:
- ut_error;
- case 1:
- if (UNIV_UNLIKELY
- (dtype_get_charset_coll(
- col->prtype)
- == DATA_MYSQL_BINARY_CHARSET_COLL)) {
- /* Do not pad BINARY
- columns. */
- return(ULINT_UNDEFINED);
- }
-
- /* space=0x20 */
- memset(pad_start, 0x20,
- pad_end - pad_start);
- break;
- case 2:
- /* space=0x0020 */
- ut_a(!(ufield->new_val.len
- % 2));
- ut_a(!(min_size % 2));
- do {
- *pad_start++ = 0x00;
- *pad_start++ = 0x20;
- } while (pad_start < pad_end);
- break;
- }
- }
-
- ufield->extern_storage = FALSE;
-
- n_fields_updated++;
- }
- }
- }
-
- update->n_fields = n_fields_updated;
-
- return(n_fields_updated);
-}
-
-/*************************************************************************
-Set detailed error message associated with foreign key errors for
-the given transaction.
-
-The text is composed in the shared file srv_misc_tmpfile while holding
-srv_misc_tmpfile_mutex. The file is rewound first; if os_file_set_eof()
-then returns nonzero, foreign->foreign_table_name and the constraint
-definition are printed to the file and the file is handed to
-trx_set_detailed_error_from_file(). Otherwise the fixed text
-"temp file operation failed" is stored with trx_set_detailed_error(). */
-static
-void
-row_ins_set_detailed(
-/*=================*/
- trx_t* trx, /* in: transaction */
- dict_foreign_t* foreign) /* in: foreign key constraint */
-{
- mutex_enter(&srv_misc_tmpfile_mutex);
- rewind(srv_misc_tmpfile);
-
- if (os_file_set_eof(srv_misc_tmpfile)) { /* NOTE(review): presumably truncates the file at the rewound position - confirm */
- ut_print_name(srv_misc_tmpfile, trx, TRUE,
- foreign->foreign_table_name);
- dict_print_info_on_foreign_key_in_create_format(
- srv_misc_tmpfile, trx, foreign, FALSE);
- trx_set_detailed_error_from_file(trx, srv_misc_tmpfile);
- } else {
- trx_set_detailed_error(trx, "temp file operation failed");
- }
-
- mutex_exit(&srv_misc_tmpfile_mutex);
-}
-
-/*************************************************************************
-Reports a foreign key error associated with an update or a delete of a
-parent table index entry.
-
-The function first stores a detailed error on the transaction of thr with
-row_ins_set_detailed(). Then, holding dict_foreign_err_mutex, it rewinds
-dict_foreign_err_file and writes the report from the start of that file:
-a timestamp, the transaction, the constraint definition, errstr, the
-parent index name and tuple, and the child index name and record. */
-static
-void
-row_ins_foreign_report_err(
-/*=======================*/
- const char* errstr, /* in: error string from the viewpoint
- of the parent table */
- que_thr_t* thr, /* in: query thread whose run_node
- is an update node */
- dict_foreign_t* foreign, /* in: foreign key constraint */
- rec_t* rec, /* in: a matching index record in the
- child table; if NULL, the text
- "the record is not available" is
- printed instead of the record */
- dtuple_t* entry) /* in: index entry in the parent
- table; if NULL, no tuple is printed */
-{
- FILE* ef = dict_foreign_err_file;
- trx_t* trx = thr_get_trx(thr);
-
- row_ins_set_detailed(trx, foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Transaction:\n", ef);
- trx_print(ef, trx, 600);
-
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(":\n", ef);
- dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
- TRUE);
- putc('\n', ef);
- fputs(errstr, ef);
- fputs(" in parent table, in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
- if (entry) {
- fputs(" tuple:\n", ef);
- dtuple_print(ef, entry);
- }
- fputs("\nBut in child table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(", in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
- if (rec) {
- fputs(", there is a record:\n", ef);
- rec_print(ef, rec, foreign->foreign_index);
- } else {
- fputs(", the record is not available\n", ef);
- }
- putc('\n', ef);
-
- mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*************************************************************************
-Reports a foreign key error to dict_foreign_err_file when we are trying
-to add an index entry to a child table. Note that the adding may be the result
-of an update, too.
-
-A detailed error is first stored on trx with row_ins_set_detailed(). The
-report itself is written while holding dict_foreign_err_mutex, after
-rewinding the file: a timestamp, the transaction, the constraint
-definition, the child index name and tuple, and the parent table and
-index names followed by the closest matching parent record. */
-static
-void
-row_ins_foreign_report_add_err(
-/*===========================*/
- trx_t* trx, /* in: transaction */
- dict_foreign_t* foreign, /* in: foreign key constraint */
- rec_t* rec, /* in: a record in the parent table:
- it does not match entry because we
- have an error! If NULL, no record is
- printed */
- dtuple_t* entry) /* in: index entry to insert in the
- child table; if NULL, no tuple is
- printed */
-{
- FILE* ef = dict_foreign_err_file;
-
- row_ins_set_detailed(trx, foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Transaction:\n", ef);
- trx_print(ef, trx, 600);
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(":\n", ef);
- dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
- TRUE);
- fputs("\nTrying to add in child table, in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
- if (entry) {
- fputs(" tuple:\n", ef);
- dtuple_print(ef, entry);
- }
- fputs("\nBut in parent table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->referenced_table_name);
- fputs(", in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
- fputs(",\nthe closest match we can find is record:\n", ef);
- if (rec && page_rec_is_supremum(rec)) {
- /* If the cursor ended on a supremum record, it is better
- to report the previous record in the error message, so that
- the user gets a more descriptive error message. */
- rec = page_rec_get_prev(rec);
- }
-
- if (rec) { /* rec is tested again because it was reassigned above */
- rec_print(ef, rec, foreign->referenced_index);
- }
- putc('\n', ef);
-
- mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*************************************************************************
-Invalidate the query cache for the given table.
-
-A private copy of name is made and the first '/' in the copy is
-overwritten with a NUL byte, so that the buffer holds the database name
-and the table name as two consecutive NUL-terminated strings. The copy
-and the length of the full name, counting its terminating NUL, are passed
-to innobase_invalidate_query_cache(). That call is compiled out when
-UNIV_HOTBACKUP is defined; the copy is made and freed in either case. */
-static
-void
-row_ins_invalidate_query_cache(
-/*===========================*/
- que_thr_t* thr, /* in: query thread whose run_node
- is an update node */
- const char* name) /* in: table name prefixed with
- database name and a '/' character;
- the function asserts that a '/' is
- present */
-{
- char* buf;
- char* ptr;
- ulint len = strlen(name) + 1; /* length of name including the terminating NUL */
-
- buf = mem_strdupl(name, len);
-
- ptr = strchr(buf, '/');
- ut_a(ptr);
- *ptr = '\0';
-
- /* We call a function in ha_innodb.cc */
-#ifndef UNIV_HOTBACKUP
- innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
-#endif
- mem_free(buf);
-}
-
-/*************************************************************************
-Perform referential actions or checks when a parent row is deleted or updated
-and the constraint had an ON DELETE or ON UPDATE condition which was not
-RESTRICT.
-
-Outline of the steps taken, in order:
-1) The query cache is invalidated for the child table.
-2) If the operation of the run node (delete or update) has neither a
-CASCADE nor a SET NULL action in foreign->type, the error is reported and
-DB_ROW_IS_REFERENCED is returned at once. These two early returns do not
-touch mtr or pcur.
-3) The cascade node of the run node is created on first use and set up
-for this constraint.
-4) A cascaded update that would modify a table already being updated in
-the cascade chain, or a chain with 15 or more ancestors, is rejected with
-DB_ROW_IS_REFERENCED.
-5) The clustered index record of the child row is located, the child
-table is locked in LOCK_IX mode and the record with LOCK_X.
-6) For SET NULL or ON UPDATE CASCADE the update vector is built.
-7) The cursor positions are stored, mtr is committed, the cascaded
-operation is run with row_update_cascade_for_mysql(), and then mtr is
-restarted and the position of pcur is restored.
-
-Every exit through nonstandard_exit_func also commits and restarts mtr
-and restores the position of pcur. */
-static
-ulint
-row_ins_foreign_check_on_constraint(
-/*================================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT,
- or error code */
- que_thr_t* thr, /* in: query thread whose run_node
- is an update node */
- dict_foreign_t* foreign, /* in: foreign key constraint whose
- type is != 0 */
- btr_pcur_t* pcur, /* in: cursor placed on a matching
- index record in the child table */
- dtuple_t* entry, /* in: index entry in the parent
- table */
- mtr_t* mtr) /* in: mtr holding the latch of pcur
- page */
-{
- upd_node_t* node;
- upd_node_t* cascade;
- dict_table_t* table = foreign->foreign_table;
- dict_index_t* index;
- dict_index_t* clust_index;
- dtuple_t* ref;
- mem_heap_t* upd_vec_heap = NULL;
- rec_t* rec;
- rec_t* clust_rec;
- upd_t* update;
- ulint n_to_update;
- ulint err;
- ulint i;
- trx_t* trx;
- mem_heap_t* tmp_heap = NULL;
-
- ut_a(thr);
- ut_a(foreign);
- ut_a(pcur);
- ut_a(mtr);
-
- trx = thr_get_trx(thr);
-
- /* Since we are going to delete or update a row, we have to invalidate
- the MySQL query cache for table. A deadlock of threads is not possible
- here because the caller of this function does not hold any latches with
- the sync0sync.h rank above the kernel mutex. The query cache mutex has
- a rank just above the kernel mutex. */
-
- row_ins_invalidate_query_cache(thr, table->name);
-
- node = thr->run_node;
-
- if (node->is_delete && 0 == (foreign->type
- & (DICT_FOREIGN_ON_DELETE_CASCADE
- | DICT_FOREIGN_ON_DELETE_SET_NULL))) {
-
- row_ins_foreign_report_err("Trying to delete",
- thr, foreign,
- btr_pcur_get_rec(pcur), entry);
-
- return(DB_ROW_IS_REFERENCED);
- }
-
- if (!node->is_delete && 0 == (foreign->type
- & (DICT_FOREIGN_ON_UPDATE_CASCADE
- | DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
-
- /* This is an UPDATE */
-
- row_ins_foreign_report_err("Trying to update",
- thr, foreign,
- btr_pcur_get_rec(pcur), entry);
-
- return(DB_ROW_IS_REFERENCED);
- }
-
- if (node->cascade_node == NULL) {
- /* Extend our query graph by creating a child to current
- update node. The child is used in the cascade or set null
- operation. */
-
- node->cascade_heap = mem_heap_create(128);
- node->cascade_node = row_create_update_node_for_mysql(
- table, node->cascade_heap);
- que_node_set_parent(node->cascade_node, node);
- }
-
- /* Initialize cascade_node to do the operation we want. Note that we
- use the SAME cascade node to do all foreign key operations of the
- SQL DELETE: the table of the cascade node may change if there are
- several child tables to the table where the delete is done! */
-
- cascade = node->cascade_node;
-
- cascade->table = table;
-
- cascade->foreign = foreign;
-
- if (node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)) {
- cascade->is_delete = TRUE;
- } else {
- cascade->is_delete = FALSE;
-
- if (foreign->n_fields > cascade->update_n_fields) {
- /* We have to make the update vector longer. The new
- vector is allocated from node->cascade_heap, the same
- heap the cascade node was created from. */
-
- cascade->update = upd_create(foreign->n_fields,
- node->cascade_heap);
- cascade->update_n_fields = foreign->n_fields;
- }
- }
-
- /* We do not allow cyclic cascaded updating (DELETE is allowed,
- but not UPDATE) of the same table, as this can lead to an infinite
- cycle. Check that we are not updating the same table which is
- already being modified in this cascade chain. We have to check
- this also because the modification of the indexes of a 'parent'
- table may still be incomplete, and we must avoid seeing the indexes
- of the parent table in an inconsistent state! */
-
- if (!cascade->is_delete
- && row_ins_cascade_ancestor_updates_table(cascade, table)) {
-
- /* We do not know if this would break foreign key
- constraints, but play safe and return an error */
-
- err = DB_ROW_IS_REFERENCED;
-
- row_ins_foreign_report_err(
- "Trying an update, possibly causing a cyclic"
- " cascaded update\n"
- "in the child table,", thr, foreign,
- btr_pcur_get_rec(pcur), entry);
-
- goto nonstandard_exit_func;
- }
-
- if (row_ins_cascade_n_ancestors(cascade) >= 15) { /* the cascade chain is treated as too deep */
- err = DB_ROW_IS_REFERENCED;
-
- row_ins_foreign_report_err(
- "Trying a too deep cascaded delete or update\n",
- thr, foreign, btr_pcur_get_rec(pcur), entry);
-
- goto nonstandard_exit_func;
- }
-
- index = btr_pcur_get_btr_cur(pcur)->index;
-
- ut_a(index == foreign->foreign_index);
-
- rec = btr_pcur_get_rec(pcur);
-
- if (index->type & DICT_CLUSTERED) {
- /* pcur is already positioned in the clustered index of
- the child table */
-
- clust_index = index;
- clust_rec = rec;
- } else {
- /* We have to look for the record in the clustered index
- in the child table. The row reference is built in tmp_heap
- and the search uses the cursor of the cascade node. */
-
- clust_index = dict_table_get_first_index(table);
-
- tmp_heap = mem_heap_create(256);
-
- ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
- tmp_heap);
- btr_pcur_open_with_no_init(clust_index, ref,
- PAGE_CUR_LE, BTR_SEARCH_LEAF,
- cascade->pcur, 0, mtr);
-
- clust_rec = btr_pcur_get_rec(cascade->pcur);
-
- if (!page_rec_is_user_rec(clust_rec)
- || btr_pcur_get_low_match(cascade->pcur)
- < dict_index_get_n_unique(clust_index)) {
-
- fputs("InnoDB: error in cascade of a foreign key op\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
-
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, rec, index);
- fputs("\n"
- "InnoDB: clustered record ", stderr);
- rec_print(stderr, clust_rec, clust_index);
- fputs("\n"
- "InnoDB: Submit a detailed bug report to"
- " http://bugs.mysql.com\n", stderr);
-
- err = DB_SUCCESS; /* the mismatch is only logged to stderr; the caller gets DB_SUCCESS */
-
- goto nonstandard_exit_func;
- }
- }
-
- /* Set an X-lock on the row to delete or update in the child table.
- The table itself is first locked in LOCK_IX mode. */
-
- err = lock_table(0, table, LOCK_IX, thr);
-
- if (err == DB_SUCCESS) {
- /* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
- we already have a normal shared lock on the appropriate
- gap if the search criterion was not unique */
-
- err = lock_clust_rec_read_check_and_lock_alt(
- 0, clust_rec, clust_index, LOCK_X, LOCK_REC_NOT_GAP,
- thr);
- }
-
- if (err != DB_SUCCESS) {
-
- goto nonstandard_exit_func;
- }
-
- if (rec_get_deleted_flag(clust_rec, dict_table_is_comp(table))) {
- /* This can happen if there is a circular reference of
- rows such that cascading delete comes to delete a row
- already in the process of being delete marked */
- err = DB_SUCCESS;
-
- goto nonstandard_exit_func;
- }
-
- if ((node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL))
- || (!node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
-
- /* Build the appropriate update vector which sets
- foreign->n_fields first fields in rec to SQL NULL */
-
- update = cascade->update;
-
- update->info_bits = 0;
- update->n_fields = foreign->n_fields;
-
- for (i = 0; i < foreign->n_fields; i++) {
- (update->fields + i)->field_no
- = dict_table_get_nth_col_pos(
- table,
- dict_index_get_nth_col_no(index, i));
- (update->fields + i)->exp = NULL;
- (update->fields + i)->new_val.len = UNIV_SQL_NULL;
- (update->fields + i)->new_val.data = NULL;
- (update->fields + i)->extern_storage = FALSE;
- }
- }
-
- if (!node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)) {
-
- /* Build the appropriate update vector which sets changing
- foreign->n_fields first fields in rec to new values. The
- heap upd_vec_heap is passed to the helper and is freed on
- every exit path of this function. */
-
- upd_vec_heap = mem_heap_create(256);
-
- n_to_update = row_ins_cascade_calc_update_vec(node, foreign,
- upd_vec_heap);
- if (n_to_update == ULINT_UNDEFINED) {
- err = DB_ROW_IS_REFERENCED;
-
- row_ins_foreign_report_err(
- "Trying a cascaded update where the"
- " updated value in the child\n"
- "table would not fit in the length"
- " of the column, or the value would\n"
- "be NULL and the column is"
- " declared as not NULL in the child table,",
- thr, foreign, btr_pcur_get_rec(pcur), entry);
-
- goto nonstandard_exit_func;
- }
-
- if (cascade->update->n_fields == 0) {
-
- /* The update does not change any columns referred
- to in this foreign key constraint: no need to do
- anything */
-
- err = DB_SUCCESS;
-
- goto nonstandard_exit_func;
- }
- }
-
- /* Store pcur position and initialize or store the cascade node
- pcur stored position. When pcur is already on the clustered index,
- its stored position is copied to the cascade cursor; otherwise the
- cascade cursor, opened above on the clustered index, stores its own
- position. */
-
- btr_pcur_store_position(pcur, mtr);
-
- if (index == clust_index) {
- btr_pcur_copy_stored_position(cascade->pcur, pcur);
- } else {
- btr_pcur_store_position(cascade->pcur, mtr);
- }
-
- mtr_commit(mtr);
-
- ut_a(cascade->pcur->rel_pos == BTR_PCUR_ON);
-
- cascade->state = UPD_NODE_UPDATE_CLUSTERED;
-
- err = row_update_cascade_for_mysql(thr, cascade,
- foreign->foreign_table);
-
- if (foreign->foreign_table->n_foreign_key_checks_running == 0) {
- fprintf(stderr,
- "InnoDB: error: table %s has the counter 0"
- " though there is\n"
- "InnoDB: a FOREIGN KEY check running on it.\n",
- foreign->foreign_table->name);
- }
-
- /* Release the data dictionary latch for a while, so that we do not
- starve other threads from doing CREATE TABLE etc. if we have a huge
- cascaded operation running. The counter n_foreign_key_checks_running
- will prevent other users from dropping or ALTERing the table when we
- release the latch. */
-
- row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
- row_mysql_freeze_data_dictionary(thr_get_trx(thr));
-
- mtr_start(mtr);
-
- /* Restore pcur position */
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
-
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- if (upd_vec_heap) {
- mem_heap_free(upd_vec_heap);
- }
-
- return(err);
-
-nonstandard_exit_func:
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- if (upd_vec_heap) {
- mem_heap_free(upd_vec_heap);
- }
-
- btr_pcur_store_position(pcur, mtr);
-
- mtr_commit(mtr);
- mtr_start(mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
-
- return(err);
-}
-
-/*************************************************************************
-Sets a shared lock on a record. Used in locking possible duplicate key
-records and also in checking foreign key constraints.
-
-The request is made in LOCK_S mode. For a clustered index it goes to
-lock_clust_rec_read_check_and_lock(), for any other index to
-lock_sec_rec_read_check_and_lock(); the return value of that call is
-returned unchanged. */
-static
-ulint
-row_ins_set_shared_rec_lock(
-/*========================*/
- /* out: DB_SUCCESS or error code */
- ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP type lock */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (index->type & DICT_CLUSTERED) {
- err = lock_clust_rec_read_check_and_lock(
- 0, rec, index, offsets, LOCK_S, type, thr);
- } else {
- err = lock_sec_rec_read_check_and_lock(
- 0, rec, index, offsets, LOCK_S, type, thr);
- }
-
- return(err);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Sets an exclusive lock on a record. Used in locking possible duplicate key
-records.
-
-The request is made in LOCK_X mode. For a clustered index it goes to
-lock_clust_rec_read_check_and_lock(), for any other index to
-lock_sec_rec_read_check_and_lock(); the return value of that call is
-returned unchanged. The function is not compiled when UNIV_HOTBACKUP
-is defined. */
-static
-ulint
-row_ins_set_exclusive_rec_lock(
-/*===========================*/
- /* out: DB_SUCCESS or error code */
- ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP type lock */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (index->type & DICT_CLUSTERED) {
- err = lock_clust_rec_read_check_and_lock(
- 0, rec, index, offsets, LOCK_X, type, thr);
- } else {
- err = lock_sec_rec_read_check_and_lock(
- 0, rec, index, offsets, LOCK_X, type, thr);
- }
-
- return(err);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************
-Checks if foreign key constraint fails for an index entry. Sets shared locks
-which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_operation_lock.
-
-The check is skipped, and DB_SUCCESS returned, in three cases: when
-trx->check_foreigns is FALSE, when any of the first foreign->n_fields
-fields of entry is SQL NULL, and when the run node of thr is a non-delete
-update node whose foreign member is this same constraint.
-
-Otherwise the index to check is scanned starting from the first record
-that is >= entry (PAGE_CUR_GE), comparing only the first
-foreign->n_fields fields of entry; the n_fields_cmp value of entry is
-restored after the scan. If a lock request returns DB_LOCK_WAIT, the
-thread is suspended; when trx->error_state is DB_SUCCESS after the wait
-the whole check is run again from the start, otherwise
-trx->error_state is returned. */
-
-ulint
-row_ins_check_foreign_constraint(
-/*=============================*/
- /* out: DB_SUCCESS,
- DB_NO_REFERENCED_ROW,
- or DB_ROW_IS_REFERENCED; the code
- below can also return
- DB_FOREIGN_DUPLICATE_KEY, another
- error from
- row_ins_foreign_check_on_constraint()
- or from a lock request, or
- trx->error_state after a lock wait */
- ibool check_ref,/* in: TRUE if we want to check that
- the referenced table is ok, FALSE if we
- want to check the foreign key table */
- dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the
- tables mentioned in it must be in the
- dictionary cache if they exist at all */
- dict_table_t* table, /* in: if check_ref is TRUE, then the foreign
- table, else the referenced table */
- dtuple_t* entry, /* in: index entry for index */
- que_thr_t* thr) /* in: query thread */
-{
- upd_node_t* upd_node;
- dict_table_t* check_table;
- dict_index_t* check_index;
- ulint n_fields_cmp;
- rec_t* rec;
- btr_pcur_t pcur;
- ibool moved;
- int cmp;
- ulint err;
- ulint i;
- mtr_t mtr;
- trx_t* trx = thr_get_trx(thr);
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* the first element is set to the number of elements in offsets_ */
-
-run_again:
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- err = DB_SUCCESS;
-
- if (trx->check_foreigns == FALSE) {
- /* The user has suppressed foreign key checks currently for
- this session */
- goto exit_func;
- }
-
- /* If any of the foreign key fields in entry is SQL NULL, we
- suppress the foreign key check: this is compatible with Oracle,
- for example */
-
- for (i = 0; i < foreign->n_fields; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
-
- goto exit_func;
- }
- }
-
- if (que_node_get_type(thr->run_node) == QUE_NODE_UPDATE) {
- upd_node = thr->run_node;
-
- if (!(upd_node->is_delete) && upd_node->foreign == foreign) {
- /* If a cascaded update is done as defined by a
- foreign key constraint, do not check that
- constraint for the child row. In ON UPDATE CASCADE
- the update of the parent row is only half done when
- we come here: if we would check the constraint here
- for the child row it would fail.
-
- A QUESTION remains: if in the child table there are
- several constraints which refer to the same parent
- table, we should merge all updates to the child as
- one update? And the updates can be contradictory!
- Currently we just perform the update associated
- with each foreign key constraint, one after
- another, and the user has problems predicting in
- which order they are performed. */
-
- goto exit_func;
- }
- }
-
- if (check_ref) {
- check_table = foreign->referenced_table;
- check_index = foreign->referenced_index;
- } else {
- check_table = foreign->foreign_table;
- check_index = foreign->foreign_index;
- }
-
- if (check_table == NULL || check_table->ibd_file_missing) {
- if (check_ref) { /* a missing parent table fails the check; when check_ref is FALSE err stays DB_SUCCESS */
- FILE* ef = dict_foreign_err_file;
-
- row_ins_set_detailed(trx, foreign);
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Transaction:\n", ef);
- trx_print(ef, trx, 600);
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE,
- foreign->foreign_table_name);
- fputs(":\n", ef);
- dict_print_info_on_foreign_key_in_create_format(
- ef, trx, foreign, TRUE);
- fputs("\nTrying to add to index ", ef);
- ut_print_name(ef, trx, FALSE,
- foreign->foreign_index->name);
- fputs(" tuple:\n", ef);
- dtuple_print(ef, entry);
- fputs("\nBut the parent table ", ef);
- ut_print_name(ef, trx, TRUE,
- foreign->referenced_table_name);
- fputs("\nor its .ibd file does"
- " not currently exist!\n", ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- err = DB_NO_REFERENCED_ROW;
- }
-
- goto exit_func;
- }
-
- ut_a(check_table);
- ut_a(check_index);
-
- if (check_table != table) {
- /* We already have a LOCK_IX on table, but not necessarily
- on check_table */
-
- err = lock_table(0, check_table, LOCK_IS, thr);
-
- if (err != DB_SUCCESS) {
-
- goto do_possible_lock_wait;
- }
- }
-
- mtr_start(&mtr);
-
- /* Store old value on n_fields_cmp */
-
- n_fields_cmp = dtuple_get_n_fields_cmp(entry);
-
- dtuple_set_n_fields_cmp(entry, foreign->n_fields);
-
- btr_pcur_open(check_index, entry, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-
- /* Scan index records and check if there is a matching record.
- The loop ends with a break on a lock error, on the first decisive
- record, or when the cursor cannot be moved further. */
-
- for (;;) {
- rec = btr_pcur_get_rec(&pcur);
-
- if (page_rec_is_infimum(rec)) {
-
- goto next_rec;
- }
-
- offsets = rec_get_offsets(rec, check_index,
- offsets, ULINT_UNDEFINED, &heap);
-
- if (page_rec_is_supremum(rec)) {
-
- err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, rec, check_index, offsets, thr);
- if (err != DB_SUCCESS) {
-
- break;
- }
-
- goto next_rec;
- }
-
- cmp = cmp_dtuple_rec(entry, rec, offsets);
-
- if (cmp == 0) {
- if (rec_get_deleted_flag(rec,
- rec_offs_comp(offsets))) {
- err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, rec, check_index,
- offsets, thr);
- if (err != DB_SUCCESS) {
-
- break;
- }
- } else {
- /* Found a matching record. Lock only
- a record because we can allow inserts
- into gaps */
-
- err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP, rec, check_index,
- offsets, thr);
-
- if (err != DB_SUCCESS) {
-
- break;
- }
-
- if (check_ref) {
- err = DB_SUCCESS;
-
- break;
- } else if (foreign->type != 0) {
- /* There is an ON UPDATE or ON DELETE
- condition: check them in a separate
- function */
-
- err = row_ins_foreign_check_on_constraint(
- thr, foreign, &pcur, entry,
- &mtr);
- if (err != DB_SUCCESS) {
- /* Since reporting a plain
- "duplicate key" error
- message to the user in
- cases where a long CASCADE
- operation would lead to a
- duplicate key in some
- other table is very
- confusing, map duplicate
- key errors resulting from
- FK constraints to a
- separate error code. */
-
- if (err == DB_DUPLICATE_KEY) {
- err = DB_FOREIGN_DUPLICATE_KEY;
- }
-
- break;
- }
- } else {
- row_ins_foreign_report_err(
- "Trying to delete or update",
- thr, foreign, rec, entry);
-
- err = DB_ROW_IS_REFERENCED;
- break;
- }
- }
- }
-
- if (cmp < 0) {
- err = row_ins_set_shared_rec_lock(
- LOCK_GAP, rec, check_index, offsets, thr);
- if (err != DB_SUCCESS) {
-
- break;
- }
-
- if (check_ref) {
- err = DB_NO_REFERENCED_ROW;
- row_ins_foreign_report_add_err(
- trx, foreign, rec, entry);
- } else {
- err = DB_SUCCESS;
- }
-
- break;
- }
-
- ut_a(cmp == 0); /* cmp < 0 left the loop above, so this asserts that cmp > 0 never occurs here */
-next_rec:
- moved = btr_pcur_move_to_next(&pcur, &mtr);
-
- if (!moved) {
- if (check_ref) {
- rec = btr_pcur_get_rec(&pcur);
- row_ins_foreign_report_add_err(
- trx, foreign, rec, entry);
- err = DB_NO_REFERENCED_ROW;
- } else {
- err = DB_SUCCESS;
- }
-
- break;
- }
- }
-
- btr_pcur_close(&pcur);
-
- mtr_commit(&mtr);
-
- /* Restore old value */
- dtuple_set_n_fields_cmp(entry, n_fields_cmp);
-
-do_possible_lock_wait:
- if (err == DB_LOCK_WAIT) {
- trx->error_state = err;
-
- que_thr_stop_for_mysql(thr);
-
- srv_suspend_mysql_thread(thr);
-
- if (trx->error_state == DB_SUCCESS) {
-
- goto run_again;
- }
-
- err = trx->error_state;
- }
-
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/*******************************************************************
-Checks if foreign key constraints fail for an index entry. If index
-is not mentioned in any constraint, this function does nothing,
-Otherwise does searches to the indexes of referenced tables and
-sets shared locks which lock either the success or the failure of
-a constraint.
-
-The function walks table->foreign_list and handles every constraint whose
-foreign_index is index. For each such constraint the data dictionary is
-frozen if trx->dict_operation_lock_mode is 0, the counter
-n_foreign_key_checks_running of the referenced table (when that table is
-known) is incremented under dict_sys->mutex for the duration of the
-check, and row_ins_check_foreign_constraint() is called with check_ref
-TRUE. The first result other than DB_SUCCESS is returned at once, after
-the counter has been decremented and the dictionary unfrozen. */
-static
-ulint
-row_ins_check_foreign_constraints(
-/*==============================*/
- /* out: DB_SUCCESS or error code */
- dict_table_t* table, /* in: table */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry for index */
- que_thr_t* thr) /* in: query thread */
-{
- dict_foreign_t* foreign;
- ulint err;
- trx_t* trx;
- ibool got_s_lock = FALSE;
-
- trx = thr_get_trx(thr);
-
- foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
- while (foreign) {
- if (foreign->foreign_index == index) {
-
- if (foreign->referenced_table == NULL) {
- dict_table_get(foreign->referenced_table_name,
- FALSE); /* NOTE(review): return value unused; presumably loading the table fills in foreign->referenced_table - confirm */
- }
-
- if (0 == trx->dict_operation_lock_mode) {
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
- if (foreign->referenced_table) {
- mutex_enter(&(dict_sys->mutex));
-
- (foreign->referenced_table
- ->n_foreign_key_checks_running)++;
-
- mutex_exit(&(dict_sys->mutex));
- }
-
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_operation_lock temporarily!
- But the counter on the table protects the referenced
- table from being dropped while the check is running. */
-
- err = row_ins_check_foreign_constraint(
- TRUE, foreign, table, entry, thr);
-
- if (foreign->referenced_table) {
- mutex_enter(&(dict_sys->mutex));
-
- ut_a(foreign->referenced_table
- ->n_foreign_key_checks_running > 0);
- (foreign->referenced_table
- ->n_foreign_key_checks_running)--;
-
- mutex_exit(&(dict_sys->mutex));
- }
-
- if (got_s_lock) { /* got_s_lock is set only when this function froze the dictionary itself */
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- }
-
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
- }
-
- return(DB_SUCCESS);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************
-Checks if a unique key violation to rec would occur at the index entry
-insert.
-
-FALSE is returned when fewer than dict_index_get_n_unique(index) fields
-of entry match rec. For an index that is not clustered, FALSE is also
-returned when any of the first n_unique fields of entry is SQL NULL.
-Otherwise the result is TRUE exactly when rec is not delete-marked.
-The function is not compiled when UNIV_HOTBACKUP is defined. */
-static
-ibool
-row_ins_dupl_error_with_rec(
-/*========================*/
- /* out: TRUE if error */
- rec_t* rec, /* in: user record; NOTE that we assume
- that the caller already has a record lock on
- the record! */
- dtuple_t* entry, /* in: entry to insert */
- dict_index_t* index, /* in: index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
-{
- ulint matched_fields;
- ulint matched_bytes;
- ulint n_unique;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- n_unique = dict_index_get_n_unique(index);
-
- matched_fields = 0;
- matched_bytes = 0;
-
- cmp_dtuple_rec_with_match(entry, rec, offsets,
- &matched_fields, &matched_bytes); /* only matched_fields is read afterwards; the comparison result itself is not used */
-
- if (matched_fields < n_unique) {
-
- return(FALSE);
- }
-
- /* In a unique secondary index we allow equal key values if they
- contain SQL NULLs */
-
- if (!(index->type & DICT_CLUSTERED)) {
-
- for (i = 0; i < n_unique; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
-
- return(FALSE);
- }
- }
- }
-
- return(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************
-Scans a unique non-clustered index at a given index entry to determine
-whether a uniqueness violation has occurred for the key value of the entry.
-Set shared locks on possible duplicate records.
-
-If any of the first n_unique fields of entry is SQL NULL, DB_SUCCESS is
-returned without scanning. Otherwise the scan starts at the first record
-that is >= entry (PAGE_CUR_GE), comparing only the first n_unique fields;
-the n_fields_cmp value of entry is restored before returning. Every
-record visited except the infimum, the supremum included, is locked
-with a LOCK_ORDINARY lock. The lock is exclusive instead of shared when
-the TRX_DUP_IGNORE bit is set in the duplicates field of the transaction.
-On a duplicate, the error_info field of the transaction is set to index.
-
-When UNIV_HOTBACKUP is defined the body is replaced by ut_error. */
-static
-ulint
-row_ins_scan_sec_index_for_duplicate(
-/*=================================*/
- /* out: DB_SUCCESS, DB_DUPLICATE_KEY, or
- DB_LOCK_WAIT; any other value returned
- by the record lock request is passed
- on as well */
- dict_index_t* index, /* in: non-clustered unique index */
- dtuple_t* entry, /* in: index entry */
- que_thr_t* thr) /* in: query thread */
-{
-#ifndef UNIV_HOTBACKUP
- ulint n_unique;
- ulint i;
- int cmp;
- ulint n_fields_cmp;
- rec_t* rec;
- btr_pcur_t pcur;
- ulint err = DB_SUCCESS;
- ibool moved;
- unsigned allow_duplicates;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* the first element is set to the number of elements in offsets_ */
-
- n_unique = dict_index_get_n_unique(index);
-
- /* If the secondary index is unique, but one of the fields in the
- n_unique first fields is NULL, a unique key violation cannot occur,
- since we define NULL != NULL in this case */
-
- for (i = 0; i < n_unique; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
-
- return(DB_SUCCESS);
- }
- }
-
- mtr_start(&mtr);
-
- /* Store old value on n_fields_cmp */
-
- n_fields_cmp = dtuple_get_n_fields_cmp(entry);
-
- dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique(index));
-
- btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
-
- allow_duplicates = thr_get_trx(thr)->duplicates & TRX_DUP_IGNORE;
-
- /* Scan index records and check if there is a duplicate */
-
- for (;;) {
- rec = btr_pcur_get_rec(&pcur);
-
- if (page_rec_is_infimum(rec)) {
-
- goto next_rec;
- }
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (allow_duplicates) {
-
- /* If the SQL-query will update or replace
- duplicate key we will take X-lock for
- duplicates ( REPLACE, LOAD DATAFILE REPLACE,
- INSERT ON DUPLICATE KEY UPDATE). */
-
- err = row_ins_set_exclusive_rec_lock(
- LOCK_ORDINARY, rec, index, offsets, thr);
- } else {
-
- err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, rec, index, offsets, thr);
- }
-
- if (err != DB_SUCCESS) {
-
- break;
- }
-
- if (page_rec_is_supremum(rec)) { /* the supremum has been locked above but is not compared with entry */
-
- goto next_rec;
- }
-
- cmp = cmp_dtuple_rec(entry, rec, offsets);
-
- if (cmp == 0) {
- if (row_ins_dupl_error_with_rec(rec, entry,
- index, offsets)) {
- err = DB_DUPLICATE_KEY;
-
- thr_get_trx(thr)->error_info = index;
-
- break;
- }
- }
-
- if (cmp < 0) {
- break;
- }
-
- ut_a(cmp == 0); /* cmp < 0 left the loop above, so this asserts that cmp > 0 never occurs here */
-next_rec:
- moved = btr_pcur_move_to_next(&pcur, &mtr);
-
- if (!moved) {
- break;
- }
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- mtr_commit(&mtr);
-
- /* Restore old value */
- dtuple_set_n_fields_cmp(entry, n_fields_cmp);
-
- return(err);
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
-}
-
-/*******************************************************************
-Checks if a unique key violation error would occur at an index entry
-insert. Sets shared locks on possible duplicate records, or exclusive
-locks when trx->duplicates has TRX_DUP_IGNORE set. The record under the
-cursor is examined when cursor->low_match >= n_unique, and the next
-record on the page when cursor->up_match >= n_unique. Works only
-for a clustered index! */
-static
-ulint
-row_ins_duplicate_error_in_clust(
-/*=============================*/
- /* out: DB_SUCCESS if no error,
- DB_DUPLICATE_KEY if error, DB_LOCK_WAIT if we
- have to wait for a lock on a possible
- duplicate record; any other non-success code
- returned by the record lock call is passed on
- unchanged */
- btr_cur_t* cursor, /* in: B-tree cursor */
- dtuple_t* entry, /* in: entry to insert */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr; not used by this function */
-{
-#ifndef UNIV_HOTBACKUP
- ulint err;
- rec_t* rec;
- ulint n_unique;
- trx_t* trx = thr_get_trx(thr);
- mem_heap_t*heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* the first element holds the number of elements in the array */
-
- UT_NOT_USED(mtr);
-
- ut_a(cursor->index->type & DICT_CLUSTERED);
- ut_ad(cursor->index->type & DICT_UNIQUE);
-
- /* NOTE: For unique non-clustered indexes there may be any number
- of delete marked records with the same value for the non-clustered
- index key (remember multiversioning), and which differ only in
- the row reference part of the index record, containing the
- clustered index key fields. For such a secondary index record,
- to avoid race condition, we must FIRST do the insertion and after
- that check that the uniqueness condition is not breached! */
-
- /* NOTE: A problem is that in the B-tree node pointers on an
- upper level may match more to the entry than the actual existing
- user records on the leaf level. So, even if low_match would suggest
- that a duplicate key violation may occur, this may not be the case. */
-
- n_unique = dict_index_get_n_unique(cursor->index);
-
- if (cursor->low_match >= n_unique) {
-
- rec = btr_cur_get_rec(cursor);
-
- if (!page_rec_is_infimum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- /* We set a lock on the possible duplicate: this
- is needed in logical logging of MySQL to make
- sure that in roll-forward we get the same duplicate
- errors as in original execution */
-
- if (trx->duplicates & TRX_DUP_IGNORE) {
-
- /* If the SQL-query will update or replace
- duplicate key we will take X-lock for
- duplicates ( REPLACE, LOAD DATAFILE REPLACE,
- INSERT ON DUPLICATE KEY UPDATE). */
-
- err = row_ins_set_exclusive_rec_lock(
- LOCK_REC_NOT_GAP, rec,
- cursor->index, offsets, thr);
- } else {
-
- err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP, rec,
- cursor->index, offsets, thr);
- }
-
- if (err != DB_SUCCESS) {
- goto func_exit;
- }
-
- if (row_ins_dupl_error_with_rec(
- rec, entry, cursor->index, offsets)) {
- trx->error_info = cursor->index;
- err = DB_DUPLICATE_KEY;
- goto func_exit;
- }
- }
- }
-
- if (cursor->up_match >= n_unique) {
-
- rec = page_rec_get_next(btr_cur_get_rec(cursor));
-
- if (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (trx->duplicates & TRX_DUP_IGNORE) {
-
- /* If the SQL-query will update or replace
- duplicate key we will take X-lock for
- duplicates ( REPLACE, LOAD DATAFILE REPLACE,
- INSERT ON DUPLICATE KEY UPDATE). */
-
- err = row_ins_set_exclusive_rec_lock(
- LOCK_REC_NOT_GAP, rec,
- cursor->index, offsets, thr);
- } else {
-
- err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP, rec,
- cursor->index, offsets, thr);
- }
-
- if (err != DB_SUCCESS) {
- goto func_exit;
- }
-
- if (row_ins_dupl_error_with_rec(
- rec, entry, cursor->index, offsets)) {
- trx->error_info = cursor->index;
- err = DB_DUPLICATE_KEY;
- goto func_exit;
- }
- }
-
- ut_a(!(cursor->index->type & DICT_CLUSTERED));
- /* This should never happen: the assertion at the start of the
- function requires a clustered index, so the assertion just above
- contradicts it whenever execution gets this far */
- }
-
- err = DB_SUCCESS;
-func_exit:
- /* rec_get_offsets() is passed &heap above; release the heap if it
- was set */
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
-}
-
-/*******************************************************************
-Decides whether the intended insert has to be carried out as a modify of
-the record the cursor is positioned on. That is the case when the record
-is not the page infimum and cursor->low_match covers at least
-dict_index_get_n_unique_in_tree() fields of the index. */
-UNIV_INLINE
-ulint
-row_ins_must_modify(
-/*================*/
-				/* out: ROW_INS_PREV if the record at
-				the cursor should be updated, 0 if a
-				plain insert is to be done; the next
-				record is never reported */
-	btr_cur_t*	cursor)	/* in: B-tree cursor */
-{
-	rec_t*	candidate;
-
-	if (cursor->low_match
-	    < dict_index_get_n_unique_in_tree(cursor->index)) {
-
-		/* The common prefix is too short */
-
-		return(0);
-	}
-
-	candidate = btr_cur_get_rec(cursor);
-
-	/* The matched fields may stem from a node pointer on an upper
-	level of the tree rather than from a record on the leaf level:
-	the infimum is not a record that could be modified */
-
-	return(page_rec_is_infimum(candidate) ? 0 : ROW_INS_PREV);
-}
-
-/*******************************************************************
-Tries to insert an index entry to an index. If the index is clustered
-and a record with the same unique key is found, the other record is
-necessarily marked deleted by a committed transaction, or a unique key
-violation error occurs. The delete marked record is then updated to an
-existing record, and we must write an undo log record on the delete
-marked record. If the index is secondary, and a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index.
-
-The function starts and commits its own mini-transaction. If the insert or
-modify produced a big_rec, a second mini-transaction is used to store the
-externally stored fields, and the status of that call is what gets
-returned. */
-
-ulint
-row_ins_index_entry_low(
-/*====================*/
- /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
- if pessimistic retry needed, or error code */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- que_thr_t* thr) /* in: query thread */
-{
- btr_cur_t cursor;
- ulint ignore_sec_unique = 0;
- ulint modify = 0; /* remove warning */
- rec_t* insert_rec;
- rec_t* rec;
- ulint err;
- ulint n_unique;
- big_rec_t* big_rec = NULL;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* the first element holds the number of elements in the array */
-
- log_free_check();
-
- mtr_start(&mtr);
-
- cursor.thr = thr;
-
- /* Note that we use PAGE_CUR_LE as the search mode, because then
- the function will return in both low_match and up_match of the
- cursor sensible values */
-
- if (!(thr_get_trx(thr)->check_unique_secondary)) {
- ignore_sec_unique = BTR_IGNORE_SEC_UNIQUE;
- }
-
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- mode | BTR_INSERT | ignore_sec_unique,
- &cursor, 0, &mtr);
-
- if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
- /* The insertion was made to the insert buffer already during
- the search: we are done */
-
- err = DB_SUCCESS;
-
- goto function_exit;
- }
-
-#ifdef UNIV_DEBUG
- {
- page_t* page = btr_cur_get_page(&cursor);
- rec_t* first_rec = page_rec_get_next(
- page_get_infimum_rec(page));
-
- if (UNIV_LIKELY(first_rec != page_get_supremum_rec(page))) {
- ut_a(rec_get_n_fields(first_rec, index)
- == dtuple_get_n_fields(entry));
- }
- }
-#endif
-
- n_unique = dict_index_get_n_unique(index);
-
- if (index->type & DICT_UNIQUE && (cursor.up_match >= n_unique
- || cursor.low_match >= n_unique)) {
-
- if (index->type & DICT_CLUSTERED) {
- /* Note that the following may return also
- DB_LOCK_WAIT */
-
- err = row_ins_duplicate_error_in_clust(
- &cursor, entry, thr, &mtr);
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
- } else {
- mtr_commit(&mtr);
- err = row_ins_scan_sec_index_for_duplicate(
- index, entry, thr);
- mtr_start(&mtr);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- /* We did not find a duplicate and we have now
- locked with s-locks the necessary records to
- prevent any insertion of a duplicate by another
- transaction. Let us now reposition the cursor and
- continue the insertion. The mini-transaction was
- committed and restarted around the scan above, so
- the earlier cursor position is searched for again
- here. */
-
- btr_cur_search_to_nth_level(index, 0, entry,
- PAGE_CUR_LE,
- mode | BTR_INSERT,
- &cursor, 0, &mtr);
- }
- }
-
- modify = row_ins_must_modify(&cursor);
-
- if (modify != 0) {
- /* There is already an index entry with a long enough common
- prefix, we must convert the insert into a modify of an
- existing record */
-
- if (modify == ROW_INS_NEXT) { /* NOTE(review): row_ins_must_modify() returns only 0 or ROW_INS_PREV, so this branch looks unreachable */
- rec = page_rec_get_next(btr_cur_get_rec(&cursor));
-
- btr_cur_position(index, rec, &cursor);
- }
-
- if (index->type & DICT_CLUSTERED) {
- err = row_ins_clust_index_entry_by_modify(
- mode, &cursor, &big_rec, entry,
- ext_vec, n_ext_vec, thr, &mtr);
- } else {
- err = row_ins_sec_index_entry_by_modify(
- mode, &cursor, entry, thr, &mtr);
- }
-
- } else {
- if (mode == BTR_MODIFY_LEAF) {
- err = btr_cur_optimistic_insert(
- 0, &cursor, entry, &insert_rec, &big_rec,
- thr, &mtr);
- } else {
- ut_a(mode == BTR_MODIFY_TREE);
- if (buf_LRU_buf_pool_running_out()) {
-
- err = DB_LOCK_TABLE_FULL;
-
- goto function_exit;
- }
- err = btr_cur_pessimistic_insert(
- 0, &cursor, entry, &insert_rec, &big_rec,
- thr, &mtr);
- }
-
- if (err == DB_SUCCESS) {
- if (ext_vec) {
- rec_set_field_extern_bits(insert_rec, index,
- ext_vec, n_ext_vec,
- &mtr);
- }
- }
- }
-
-function_exit:
- mtr_commit(&mtr);
-
- if (big_rec) {
- rec_t* rec; /* NOTE(review): shadows the rec declared at the top of the function */
- mtr_start(&mtr);
-
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, 0, &mtr);
- rec = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- err = btr_store_big_rec_extern_fields(index, rec,
- offsets, big_rec, &mtr); /* replaces the status of the insert or modify above */
-
- if (modify) {
- dtuple_big_rec_free(big_rec);
- } else {
- dtuple_convert_back_big_rec(index, entry, big_rec);
- }
-
- mtr_commit(&mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/*******************************************************************
-Inserts an index entry to index. If the table of the index has foreign key
-constraints, they are checked first. The insert is then attempted with an
-optimistic descent (BTR_MODIFY_LEAF) and, only if that attempt returns
-DB_FAIL, repeated with a pessimistic descent (BTR_MODIFY_TREE). */
-
-ulint
-row_ins_index_entry(
-/*================*/
-				/* out: DB_SUCCESS, DB_LOCK_WAIT,
-				DB_DUPLICATE_KEY, or some other error code */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry to insert */
-	ulint*		ext_vec,/* in: array containing field numbers of
-				externally stored fields in entry, or NULL */
-	ulint		n_ext_vec,/* in: number of fields in ext_vec */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	ulint	status;
-
-	if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
-		status = row_ins_check_foreign_constraints(
-			index->table, index, entry, thr);
-
-		if (status != DB_SUCCESS) {
-
-			return(status);
-		}
-	}
-
-	/* Optimistic attempt first */
-
-	status = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
-					 ext_vec, n_ext_vec, thr);
-
-	if (status == DB_FAIL) {
-		/* Fall back to the pessimistic attempt */
-
-		status = row_ins_index_entry_low(BTR_MODIFY_TREE, index,
-						 entry, ext_vec, n_ext_vec,
-						 thr);
-	}
-
-	return(status);
-}
-
-/***************************************************************
-Fills in the fields of an index entry from a row: every field of entry is
-given the data pointer and the length of the row column that the matching
-index field refers to. For a column prefix index field whose row value is
-not SQL NULL, the length is the one computed by
-dtype_get_at_most_n_mbchars() for the prefix length. */
-static
-void
-row_ins_index_entry_set_vals(
-/*=========================*/
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry to make */
-	dtuple_t*	row)	/* in: row */
-{
-	ulint	n_fields;
-	ulint	pos;
-
-	ut_ad(entry && row);
-
-	n_fields = dtuple_get_n_fields(entry);
-
-	for (pos = 0; pos < n_fields; pos++) {
-		dfield_t*	dst;
-		dict_field_t*	idx_field;
-		dfield_t*	src;
-		ulint		len;
-
-		dst = dtuple_get_nth_field(entry, pos);
-		idx_field = dict_index_get_nth_field(index, pos);
-		src = dtuple_get_nth_field(row, idx_field->col->ind);
-
-		len = src->len;
-
-		if (idx_field->prefix_len > 0
-		    && dfield_get_len(src) != UNIV_SQL_NULL) {
-
-			/* Column prefix index: use the prefix length */
-
-			const dict_col_t*	col
-				= dict_field_get_col(idx_field);
-
-			len = dtype_get_at_most_n_mbchars(
-				col->prtype, col->mbminlen, col->mbmaxlen,
-				idx_field->prefix_len,
-				src->len, src->data);
-		}
-
-		dst->len = len;
-		dst->data = src->data;
-	}
-}
-
-/***************************************************************
-Builds node->entry from node->row for the index node->index and inserts
-that entry to the index. */
-static
-ulint
-row_ins_index_entry_step(
-/*=====================*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	ins_node_t*	node,	/* in: row insert node */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	ut_ad(dtuple_check_typed(node->row));
-
-	row_ins_index_entry_set_vals(node->index, node->entry, node->row);
-
-	ut_ad(dtuple_check_typed(node->entry));
-
-	/* No externally stored fields are passed */
-
-	return(row_ins_index_entry(node->index, node->entry, NULL, 0, thr));
-}
-
-/***************************************************************
-Allocates a new row id and writes it to node->row_id_buf, unless the first
-index of the table is unique, in which case nothing is done. */
-UNIV_INLINE
-void
-row_ins_alloc_row_id_step(
-/*======================*/
-	ins_node_t*	node)	/* in: row insert node */
-{
-	ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
-
-	if (!(dict_table_get_first_index(node->table)->type
-	      & DICT_UNIQUE)) {
-
-		/* The clustered index is not unique: a row id is stored
-		in the row */
-
-		dulint	new_id = dict_sys_get_new_row_id();
-
-		dict_sys_write_row_id(node->row_id_buf, new_id);
-	}
-}
-
-/***************************************************************
-Evaluates every expression in node->values_list in order and copies the
-data of the evaluated value to the field of node->row that has the same
-position. */
-UNIV_INLINE
-void
-row_ins_get_row_from_values(
-/*========================*/
-	ins_node_t*	node)	/* in: row insert node */
-{
-	dtuple_t*	target = node->row;
-	que_node_t*	expr;
-	dfield_t*	dest;
-	ulint		pos = 0;
-
-	/* dfield_copy_data() is used for the copy, as in the original
-	code, which remarks that copying just the pointers is enough */
-
-	for (expr = node->values_list; expr;
-	     expr = que_node_get_next(expr)) {
-
-		eval_exp(expr);
-
-		dest = dtuple_get_nth_field(target, pos);
-		dfield_copy_data(dest, que_node_get_val(expr));
-
-		pos++;
-	}
-}
-
-/***************************************************************
-Copies the data of every value in the select list of node->select to the
-field of node->row that has the same position. The expressions are not
-evaluated here. */
-UNIV_INLINE
-void
-row_ins_get_row_from_select(
-/*========================*/
-	ins_node_t*	node)	/* in: row insert node */
-{
-	dtuple_t*	target = node->row;
-	que_node_t*	item;
-	dfield_t*	dest;
-	ulint		pos = 0;
-
-	/* The field values are in the buffers of the select node and stay
-	usable until the next fetch from the select: copying the pointers
-	is enough */
-
-	for (item = node->select->select_list; item;
-	     item = que_node_get_next(item)) {
-
-		dest = dtuple_get_nth_field(target, pos);
-		dfield_copy_data(dest, que_node_get_val(item));
-
-		pos++;
-	}
-}
-
-/***************************************************************
-Inserts a row to a table. In the state INS_NODE_ALLOC_ROW_ID the row id is
-allocated, node->index and node->entry are set to the first index and the
-first entry, and the row is fetched from the select or values list. After
-that one entry is inserted per index. If an insert does not succeed, the
-function returns with node->index and node->entry still pointing at the
-index where it stopped. */
-
-ulint
-row_ins(
-/*====*/
-				/* out: DB_SUCCESS if operation successfully
-				completed, else error code or DB_LOCK_WAIT */
-	ins_node_t*	node,	/* in: row insert node */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	ulint	status;
-
-	ut_ad(node && thr);
-
-	if (node->state == INS_NODE_ALLOC_ROW_ID) {
-
-		row_ins_alloc_row_id_step(node);
-
-		node->index = dict_table_get_first_index(node->table);
-		node->entry = UT_LIST_GET_FIRST(node->entry_list);
-
-		if (node->ins_type == INS_SEARCHED) {
-
-			row_ins_get_row_from_select(node);
-
-		} else if (node->ins_type == INS_VALUES) {
-
-			row_ins_get_row_from_values(node);
-		}
-
-		node->state = INS_NODE_INSERT_ENTRIES;
-	}
-
-	ut_ad(node->state == INS_NODE_INSERT_ENTRIES);
-
-	for (;;) {
-		if (node->index == NULL) {
-			/* All the indexes have been processed */
-
-			break;
-		}
-
-		status = row_ins_index_entry_step(node, thr);
-
-		if (status != DB_SUCCESS) {
-
-			return(status);
-		}
-
-		node->index = dict_table_get_next_index(node->index);
-		node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
-	}
-
-	ut_ad(node->entry == NULL);
-
-	node->state = INS_NODE_ALLOC_ROW_ID;
-
-	return(DB_SUCCESS);
-}
-
-/***************************************************************
-Inserts a row to a table. This is a high-level function used in SQL execution
-graphs.
-
-The node taken from thr->run_node goes through the states
-INS_NODE_SET_IX_LOCK (take the table IX lock unless node->trx_id already
-equals trx->id) and INS_NODE_ALLOC_ROW_ID (call row_ins()). For an
-INS_SEARCHED insert the select node is run between the rows. The outcome
-of the lock request or of row_ins() is always stored in trx->error_state. */
-
-que_thr_t*
-row_ins_step(
-/*=========*/
- /* out: query thread to run next or NULL;
- NULL is returned whenever the stored error
- state is not DB_SUCCESS */
- que_thr_t* thr) /* in: query thread */
-{
- ins_node_t* node;
- que_node_t* parent;
- sel_node_t* sel_node;
- trx_t* trx;
- ulint err;
-
- ut_ad(thr);
-
- trx = thr_get_trx(thr);
-
- trx_start_if_not_started(trx);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_INSERT);
-
- parent = que_node_get_parent(node);
- sel_node = node->select;
-
- if (thr->prev_node == parent) {
- node->state = INS_NODE_SET_IX_LOCK;
- }
-
- /* If this is the first time this node is executed (or when
- execution resumes after wait for the table IX lock), set an
- IX lock on the table and reset the possible select node. MySQL's
- partitioned table code may also call an insert within the same
- SQL statement AFTER it has used this table handle to do a search.
- This happens, for example, when a row update moves it to another
- partition. In that case, we have already set the IX lock on the
- table during the search operation, and there is no need to set
- it again here. But we must write trx->id to node->trx_id_buf. */
-
- trx_write_trx_id(node->trx_id_buf, trx->id);
-
- if (node->state == INS_NODE_SET_IX_LOCK) {
-
- /* It may be that the current session has not yet started
- its transaction, or it has been committed: */
-
- if (UT_DULINT_EQ(trx->id, node->trx_id)) {
- /* No need to do IX-locking */
-
- goto same_trx;
- }
-
- err = lock_table(0, node->table, LOCK_IX, thr);
-
- if (err != DB_SUCCESS) {
-
- goto error_handling;
- }
-
- node->trx_id = trx->id;
-same_trx:
- node->state = INS_NODE_ALLOC_ROW_ID;
-
- if (node->ins_type == INS_SEARCHED) {
- /* Reset the cursor */
- sel_node->state = SEL_NODE_OPEN;
-
- /* Fetch a row to insert */
-
- thr->run_node = sel_node;
-
- return(thr);
- }
- }
-
- if ((node->ins_type == INS_SEARCHED)
- && (sel_node->state != SEL_NODE_FETCH)) {
-
- ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
-
- /* No more rows to insert */
- thr->run_node = parent;
-
- return(thr);
- }
-
- /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-
- err = row_ins(node, thr);
-
-error_handling:
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- /* err == DB_LOCK_WAIT or SQL error detected */
- return(NULL);
- }
-
- /* DO THE TRIGGER ACTIONS HERE */
-
- if (node->ins_type == INS_SEARCHED) {
- /* Fetch a row to insert */
-
- thr->run_node = sel_node;
- } else {
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
deleted file mode 100644
index 2d9ed4fc944..00000000000
--- a/storage/innobase/row/row0mysql.c
+++ /dev/null
@@ -1,4199 +0,0 @@
-/******************************************************
-Interface between Innobase row operations and MySQL.
-Contains also create table and other data dictionary operations.
-
-(c) 2000 Innobase Oy
-
-Created 9/17/2000 Heikki Tuuri
-*******************************************************/
-
-#include "row0mysql.h"
-
-#ifdef UNIV_NONINL
-#include "row0mysql.ic"
-#endif
-
-#include "row0ins.h"
-#include "row0sel.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "que0que.h"
-#include "pars0pars.h"
-#include "dict0dict.h"
-#include "dict0crea.h"
-#include "dict0load.h"
-#include "dict0boot.h"
-#include "trx0roll.h"
-#include "trx0purge.h"
-#include "lock0lock.h"
-#include "rem0cmp.h"
-#include "log0log.h"
-#include "btr0sea.h"
-#include "fil0fil.h"
-#include "ibuf0ibuf.h"
-
-/* A dummy variable used to fool the compiler */
-ibool row_mysql_identically_false = FALSE;
-
-/* Provide optional 4.x backwards compatibility for 5.0 and above. When
-this is TRUE, row_mysql_handle_errors() rolls back the whole transaction
-on DB_LOCK_WAIT_TIMEOUT. */
-ibool row_rollback_on_timeout = FALSE;
-
-/* List of tables we should drop in background. ALTER TABLE in MySQL requires
-that the table handler can drop the table in background when there are no
-queries to it any more. Protected by the kernel mutex. */
-typedef struct row_mysql_drop_struct row_mysql_drop_t;
-struct row_mysql_drop_struct{
- char* table_name; /* name of a table in the list */
- UT_LIST_NODE_T(row_mysql_drop_t) row_mysql_drop_list; /* list node linking the entries together */
-};
-
-UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
-ibool row_mysql_drop_list_inited = FALSE;
-
-/* Magic table names for invoking various monitor threads */
-static const char S_innodb_monitor[] = "innodb_monitor";
-static const char S_innodb_lock_monitor[] = "innodb_lock_monitor";
-static const char S_innodb_tablespace_monitor[] = "innodb_tablespace_monitor";
-static const char S_innodb_table_monitor[] = "innodb_table_monitor";
-static const char S_innodb_mem_validate[] = "innodb_mem_validate";
-
-/* Evaluates to true if str1 equals str2_onstack, used for comparing
-the above strings. str1_len is compared against sizeof(str2_onstack), so
-str2_onstack must be an array (not a pointer), and for the arrays above
-str1_len has to include the terminating NUL byte. The same number of
-bytes is then compared with memcmp(). */
-#define STR_EQ(str1, str1_len, str2_onstack) \
- ((str1_len) == sizeof(str2_onstack) \
- && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)
-
-/***********************************************************************
-Tells whether a table name is one of the names reserved for MySQL system
-tables: the name must start with "mysql/" and the rest of it must be
-exactly "host", "user" or "db". */
-static
-ibool
-row_mysql_is_system_table(
-/*======================*/
-				/* out: TRUE if name is a MySQL
-				system table name */
-	const char*	name)
-{
-	const char*	table_part;
-
-	if (strncmp(name, "mysql/", 6) != 0) {
-		/* Not in the mysql database */
-
-		return(FALSE);
-	}
-
-	/* Skip the 6 characters of the database prefix */
-	table_part = name + 6;
-
-	return(0 == strcmp(table_part, "host")
-	       || 0 == strcmp(table_part, "user")
-	       || 0 == strcmp(table_part, "db"));
-}
-
-/***********************************************************************
-Sleeps for srv_dml_needed_delay if that value is nonzero. Meant for
-delaying an INSERT, DELETE or UPDATE operation when the purge is lagging. */
-static
-void
-row_mysql_delay_if_needed(void)
-/*===========================*/
-{
-	if (!srv_dml_needed_delay) {
-		/* No delay requested */
-
-		return;
-	}
-
-	os_thread_sleep(srv_dml_needed_delay);
-}
-
-/***********************************************************************
-Frees prebuilt->blob_heap and resets the pointer in prebuilt to NULL. */
-
-void
-row_mysql_prebuilt_free_blob_heap(
-/*==============================*/
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct of a
-					ha_innobase:: table handle */
-{
-	mem_heap_t*	blob_heap = prebuilt->blob_heap;
-
-	mem_heap_free(blob_heap);
-
-	/* Do not leave a pointer to the freed heap in prebuilt */
-	prebuilt->blob_heap = NULL;
-}
-
-/***********************************************************************
-Writes the length of a >= 5.0.3 format true VARCHAR to dest in the MySQL
-row format: 1 byte if lenlen is 1, 2 bytes in little-endian order if
-lenlen is 2. Any other lenlen, or a length that does not fit, fails an
-assertion. */
-
-byte*
-row_mysql_store_true_var_len(
-/*=========================*/
-			/* out: pointer to the data, we skip the 1 or 2 bytes
-			at the start that are used to store the len */
-	byte*	dest,	/* in: where to store */
-	ulint	len,	/* in: length, must fit in two bytes */
-	ulint	lenlen)	/* in: storage length of len: either 1 or 2 bytes */
-{
-	if (lenlen != 2) {
-		/* One length byte */
-
-		ut_a(lenlen == 1);
-		ut_a(len < 256);
-
-		mach_write_to_1(dest, len);
-
-		return(dest + 1);
-	}
-
-	/* Two length bytes */
-
-	ut_a(len < 256 * 256);
-
-	mach_write_to_2_little_endian(dest, len);
-
-	return(dest + 2);
-}
-
-/***********************************************************************
-Reads the length of a >= 5.0.3 format true VARCHAR from the 1 or 2 bytes
-at the start of a field in the MySQL row format, and returns a pointer to
-the byte that follows the length. */
-
-byte*
-row_mysql_read_true_varchar(
-/*========================*/
-			/* out: pointer to the data, we skip the 1 or 2 bytes
-			at the start that are used to store the len */
-	ulint*	len,	/* out: variable-length field length */
-	byte*	field,	/* in: field in the MySQL format */
-	ulint	lenlen)	/* in: storage length of len: either 1 or 2 bytes */
-{
-	if (lenlen != 2) {
-		/* One length byte */
-
-		ut_a(lenlen == 1);
-
-		*len = mach_read_from_1(field);
-
-		return(field + 1);
-	}
-
-	/* Two length bytes */
-
-	*len = mach_read_from_2_little_endian(field);
-
-	return(field + 2);
-}
-
-/***********************************************************************
-Stores a reference to a BLOB in the MySQL format: dest is zero-filled, the
-BLOB length is written to its first col_len - 8 bytes in little-endian
-order, and the data pointer is copied to its last 8 bytes. */
-
-void
-row_mysql_store_blob_ref(
-/*=====================*/
-	byte*	dest,	/* in: where to store */
-	ulint	col_len,/* in: dest buffer size: determines into
-			how many bytes the BLOB length is stored,
-			the space for the length may vary from 1
-			to 4 bytes */
-	byte*	data,	/* in: BLOB data; if the value to store
-			is SQL NULL this should be NULL pointer */
-	ulint	len)	/* in: BLOB length; if the value to store
-			is SQL NULL this should be 0; remember
-			also to set the NULL bit in the MySQL record
-			header! */
-{
-	byte*	ptr_slot;
-
-	/* MySQL might assume that everything except the length and the
-	pointer is zero */
-
-	memset(dest, '\0', col_len);
-
-	/* The length must fit in the 1 - 4 bytes reserved for it */
-
-	ut_a(col_len - 8 > 1 || len < 256);
-	ut_a(col_len - 8 > 2 || len < 256 * 256);
-	ut_a(col_len - 8 > 3 || len < 256 * 256 * 256);
-
-	mach_write_to_n_little_endian(dest, col_len - 8, len);
-
-	/* 8 bytes are reserved for the pointer after the length; only
-	sizeof(byte*) of them are written, which on 32-bit architectures
-	is the first 4 bytes */
-
-	ptr_slot = dest + col_len - 8;
-
-	ut_memcpy(ptr_slot, &data, sizeof data);
-}
-
-/***********************************************************************
-Reads a reference to a BLOB in the MySQL format: the BLOB length is taken
-from the first col_len - 8 bytes of ref in little-endian order, and the
-data pointer from the bytes starting at ref + col_len - 8. */
-
-byte*
-row_mysql_read_blob_ref(
-/*====================*/
-			/* out: pointer to BLOB data */
-	ulint*	len,	/* out: BLOB length */
-	byte*	ref,	/* in: BLOB reference in the MySQL format */
-	ulint	col_len)/* in: BLOB reference length (not BLOB
-			length) */
-{
-	byte*	blob;
-
-	*len = mach_read_from_n_little_endian(ref, col_len - 8);
-
-	/* The pointer is copied byte by byte out of the reference */
-
-	ut_memcpy(&blob, ref + col_len - 8, sizeof blob);
-
-	return(blob);
-}
-
-/******************************************************************
-Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
-The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.c.
-
-Only a DATA_INT value is copied (in converted form) to buf; in every other
-case dfield is made to point into mysql_data, or to the BLOB data that the
-MySQL BLOB reference points to, with the length adjusted according to the
-type. */
-
-byte*
-row_mysql_store_col_in_innobase_format(
-/*===================================*/
- /* out: up to which byte we used
- buf in the conversion; this is
- buf + col_len for DATA_INT and buf
- itself otherwise */
- dfield_t* dfield, /* in/out: dfield where dtype
- information must be already set when
- this function is called! */
- byte* buf, /* in/out: buffer for a converted
- integer value; this must be at least
- col_len long then! */
- ibool row_format_col, /* TRUE if the mysql_data is from
- a MySQL row, FALSE if from a MySQL
- key value;
- in MySQL, a true VARCHAR storage
- format differs in a row and in a
- key value: in a key value the length
- is always stored in 2 bytes! */
- byte* mysql_data, /* in: MySQL column value, not
- SQL NULL; NOTE that dfield may also
- get a pointer to mysql_data,
- therefore do not discard this as long
- as dfield is used! */
- ulint col_len, /* in: MySQL column length; NOTE that
- this is the storage length of the
- column in the MySQL format row, not
- necessarily the length of the actual
- payload data; if the column is a true
- VARCHAR then this is irrelevant */
- ulint comp) /* in: nonzero=compact format */
-{
- byte* ptr = mysql_data;
- dtype_t* dtype;
- ulint type;
- ulint lenlen;
-
- dtype = dfield_get_type(dfield);
-
- type = dtype->mtype;
-
- if (type == DATA_INT) {
- /* Store integer data in Innobase in a big-endian format,
- sign bit negated if the data is a signed integer. In MySQL,
- integers are stored in a little-endian format.
-
- The loop below copies col_len bytes from mysql_data to buf in
- reverse order and ends with ptr == buf.
- NOTE(review): ptr is decremented before it is compared with
- buf, so col_len is assumed to be at least 1 here. */
-
- ptr = buf + col_len;
-
- for (;;) {
- ptr--;
- *ptr = *mysql_data;
- if (ptr == buf) {
- break;
- }
- mysql_data++;
- }
-
- if (!(dtype->prtype & DATA_UNSIGNED)) {
-
- *ptr = (byte) (*ptr ^ 128);
- }
-
- buf += col_len;
- } else if ((type == DATA_VARCHAR
- || type == DATA_VARMYSQL
- || type == DATA_BINARY)) {
-
- if (dtype_get_mysql_type(dtype) == DATA_MYSQL_TRUE_VARCHAR) {
- /* The length of the actual data is stored to 1 or 2
- bytes at the start of the field */
-
- if (row_format_col) {
- if (dtype->prtype & DATA_LONG_TRUE_VARCHAR) {
- lenlen = 2;
- } else {
- lenlen = 1;
- }
- } else {
- /* In a MySQL key value, lenlen is always 2 */
- lenlen = 2;
- }
-
- ptr = row_mysql_read_true_varchar(&col_len, mysql_data,
- lenlen); /* col_len is replaced by the length read from the field */
- } else {
- /* Remove trailing spaces from old style VARCHAR
- columns. */
-
- /* Handle UCS2 strings differently. */
- ulint mbminlen = dtype_get_mbminlen(dtype);
-
- ptr = mysql_data;
-
- if (mbminlen == 2) {
- /* space=0x0020 */
- /* Trim "half-chars", just in case. */
- col_len &= ~1;
-
- while (col_len >= 2 && ptr[col_len - 2] == 0x00
- && ptr[col_len - 1] == 0x20) {
- col_len -= 2;
- }
- } else {
- ut_a(mbminlen == 1);
- /* space=0x20 */
- while (col_len > 0
- && ptr[col_len - 1] == 0x20) {
- col_len--;
- }
- }
- }
- } else if (comp && type == DATA_MYSQL
- && dtype_get_mbminlen(dtype) == 1
- && dtype_get_mbmaxlen(dtype) > 1) {
- /* In some cases we strip trailing spaces from UTF-8 and other
- multibyte charsets, from FIXED-length CHAR columns, to save
- space. UTF-8 would otherwise normally use 3 * the string length
- bytes to store an ASCII string! */
-
- /* We assume that this CHAR field is encoded in a
- variable-length character set where spaces have
- 1:1 correspondence to 0x20 bytes, such as UTF-8.
-
- Consider a CHAR(n) field, a field of n characters.
- It will contain between n * mbminlen and n * mbmaxlen bytes.
- We will try to truncate it to n bytes by stripping
- space padding. If the field contains single-byte
- characters only, it will be truncated to n characters.
- Consider a CHAR(5) field containing the string ".a "
- where "." denotes a 3-byte character represented by
- the bytes "$%&". After our stripping, the string will
- be stored as "$%&a " (5 bytes). The string ".abc "
- will be stored as "$%&abc" (6 bytes).
-
- The space padding will be restored in row0sel.c, function
- row_sel_field_store_in_mysql_format(). */
-
- ulint n_chars;
-
- ut_a(!(dtype_get_len(dtype) % dtype_get_mbmaxlen(dtype)));
-
- n_chars = dtype_get_len(dtype) / dtype_get_mbmaxlen(dtype);
-
- /* Strip space padding, but never below n_chars bytes. */
- while (col_len > n_chars && ptr[col_len - 1] == 0x20) {
- col_len--;
- }
- } else if (type == DATA_BLOB && row_format_col) {
-
- ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); /* col_len is replaced by the BLOB length */
- }
-
- dfield_set_data(dfield, ptr, col_len); /* ptr and col_len are as left by the branch taken above */
-
- return(buf);
-}
-
-/******************************************************************
-Converts a row in the MySQL format to a row in the Innobase format, one
-template column at a time: a column whose NULL bit is set in mysql_rec
-becomes SQL NULL, every other column is converted with
-row_mysql_store_col_in_innobase_format(). The function converting a MySQL
-format key value to an InnoDB dtuple is
-row_sel_convert_mysql_key_to_innobase() in row0sel.c. */
-static
-void
-row_mysql_convert_row_to_innobase(
-/*==============================*/
-	dtuple_t*	row,		/* in/out: Innobase row where the
-					field type information is already
-					copied there! */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct where template
-					must be of type ROW_MYSQL_WHOLE_ROW */
-	byte*		mysql_rec)	/* in: row in the MySQL format;
-					NOTE: do not discard as long as
-					row is used, as row may contain
-					pointers to this record! */
-{
-	mysql_row_templ_t*	col_templ;
-	dfield_t*		target;
-	ulint			col_no;
-
-	ut_ad(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
-	ut_ad(prebuilt->mysql_template);
-
-	for (col_no = 0; col_no < prebuilt->n_template; col_no++) {
-
-		col_templ = prebuilt->mysql_template + col_no;
-		target = dtuple_get_nth_field(row, col_no);
-
-		/* A zero mask means that the column has no NULL bit */
-
-		if (col_templ->mysql_null_bit_mask != 0
-		    && (mysql_rec[col_templ->mysql_null_byte_offset]
-			& (byte) (col_templ->mysql_null_bit_mask))) {
-
-			/* It is SQL NULL */
-
-			dfield_set_data(target, NULL, UNIV_SQL_NULL);
-
-			continue;
-		}
-
-		row_mysql_store_col_in_innobase_format(
-			target,
-			prebuilt->ins_upd_rec_buff
-			+ col_templ->mysql_col_offset,
-			TRUE, /* MySQL row format data */
-			mysql_rec + col_templ->mysql_col_offset,
-			col_templ->mysql_col_len,
-			dict_table_is_comp(prebuilt->table));
-	}
-}
-
-/********************************************************************
-Handles user errors and lock waits detected by the database engine. The
-error to handle is read from trx->error_state. Depending on the error the
-function rolls back to the savepoint, rolls back the whole transaction,
-waits for a lock, prints a diagnostic, or terminates the process. */
-
-ibool
-row_mysql_handle_errors(
-/*====================*/
-				/* out: TRUE if it was a lock wait and
-				we should continue running the query thread */
-	ulint*		new_err,/* out: possible new error encountered in
-				lock wait, or if no new error, the value
-				of trx->error_state at the entry of this
-				function */
-	trx_t*		trx,	/* in: transaction */
-	que_thr_t*	thr,	/* in: query thread */
-	trx_savept_t*	savept)	/* in: savepoint or NULL */
-{
-#ifndef UNIV_HOTBACKUP
-	ulint	err;
-
-handle_new_error:
-	err = trx->error_state;
-
-	ut_a(err != DB_SUCCESS);
-
-	trx->error_state = DB_SUCCESS;
-
-	switch (err) {
-	case DB_LOCK_WAIT:
-
-		srv_suspend_mysql_thread(thr);
-
-		if (trx->error_state != DB_SUCCESS) {
-			/* The wait itself ended in an error: handle that
-			error from the start */
-			que_thr_stop_for_mysql(thr);
-
-			goto handle_new_error;
-		}
-
-		*new_err = err;
-
-		return(TRUE);
-
-	case DB_LOCK_WAIT_TIMEOUT:
-		if (row_rollback_on_timeout) {
-			/* Roll back the whole transaction */
-
-			trx_general_rollback_for_mysql(trx, FALSE, NULL);
-			break;
-		}
-		/* Otherwise a timeout only rolls back to the savepoint:
-		fall through */
-	case DB_DUPLICATE_KEY:
-	case DB_FOREIGN_DUPLICATE_KEY:
-	case DB_TOO_BIG_RECORD:
-	case DB_ROW_IS_REFERENCED:
-	case DB_NO_REFERENCED_ROW:
-	case DB_CANNOT_ADD_CONSTRAINT:
-	case DB_TOO_MANY_CONCURRENT_TRXS:
-	case DB_OUT_OF_FILE_SPACE:
-		if (savept) {
-			/* Roll back the latest, possibly incomplete
-			insertion or update */
-
-			trx_general_rollback_for_mysql(trx, TRUE, savept);
-		}
-		/* MySQL will roll back the latest SQL statement */
-		break;
-
-	case DB_DEADLOCK:
-	case DB_LOCK_TABLE_FULL:
-		/* Roll back the whole transaction; this resolution was added
-		to version 3.23.43 */
-
-		trx_general_rollback_for_mysql(trx, FALSE, NULL);
-		break;
-
-	case DB_MUST_GET_MORE_FILE_SPACE:
-
-		fputs("InnoDB: The database cannot continue"
-		      " operation because of\n"
-		      "InnoDB: lack of space. You must add"
-		      " a new data file to\n"
-		      "InnoDB: my.cnf and restart the database.\n", stderr);
-
-		exit(1);
-
-	case DB_CORRUPTION:
-
-		fputs("InnoDB: We detected index corruption"
-		      " in an InnoDB type table.\n"
-		      "InnoDB: You have to dump + drop + reimport"
-		      " the table or, in\n"
-		      "InnoDB: a case of widespread corruption,"
-		      " dump all InnoDB\n"
-		      "InnoDB: tables and recreate the"
-		      " whole InnoDB tablespace.\n"
-		      "InnoDB: If the mysqld server crashes"
-		      " after the startup or when\n"
-		      "InnoDB: you dump the tables, look at\n"
-		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-		      "forcing-recovery.html"
-		      " for help.\n", stderr);
-		break;
-
-	default:
-		fprintf(stderr, "InnoDB: unknown error code %lu\n",
-			(ulong) err);
-		ut_error;
-	}
-
-	/* Report an error raised while handling the original one, if any;
-	otherwise report the original error */
-
-	*new_err = (trx->error_state != DB_SUCCESS)
-		? trx->error_state
-		: err;
-
-	trx->error_state = DB_SUCCESS;
-
-	return(FALSE);
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds. Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(FALSE);
-#endif /* UNIV_HOTBACKUP */
-}
-
-/************************************************************************
-Create a prebuilt struct for a MySQL table handle. The struct is allocated
-from a memory heap of its own, which is stored in prebuilt->heap and
-released by row_prebuilt_free(). */
-
-row_prebuilt_t*
-row_create_prebuilt(
-/*================*/
-				/* out, own: a prebuilt struct */
-	dict_table_t*	table)	/* in: Innobase table handle */
-{
-	row_prebuilt_t*	handle;
-	mem_heap_t*	handle_heap;
-	dict_index_t*	clust_index;
-	dtuple_t*	clust_ref;
-	ulint		n_unique;
-	ulint		slot;
-
-	handle_heap = mem_heap_create(128);
-
-	handle = mem_heap_alloc(handle_heap, sizeof *handle);
-
-	/* Identity and bookkeeping */
-	handle->magic_n = ROW_PREBUILT_ALLOCATED;
-	handle->magic_n2 = ROW_PREBUILT_ALLOCATED;
-	handle->heap = handle_heap;
-	handle->table = table;
-	handle->trx = NULL;
-	handle->index = NULL;
-
-	/* Statement state */
-	handle->sql_stat_start = TRUE;
-	handle->mysql_has_locked = FALSE;
-	handle->used_in_HANDLER = FALSE;
-
-	/* No MySQL row template has been built yet */
-	handle->n_template = 0;
-	handle->mysql_template = NULL;
-
-	/* Query graphs, nodes and their buffers are created on demand */
-	handle->ins_node = NULL;
-	handle->upd_node = NULL;
-	handle->ins_graph = NULL;
-	handle->upd_graph = NULL;
-	handle->sel_graph = NULL;
-	handle->ins_upd_rec_buff = NULL;
-	handle->default_rec = NULL;
-
-	/* Locking defaults */
-	handle->select_lock_type = LOCK_NONE;
-	handle->stored_select_lock_type = 99999999;
-	handle->row_read_type = ROW_READ_WITH_LOCKS;
-
-	/* Auxiliary heaps and the fetch cache start out empty */
-	handle->blob_heap = NULL;
-	handle->old_vers_heap = NULL;
-	handle->n_fetch_cached = 0;
-
-	for (slot = 0; slot < MYSQL_FETCH_CACHE_SIZE; slot++) {
-		handle->fetch_cache[slot] = NULL;
-	}
-
-	handle->autoinc_error = 0;
-	handle->autoinc_offset = 0;
-	handle->autoinc_last_value = 0;
-
-	/* Default to 1, we will set the actual value later in
-	ha_innobase::get_auto_increment(). */
-	handle->autoinc_increment = 1;
-
-	handle->pcur = btr_pcur_create_for_mysql();
-	handle->clust_pcur = btr_pcur_create_for_mysql();
-
-	handle->search_tuple = dtuple_create(
-		handle_heap, 2 * dict_table_get_n_cols(table));
-
-	clust_index = dict_table_get_first_index(table);
-
-	/* Make sure that search_tuple is long enough for clustered index */
-	ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields);
-
-	/* Build the clustered index row reference tuple */
-	n_unique = dict_index_get_n_unique(clust_index);
-
-	clust_ref = dtuple_create(handle_heap, n_unique);
-
-	dict_index_copy_types(clust_ref, clust_index, n_unique);
-
-	handle->clust_ref = clust_ref;
-
-	return(handle);
-}
-
-/************************************************************************
-Free a prebuilt struct for a MySQL table handle. Both magic numbers of the
-struct and the guard words around every cached fetch buffer are verified
-first; a mismatch is reported to stderr and triggers ut_error. */
-
-void
-row_prebuilt_free(
-/*==============*/
-	row_prebuilt_t*	prebuilt)	/* in, own: prebuilt struct */
-{
-	ulint	slot;
-
-	if (!(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED
-	      && prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED)) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to free a corrupt\n"
-			"InnoDB: table handle. Magic n %lu,"
-			" magic n2 %lu, table name",
-			(ulong) prebuilt->magic_n,
-			(ulong) prebuilt->magic_n2);
-		ut_print_name(stderr, NULL, TRUE, prebuilt->table->name);
-		putc('\n', stderr);
-
-		mem_analyze_corruption(prebuilt);
-
-		ut_error;
-	}
-
-	/* Mark the struct as freed before tearing it down */
-	prebuilt->magic_n = ROW_PREBUILT_FREED;
-	prebuilt->magic_n2 = ROW_PREBUILT_FREED;
-
-	btr_pcur_free_for_mysql(prebuilt->pcur);
-	btr_pcur_free_for_mysql(prebuilt->clust_pcur);
-
-	if (prebuilt->mysql_template != NULL) {
-		mem_free(prebuilt->mysql_template);
-	}
-
-	if (prebuilt->ins_graph != NULL) {
-		que_graph_free_recursive(prebuilt->ins_graph);
-	}
-
-	if (prebuilt->sel_graph != NULL) {
-		que_graph_free_recursive(prebuilt->sel_graph);
-	}
-
-	if (prebuilt->upd_graph != NULL) {
-		que_graph_free_recursive(prebuilt->upd_graph);
-	}
-
-	if (prebuilt->blob_heap != NULL) {
-		mem_heap_free(prebuilt->blob_heap);
-	}
-
-	if (prebuilt->old_vers_heap != NULL) {
-		mem_heap_free(prebuilt->old_vers_heap);
-	}
-
-	for (slot = 0; slot < MYSQL_FETCH_CACHE_SIZE; slot++) {
-
-		if (prebuilt->fetch_cache[slot] == NULL) {
-
-			continue;
-		}
-
-		/* A 4-byte magic number is expected immediately before
-		the buffer and mysql_row_len bytes after its start */
-
-		if (mach_read_from_4((prebuilt->fetch_cache[slot]) - 4)
-		    != ROW_PREBUILT_FETCH_MAGIC_N
-		    || mach_read_from_4((prebuilt->fetch_cache[slot])
-					+ prebuilt->mysql_row_len)
-		    != ROW_PREBUILT_FETCH_MAGIC_N) {
-
-			fputs("InnoDB: Error: trying to free"
-			      " a corrupt fetch buffer.\n", stderr);
-
-			mem_analyze_corruption(prebuilt->fetch_cache[slot]);
-
-			ut_error;
-		}
-
-		/* The allocation starts at the leading magic number */
-		mem_free((prebuilt->fetch_cache[slot]) - 4);
-	}
-
-	dict_table_decrement_handle_count(prebuilt->table);
-
-	/* The prebuilt struct itself lives in this heap: free it last */
-	mem_heap_free(prebuilt->heap);
-}
-
-/*************************************************************************
-Binds a transaction to a prebuilt struct: stores the transaction handle in
-the struct and in each of its insert, update and select query graphs that
-has already been built. The magic numbers of both handles are verified
-first; a mismatch is reported to stderr and triggers ut_error. */
-
-void
-row_update_prebuilt_trx(
-/*====================*/
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in MySQL
-					handle */
-	trx_t*		trx)		/* in: transaction handle */
-{
-	if (TRX_MAGIC_N != trx->magic_n) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to use a corrupt\n"
-			"InnoDB: trx handle. Magic n %lu\n",
-			(ulong) trx->magic_n);
-
-		mem_analyze_corruption(trx);
-
-		ut_error;
-	}
-
-	if (ROW_PREBUILT_ALLOCATED != prebuilt->magic_n) {
-		fprintf(stderr,
-			"InnoDB: Error: trying to use a corrupt\n"
-			"InnoDB: table handle. Magic n %lu, table name",
-			(ulong) prebuilt->magic_n);
-		ut_print_name(stderr, NULL, TRUE, prebuilt->table->name);
-		putc('\n', stderr);
-
-		mem_analyze_corruption(prebuilt);
-
-		ut_error;
-	}
-
-	prebuilt->trx = trx;
-
-	/* Graphs that do not exist yet are given the transaction of the
-	prebuilt struct when they are built */
-
-	if (prebuilt->sel_graph != NULL) {
-		prebuilt->sel_graph->trx = trx;
-	}
-
-	if (prebuilt->ins_graph != NULL) {
-		prebuilt->ins_graph->trx = trx;
-	}
-
-	if (prebuilt->upd_graph != NULL) {
-		prebuilt->upd_graph->trx = trx;
-	}
-}
-
-/*************************************************************************
-Returns the dtuple that the prebuilt struct uses for insertions. On the
-first call for a handle this also creates the insert node, the MySQL row
-conversion buffer (unless it already exists) and the insert query graph. */
-static
-dtuple_t*
-row_get_prebuilt_insert_row(
-/*========================*/
-					/* out: prebuilt dtuple; the column
-					type information is also set in it */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
-					handle */
-{
-	ins_node_t*	ins_node;
-	dtuple_t*	new_row;
-	dict_table_t*	table	= prebuilt->table;
-	ulint		field_no;
-
-	ut_ad(prebuilt && table && prebuilt->trx);
-
-	if (prebuilt->ins_node != NULL) {
-		/* Already built by an earlier call */
-
-		return(prebuilt->ins_node->row);
-	}
-
-	/* Not called before for this handle: create an insert node
-	and query graph to the prebuilt struct */
-
-	ins_node = ins_node_create(INS_DIRECT, table, prebuilt->heap);
-
-	prebuilt->ins_node = ins_node;
-
-	if (prebuilt->ins_upd_rec_buff == NULL) {
-		prebuilt->ins_upd_rec_buff = mem_heap_alloc(
-			prebuilt->heap, prebuilt->mysql_row_len);
-	}
-
-	new_row = dtuple_create(prebuilt->heap,
-				dict_table_get_n_cols(table));
-
-	dict_table_copy_types(new_row, table);
-
-	/* We init the value of every field to the SQL NULL to avoid
-	a debug assertion from failing */
-
-	for (field_no = 0; field_no < dtuple_get_n_fields(new_row);
-	     field_no++) {
-
-		dtuple_get_nth_field(new_row, field_no)->len = UNIV_SQL_NULL;
-	}
-
-	ins_node_set_new_row(ins_node, new_row);
-
-	prebuilt->ins_graph = que_node_get_parent(
-		pars_complete_graph_for_exec(ins_node,
-					     prebuilt->trx,
-					     prebuilt->heap));
-	prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
-
-	return(prebuilt->ins_node->row);
-}
-
-/*************************************************************************
-Increments the modification counter of a table and, when enough
-modifications have accumulated, recalculates the table and index
-statistics. */
-UNIV_INLINE
-void
-row_update_statistics_if_needed(
-/*============================*/
-	dict_table_t*	table)	/* in: table */
-{
-	/* The threshold test below uses the value before the increment */
-	ulint	n_modified = table->stat_modified_counter;
-
-	table->stat_modified_counter = n_modified + 1;
-
-	/* Calculate new statistics if 1 / 16 of table has been modified
-	since the last time a statistics batch was run, or if
-	stat_modified_counter > 2 000 000 000 (to avoid wrap-around).
-	We calculate statistics at most every 16th round, since we may have
-	a counter table which is very small and updated very often. */
-
-	if (n_modified > 2000000000) {
-
-		dict_update_statistics(table);
-
-	} else if ((ib_longlong) n_modified
-		   > 16 + table->stat_n_rows / 16) {
-
-		dict_update_statistics(table);
-	}
-}
-
-/*************************************************************************
-Unlocks an AUTO_INC type lock possibly reserved by trx. Does nothing if
-trx->auto_inc_lock is not set. */
-
-void
-row_unlock_table_autoinc_for_mysql(
-/*===============================*/
-	trx_t*	trx)	/* in: transaction */
-{
-	if (trx->auto_inc_lock) {
-
-		lock_table_unlock_auto_inc(trx);
-	}
-}
-
-/*************************************************************************
-Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
-AUTO_INC lock gives exclusive access to the auto-inc counter of the
-table. The lock is reserved only for the duration of an SQL statement.
-It is not compatible with another AUTO_INC or exclusive lock on the
-table. Returns at once with DB_SUCCESS if trx->auto_inc_lock is already
-set. */
-
-int
-row_lock_table_autoinc_for_mysql(
-/*=============================*/
-					/* out: error code or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in the MySQL
-					table handle */
-{
-	trx_t*		trx		= prebuilt->trx;
-	ins_node_t*	ins_node	= prebuilt->ins_node;
-	que_thr_t*	thr;
-	ulint		err;
-
-	ut_ad(trx);
-	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
-	if (trx->auto_inc_lock) {
-
-		return(DB_SUCCESS);
-	}
-
-	trx->op_info = "setting auto-inc lock";
-
-	if (ins_node == NULL) {
-		/* Builds the insert node and the insert graph */
-		row_get_prebuilt_insert_row(prebuilt);
-		ins_node = prebuilt->ins_node;
-	}
-
-	/* We use the insert query graph as the dummy graph needed
-	in the lock module call */
-
-	thr = que_fork_get_first_thr(prebuilt->ins_graph);
-
-	que_thr_move_to_run_state_for_mysql(thr, trx);
-
-	/* Retry the lock request for as long as the only obstacle is a
-	lock wait that ended normally */
-
-	for (;;) {
-		thr->run_node = ins_node;
-		thr->prev_node = ins_node;
-
-		/* It may be that the current session has not yet started
-		its transaction, or it has been committed: */
-
-		trx_start_if_not_started(trx);
-
-		err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
-
-		trx->error_state = err;
-
-		if (err == DB_SUCCESS) {
-
-			break;
-		}
-
-		que_thr_stop_for_mysql(thr);
-
-		if (!row_mysql_handle_errors(&err, trx, thr, NULL)) {
-			/* Not a lock wait: give up and report the error */
-
-			trx->op_info = "";
-
-			return((int) err);
-		}
-	}
-
-	que_thr_stop_for_mysql_no_error(thr, trx);
-
-	trx->op_info = "";
-
-	return((int) err);
-}
-
-/*************************************************************************
-Sets a table lock on the table mentioned in prebuilt, or on an explicitly
-given table. The select query graph of the prebuilt struct is built if
-necessary and used as the dummy graph for the lock request. */
-
-int
-row_lock_table_for_mysql(
-/*=====================*/
-					/* out: error code or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in the MySQL
-					table handle */
-	dict_table_t*	table,		/* in: table to lock, or NULL
-					if prebuilt->table should be
-					locked as
-					prebuilt->select_lock_type */
-	ulint		mode)		/* in: lock mode of table
-					(ignored if table==NULL) */
-{
-	trx_t*		trx	= prebuilt->trx;
-	que_thr_t*	thr;
-	ulint		err;
-
-	ut_ad(trx);
-	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
-	trx->op_info = "setting table lock";
-
-	if (prebuilt->sel_graph == NULL) {
-		/* Build a dummy select query graph */
-		row_prebuild_sel_graph(prebuilt);
-	}
-
-	/* We use the select query graph as the dummy graph needed
-	in the lock module call */
-
-	thr = que_fork_get_first_thr(prebuilt->sel_graph);
-
-	que_thr_move_to_run_state_for_mysql(thr, trx);
-
-	/* Retry the lock request for as long as the only obstacle is a
-	lock wait that ended normally */
-
-	for (;;) {
-		thr->run_node = thr;
-		thr->prev_node = thr->common.parent;
-
-		/* It may be that the current session has not yet started
-		its transaction, or it has been committed: */
-
-		trx_start_if_not_started(trx);
-
-		if (table != NULL) {
-			err = lock_table(0, table, mode, thr);
-		} else {
-			err = lock_table(0, prebuilt->table,
-					 prebuilt->select_lock_type, thr);
-		}
-
-		trx->error_state = err;
-
-		if (err == DB_SUCCESS) {
-
-			break;
-		}
-
-		que_thr_stop_for_mysql(thr);
-
-		if (!row_mysql_handle_errors(&err, trx, thr, NULL)) {
-			/* Not a lock wait: give up and report the error */
-
-			trx->op_info = "";
-
-			return((int) err);
-		}
-	}
-
-	que_thr_stop_for_mysql_no_error(thr, trx);
-
-	trx->op_info = "";
-
-	return((int) err);
-}
-
-/*************************************************************************
-Does an insert for MySQL. Converts the MySQL format row to an Innobase
-dtuple and runs the insert node of the prebuilt struct, retrying after
-lock waits. Returns DB_ERROR without inserting if the .ibd file of the
-table is missing, or if a new raw partition was created or
-innodb_force_recovery is set. */
-
-int
-row_insert_for_mysql(
-/*=================*/
-					/* out: error code or DB_SUCCESS */
-	byte*		mysql_rec,	/* in: row in the MySQL format */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
-					handle */
-{
-	trx_savept_t	savept;
-	que_thr_t*	thr;
-	ulint		err;
-	ibool		was_lock_wait;
-	trx_t*		trx		= prebuilt->trx;
-	ins_node_t*	node		= prebuilt->ins_node;
-
-	ut_ad(trx);
-	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
-	if (prebuilt->table->ibd_file_missing) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: Error:\n"
-			"InnoDB: MySQL is trying to use a table handle"
-			" but the .ibd file for\n"
-			"InnoDB: table %s does not exist.\n"
-			"InnoDB: Have you deleted the .ibd file"
-			" from the database directory under\n"
-			"InnoDB: the MySQL datadir, or have you"
-			" used DISCARD TABLESPACE?\n"
-			"InnoDB: Look from\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-troubleshooting.html\n"
-			"InnoDB: how you can resolve the problem.\n",
-			prebuilt->table->name);
-		return(DB_ERROR);
-	}
-
-	if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
-		/* The handle is being used here, not freed: say so in the
-		diagnostic, as row_update_prebuilt_trx() does */
-		fprintf(stderr,
-			"InnoDB: Error: trying to use a corrupt\n"
-			"InnoDB: table handle. Magic n %lu, table name",
-			(ulong) prebuilt->magic_n);
-		ut_print_name(stderr, prebuilt->trx, TRUE,
-			      prebuilt->table->name);
-		putc('\n', stderr);
-
-		mem_analyze_corruption(prebuilt);
-
-		ut_error;
-	}
-
-	if (srv_created_new_raw || srv_force_recovery) {
-		fputs("InnoDB: A new raw disk partition was initialized or\n"
-		      "InnoDB: innodb_force_recovery is on: we do not allow\n"
-		      "InnoDB: database modifications by the user. Shut down\n"
-		      "InnoDB: mysqld and edit my.cnf so that"
-		      " newraw is replaced\n"
-		      "InnoDB: with raw, and innodb_force_... is removed.\n",
-		      stderr);
-
-		return(DB_ERROR);
-	}
-
-	trx->op_info = "inserting";
-
-	row_mysql_delay_if_needed();
-
-	trx_start_if_not_started(trx);
-
-	if (node == NULL) {
-		/* First insert through this handle: build the insert node
-		and the insert graph */
-		row_get_prebuilt_insert_row(prebuilt);
-		node = prebuilt->ins_node;
-	}
-
-	row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec);
-
-	/* A failed insert is rolled back to this savepoint by
-	row_mysql_handle_errors() */
-	savept = trx_savept_take(trx);
-
-	thr = que_fork_get_first_thr(prebuilt->ins_graph);
-
-	if (prebuilt->sql_stat_start) {
-		node->state = INS_NODE_SET_IX_LOCK;
-		prebuilt->sql_stat_start = FALSE;
-	} else {
-		node->state = INS_NODE_ALLOC_ROW_ID;
-	}
-
-	que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
-	thr->run_node = node;
-	thr->prev_node = node;
-
-	row_ins_step(thr);
-
-	err = trx->error_state;
-
-	if (err != DB_SUCCESS) {
-		que_thr_stop_for_mysql(thr);
-
-		/* Flag the thread as QUE_THR_LOCK_ROW while the error,
-		possibly a lock wait, is being handled, and reset it to
-		QUE_THR_LOCK_NOLOCK afterwards.
-		NOTE(review): the readers of lock_state are not visible
-		here - confirm what they use the flag for */
-		thr->lock_state= QUE_THR_LOCK_ROW;
-
-		was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
-							&savept);
-		thr->lock_state= QUE_THR_LOCK_NOLOCK;
-
-		if (was_lock_wait) {
-			goto run_again;
-		}
-
-		trx->op_info = "";
-
-		return((int) err);
-	}
-
-	que_thr_stop_for_mysql_no_error(thr, trx);
-
-	prebuilt->table->stat_n_rows++;
-
-	srv_n_rows_inserted++;
-
-	if (prebuilt->table->stat_n_rows == 0) {
-		/* Avoid wrap-over */
-		prebuilt->table->stat_n_rows--;
-	}
-
-	row_update_statistics_if_needed(prebuilt->table);
-	trx->op_info = "";
-
-	return((int) err);
-}
-
-/*************************************************************************
-Builds a dummy query graph used in selects, unless the prebuilt struct
-already has one. */
-
-void
-row_prebuild_sel_graph(
-/*===================*/
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
-					handle */
-{
-	sel_node_t*	sel_node;
-
-	ut_ad(prebuilt && prebuilt->trx);
-
-	if (prebuilt->sel_graph != NULL) {
-		/* Already built */
-
-		return;
-	}
-
-	sel_node = sel_node_create(prebuilt->heap);
-
-	prebuilt->sel_graph = que_node_get_parent(
-		pars_complete_graph_for_exec(sel_node,
-					     prebuilt->trx,
-					     prebuilt->heap));
-
-	prebuilt->sel_graph->state = QUE_FORK_ACTIVE;
-}
-
-/*************************************************************************
-Creates a query graph node of 'update' type to be used in the MySQL
-interface. The update vector of the node gets one slot for every column
-of the table. */
-
-upd_node_t*
-row_create_update_node_for_mysql(
-/*=============================*/
-				/* out, own: update node */
-	dict_table_t*	table,	/* in: table to update */
-	mem_heap_t*	heap)	/* in: mem heap from which allocated */
-{
-	upd_node_t*	upd_node	= upd_node_create(heap);
-	ulint		n_cols		= dict_table_get_n_cols(table);
-
-	upd_node->in_mysql_interface = TRUE;
-	upd_node->table = table;
-
-	/* A plain, non-searched update by default */
-	upd_node->is_delete = FALSE;
-	upd_node->searched_update = FALSE;
-	upd_node->select_will_do_update = FALSE;
-	upd_node->select = NULL;
-
-	upd_node->pcur = btr_pcur_create_for_mysql();
-
-	upd_node->update = upd_create(n_cols, heap);
-	upd_node->update_n_fields = n_cols;
-
-	UT_LIST_INIT(upd_node->columns);
-	upd_node->has_clust_rec_x_lock = TRUE;
-	upd_node->cmpl_info = 0;
-
-	upd_node->table_sym = NULL;
-	upd_node->col_assign_list = NULL;
-
-	return(upd_node);
-}
-
-/*************************************************************************
-Returns the update vector that the prebuilt struct uses for updates. On the
-first call for a handle this also creates the update node and the update
-query graph. */
-
-upd_t*
-row_get_prebuilt_update_vector(
-/*===========================*/
-					/* out: prebuilt update vector */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
-					handle */
-{
-	dict_table_t*	table	= prebuilt->table;
-	upd_node_t*	upd_node;
-
-	ut_ad(prebuilt && table && prebuilt->trx);
-
-	if (prebuilt->upd_node != NULL) {
-		/* Already built by an earlier call */
-
-		return(prebuilt->upd_node->update);
-	}
-
-	/* Not called before for this handle: create an update node
-	and query graph to the prebuilt struct */
-
-	upd_node = row_create_update_node_for_mysql(table, prebuilt->heap);
-
-	prebuilt->upd_node = upd_node;
-
-	prebuilt->upd_graph = que_node_get_parent(
-		pars_complete_graph_for_exec(upd_node,
-					     prebuilt->trx,
-					     prebuilt->heap));
-	prebuilt->upd_graph->state = QUE_FORK_ACTIVE;
-
-	return(prebuilt->upd_node->update);
-}
-
-/*************************************************************************
-Does an update or delete of a row for MySQL. The row to modify is the one
-under the stored position of prebuilt->pcur, or of prebuilt->clust_pcur if
-prebuilt->pcur is not on the clustered index. The update node
-prebuilt->upd_node is used without a NULL check, so it must already have
-been built (see row_get_prebuilt_update_vector()). Returns DB_ERROR without
-modifying anything if the .ibd file of the table is missing, or if a new
-raw partition was created or innodb_force_recovery is set. */
-
-int
-row_update_for_mysql(
-/*=================*/
-					/* out: error code or DB_SUCCESS */
-	byte*		mysql_rec,	/* in: the row to be updated, in
-					the MySQL format; not used by
-					this function */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
-					handle */
-{
-	trx_savept_t	savept;
-	ulint		err;
-	que_thr_t*	thr;
-	ibool		was_lock_wait;
-	dict_index_t*	clust_index;
-	upd_node_t*	node;
-	dict_table_t*	table		= prebuilt->table;
-	trx_t*		trx		= prebuilt->trx;
-
-	ut_ad(prebuilt && trx);
-	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-	UT_NOT_USED(mysql_rec);
-
-	if (prebuilt->table->ibd_file_missing) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: Error:\n"
-			"InnoDB: MySQL is trying to use a table handle"
-			" but the .ibd file for\n"
-			"InnoDB: table %s does not exist.\n"
-			"InnoDB: Have you deleted the .ibd file"
-			" from the database directory under\n"
-			"InnoDB: the MySQL datadir, or have you"
-			" used DISCARD TABLESPACE?\n"
-			"InnoDB: Look from\n"
-			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
-			"innodb-troubleshooting.html\n"
-			"InnoDB: how you can resolve the problem.\n",
-			prebuilt->table->name);
-		return(DB_ERROR);
-	}
-
-	if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
-		/* The handle is being used here, not freed: say so in the
-		diagnostic, as row_update_prebuilt_trx() does */
-		fprintf(stderr,
-			"InnoDB: Error: trying to use a corrupt\n"
-			"InnoDB: table handle. Magic n %lu, table name",
-			(ulong) prebuilt->magic_n);
-		ut_print_name(stderr, prebuilt->trx, TRUE,
-			      prebuilt->table->name);
-		putc('\n', stderr);
-
-		mem_analyze_corruption(prebuilt);
-
-		ut_error;
-	}
-
-	if (srv_created_new_raw || srv_force_recovery) {
-		fputs("InnoDB: A new raw disk partition was initialized or\n"
-		      "InnoDB: innodb_force_recovery is on: we do not allow\n"
-		      "InnoDB: database modifications by the user. Shut down\n"
-		      "InnoDB: mysqld and edit my.cnf so that newraw"
-		      " is replaced\n"
-		      "InnoDB: with raw, and innodb_force_... is removed.\n",
-		      stderr);
-
-		return(DB_ERROR);
-	}
-
-	trx->op_info = "updating or deleting";
-
-	row_mysql_delay_if_needed();
-
-	trx_start_if_not_started(trx);
-
-	node = prebuilt->upd_node;
-
-	clust_index = dict_table_get_first_index(table);
-
-	/* Position the update node on the clustered index record */
-
-	if (prebuilt->pcur->btr_cur.index == clust_index) {
-		btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur);
-	} else {
-		btr_pcur_copy_stored_position(node->pcur,
-					      prebuilt->clust_pcur);
-	}
-
-	ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
-
-	/* MySQL seems to call rnd_pos before updating each row it
-	has cached: we can get the correct cursor position from
-	prebuilt->pcur; NOTE that we cannot build the row reference
-	from mysql_rec if the clustered index was automatically
-	generated for the table: MySQL does not know anything about
-	the row id used as the clustered index key */
-
-	/* A failed update is rolled back to this savepoint by
-	row_mysql_handle_errors() */
-	savept = trx_savept_take(trx);
-
-	thr = que_fork_get_first_thr(prebuilt->upd_graph);
-
-	node->state = UPD_NODE_UPDATE_CLUSTERED;
-
-	ut_ad(!prebuilt->sql_stat_start);
-
-	que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
-	thr->run_node = node;
-	thr->prev_node = node;
-
-	row_upd_step(thr);
-
-	err = trx->error_state;
-
-	if (err != DB_SUCCESS) {
-		que_thr_stop_for_mysql(thr);
-
-		if (err == DB_RECORD_NOT_FOUND) {
-			/* Reported to the caller as is, without going
-			through row_mysql_handle_errors() */
-			trx->error_state = DB_SUCCESS;
-			trx->op_info = "";
-
-			return((int) err);
-		}
-
-		thr->lock_state= QUE_THR_LOCK_ROW;
-		was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
-							&savept);
-		thr->lock_state= QUE_THR_LOCK_NOLOCK;
-
-		if (was_lock_wait) {
-			goto run_again;
-		}
-
-		trx->op_info = "";
-
-		return((int) err);
-	}
-
-	que_thr_stop_for_mysql_no_error(thr, trx);
-
-	if (node->is_delete) {
-		if (prebuilt->table->stat_n_rows > 0) {
-			prebuilt->table->stat_n_rows--;
-		}
-
-		srv_n_rows_deleted++;
-	} else {
-		srv_n_rows_updated++;
-	}
-
-	row_update_statistics_if_needed(prebuilt->table);
-
-	trx->op_info = "";
-
-	return((int) err);
-}
-
-/*************************************************************************
-This can only be used when srv_locks_unsafe_for_binlog is TRUE or
-this session is using a READ COMMITTED isolation level. Before
-calling this function we must use trx_reset_new_rec_lock_info() and
-trx_register_new_rec_lock() to store the information which new record locks
-really were set. This function removes a newly set lock under prebuilt->pcur,
-and also under prebuilt->clust_pcur. Currently, this is only used and tested
-in the case of an UPDATE or a DELETE statement, where the row lock is of the
-LOCK_X type.
-Thus, this implements a 'mini-rollback' that releases the latest record
-locks we set. The locks are kept if the trx id stored in the clustered
-index record equals the id of this transaction. The function returns
-DB_SUCCESS in every case, also when it only prints an error because the
-precondition above does not hold. */
-
-int
-row_unlock_for_mysql(
-/*=================*/
-					/* out: error code or DB_SUCCESS */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct in MySQL
-					handle */
-	ibool		has_latches_on_recs)/* in: TRUE if called so that
-					we have the latches on the records
-					under pcur and clust_pcur, and we do
-					not need to reposition the cursors. */
-{
-	btr_pcur_t*	pcur		= prebuilt->pcur;
-	btr_pcur_t*	clust_pcur	= prebuilt->clust_pcur;
-	trx_t*		trx		= prebuilt->trx;
-
-	ut_ad(prebuilt && trx);
-	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
-	if (UNIV_UNLIKELY
-	    (!srv_locks_unsafe_for_binlog
-	     && trx->isolation_level != TRX_ISO_READ_COMMITTED)) {
-
-		fprintf(stderr,
-			"InnoDB: Error: calling row_unlock_for_mysql though\n"
-			"InnoDB: innodb_locks_unsafe_for_binlog is FALSE and\n"
-			"InnoDB: this session is not using"
-			" READ COMMITTED isolation level.\n");
-
-		return(DB_SUCCESS);
-	}
-
-	trx->op_info = "unlock_row";
-
-	if (prebuilt->new_rec_locks >= 1) {
-
-		rec_t*		rec;
-		dict_index_t*	index;
-		dulint		rec_trx_id;
-		mtr_t		mtr;
-
-		mtr_start(&mtr);
-
-		/* Restore the cursor position and find the record */
-
-		if (!has_latches_on_recs) {
-			btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr);
-		}
-
-		rec = btr_pcur_get_rec(pcur);
-		index = btr_pcur_get_btr_cur(pcur)->index;
-
-		if (prebuilt->new_rec_locks >= 2) {
-			/* Restore the cursor position and find the record
-			in the clustered index. */
-
-			if (!has_latches_on_recs) {
-				btr_pcur_restore_position(BTR_SEARCH_LEAF,
-							  clust_pcur, &mtr);
-			}
-
-			rec = btr_pcur_get_rec(clust_pcur);
-			index = btr_pcur_get_btr_cur(clust_pcur)->index;
-		}
-
-		/* If the record has been modified by this
-		transaction, do not unlock it. The trx id is read from
-		rec, which must therefore be a clustered index record. */
-		ut_a(index->type & DICT_CLUSTERED);
-
-		if (index->trx_id_offset) {
-			/* The trx id is at a fixed offset in the record */
-			rec_trx_id = trx_read_trx_id(rec
-						     + index->trx_id_offset);
-		} else {
-			/* Compute the field offsets to locate the trx id */
-			mem_heap_t*	heap			= NULL;
-			ulint	offsets_[REC_OFFS_NORMAL_SIZE];
-			ulint*	offsets				= offsets_;
-
-			*offsets_ = (sizeof offsets_) / sizeof *offsets_;
-			offsets = rec_get_offsets(rec, index, offsets,
-						  ULINT_UNDEFINED, &heap);
-
-			rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
-
-			if (UNIV_LIKELY_NULL(heap)) {
-				mem_heap_free(heap);
-			}
-		}
-
-		if (ut_dulint_cmp(rec_trx_id, trx->id) != 0) {
-			/* We did not update the record: unlock it */
-
-			lock_rec_unlock(trx, btr_pcur_get_rec(pcur),
-					prebuilt->select_lock_type);
-
-			if (prebuilt->new_rec_locks >= 2) {
-				lock_rec_unlock(trx,
-						btr_pcur_get_rec(clust_pcur),
-						prebuilt->select_lock_type);
-			}
-		}
-
-		mtr_commit(&mtr);
-	}
-
-	trx->op_info = "";
-
-	return(DB_SUCCESS);
-}
-
-/**************************************************************************
-Does a cascaded delete or set null in a foreign key operation. Lock waits
-are handled here by suspending the thread and retrying; any other error is
-returned to the caller. */
-
-ulint
-row_update_cascade_for_mysql(
-/*=========================*/
-				/* out: error code or DB_SUCCESS */
-	que_thr_t*	thr,	/* in: query thread */
-	upd_node_t*	node,	/* in: update node used in the cascade
-				or set null operation */
-	dict_table_t*	table)	/* in: table where we do the operation */
-{
-	trx_t*	trx	= thr_get_trx(thr);
-	ulint	err;
-
-	for (;;) {
-		thr->run_node = node;
-		thr->prev_node = node;
-
-		row_upd_step(thr);
-
-		err = trx->error_state;
-
-		/* Note that the cascade node is a subnode of another InnoDB
-		query graph node. We do a normal lock wait in this node, but
-		all errors are handled by the parent node. */
-
-		if (err != DB_LOCK_WAIT) {
-
-			break;
-		}
-
-		/* Handle lock wait here */
-
-		que_thr_stop_for_mysql(thr);
-
-		srv_suspend_mysql_thread(thr);
-
-		/* Note that a lock wait may also end in a lock wait timeout,
-		or this transaction is picked as a victim in selective
-		deadlock resolution */
-
-		if (trx->error_state != DB_SUCCESS) {
-
-			return(trx->error_state);
-		}
-
-		/* Retry operation after a normal lock wait */
-	}
-
-	if (err != DB_SUCCESS) {
-
-		return(err);
-	}
-
-	/* Success: maintain the row count estimate and the counters */
-
-	if (!node->is_delete) {
-		srv_n_rows_updated++;
-	} else {
-		if (table->stat_n_rows > 0) {
-			table->stat_n_rows--;
-		}
-
-		srv_n_rows_deleted++;
-	}
-
-	row_update_statistics_if_needed(table);
-
-	return(err);
-}
-
-/*************************************************************************
-Checks if a table is such that we automatically created a clustered
-index on it (on row id). The test is whether the first column of the first
-index of the table has the main type DATA_SYS. */
-
-ibool
-row_table_got_default_clust_index(
-/*==============================*/
-				/* out: nonzero if the first column of
-				the first index is of type DATA_SYS */
-	dict_table_t*	table)	/* in: table */
-{
-	const dict_index_t*	first_index
-		= dict_table_get_first_index(table);
-
-	return(DATA_SYS == dict_index_get_nth_col(first_index, 0)->mtype);
-}
-
-/*************************************************************************
-Calculates the key number used inside MySQL for an Innobase index. We have
-to take into account if we generated a default clustered index for the
-table: in that case the position of the index in the index list of its
-table is decremented by one. */
-
-ulint
-row_get_mysql_key_number_for_index(
-/*===============================*/
-				/* out: key number */
-	dict_index_t*	index)	/* in: index; must be in the index list
-				of index->table */
-{
-	dict_index_t*	cur;
-	ulint		key_no	= 0;
-
-	ut_a(index);
-
-	/* Count the indexes that precede 'index' in the list */
-
-	for (cur = dict_table_get_first_index(index->table);
-	     cur != index;
-	     cur = dict_table_get_next_index(cur)) {
-
-		key_no++;
-	}
-
-	if (row_table_got_default_clust_index(index->table)) {
-		/* The generated clustered index comes first in the list,
-		so a key that MySQL knows about cannot be at position 0 */
-		ut_a(key_no > 0);
-		key_no--;
-	}
-
-	return(key_no);
-}
-
-/*************************************************************************
-Locks the data dictionary in shared mode from modifications, for performing
-foreign key check, rollback, or other operation invisible to MySQL.
-Acquires dict_operation_lock in s-mode and records RW_S_LATCH in
-trx->dict_operation_lock_mode. On entry trx->dict_operation_lock_mode must
-be 0; this is enforced with ut_a(). The latch is released with
-row_mysql_unfreeze_data_dictionary(). */
-
-void
-row_mysql_freeze_data_dictionary(
-/*=============================*/
- trx_t* trx) /* in: transaction; its dict_operation_lock_mode is
- set to RW_S_LATCH */
-{
- ut_a(trx->dict_operation_lock_mode == 0);
-
- rw_lock_s_lock(&dict_operation_lock);
-
- trx->dict_operation_lock_mode = RW_S_LATCH; /* remember the mode held */
-}
-
-/*************************************************************************
-Unlocks the data dictionary shared lock. Releases the s-latch on
-dict_operation_lock and resets trx->dict_operation_lock_mode to 0. On entry
-trx->dict_operation_lock_mode must be RW_S_LATCH, the value set by
-row_mysql_freeze_data_dictionary(); this is enforced with ut_a(). */
-
-void
-row_mysql_unfreeze_data_dictionary(
-/*===============================*/
- trx_t* trx) /* in: transaction; its dict_operation_lock_mode is
- reset to 0 */
-{
- ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
-
- rw_lock_s_unlock(&dict_operation_lock);
-
- trx->dict_operation_lock_mode = 0; /* no dictionary latch held */
-}
-
-/*************************************************************************
-Locks the data dictionary exclusively for performing a table create or other
-data dictionary modification operation. Acquires dict_operation_lock in
-x-mode, records RW_X_LATCH in trx->dict_operation_lock_mode, and then
-enters dict_sys->mutex, in that order. The assertion accepts a
-dict_operation_lock_mode of either 0 or RW_X_LATCH on entry.
-NOTE(review): with RW_X_LATCH already set the x-latch is requested a second
-time - confirm that callers rely on this and how it is released. */
-
-void
-row_mysql_lock_data_dictionary(
-/*===========================*/
- trx_t* trx) /* in: transaction; its dict_operation_lock_mode is
- set to RW_X_LATCH */
-{
- ut_a(trx->dict_operation_lock_mode == 0
- || trx->dict_operation_lock_mode == RW_X_LATCH);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks or lock waits can occur then in these operations */
-
- rw_lock_x_lock(&dict_operation_lock);
- trx->dict_operation_lock_mode = RW_X_LATCH;
-
- mutex_enter(&(dict_sys->mutex)); /* taken after the rw-lock */
-}
-
-/*************************************************************************
-Unlocks the data dictionary exclusive lock. Asserts that
-trx->dict_operation_lock_mode is RW_X_LATCH, exits dict_sys->mutex,
-releases the X-latch on dict_operation_lock and resets the mode to 0. */
-
-void
-row_mysql_unlock_data_dictionary(
-/*=============================*/
- trx_t* trx) /* in: transaction; its dict_operation_lock_mode must be RW_X_LATCH */
-{
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
-
- /* Release in the reverse of the order used by
- row_mysql_lock_data_dictionary(): first the dictionary mutex,
- then the X-latch on dict_operation_lock */
-
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&dict_operation_lock);
-
- trx->dict_operation_lock_mode = 0;
-}
-
-/*************************************************************************
-Creates a table for MySQL. If the name of the table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also start the printing of monitor
-output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate().
-
-The debug assertions below expect the caller to hold dict_operation_lock
-in X mode and dict_sys->mutex, with trx->dict_operation_lock_mode set to
-RW_X_LATCH.
-
-Three conditions are rejected before anything is created: a new raw disk
-partition was initialized, the table name is a MySQL system table name, or
-a user column has a reserved name. In each of these cases the table object
-is freed with dict_mem_table_free(), trx is committed and DB_ERROR is
-returned.
-
-Otherwise the table creation query graph is built and run. If
-trx->error_state is not DB_SUCCESS afterwards, trx is rolled back, a
-diagnostic is printed for DB_OUT_OF_FILE_SPACE and DB_DUPLICATE_KEY, and
-trx->error_state is reset to DB_SUCCESS; the original error is returned. */
-
-int
-row_create_table_for_mysql(
-/*=======================*/
- /* out: error code or DB_SUCCESS */
- dict_table_t* table, /* in: table definition */
- trx_t* trx) /* in: transaction handle */
-{
- tab_node_t* node;
- mem_heap_t* heap;
- que_thr_t* thr;
- const char* table_name;
- ulint table_name_len;
- ulint err;
- ulint i;
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
-
- if (srv_created_new_raw) {
- fputs("InnoDB: A new raw disk partition was initialized:\n"
- "InnoDB: we do not allow database modifications"
- " by the user.\n"
- "InnoDB: Shut down mysqld and edit my.cnf so that newraw"
- " is replaced with raw.\n", stderr);
-
- dict_mem_table_free(table);
- trx_commit_for_mysql(trx);
-
- return(DB_ERROR);
- }
-
- trx->op_info = "creating table";
-
- if (row_mysql_is_system_table(table->name)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to create a MySQL system"
- " table %s of type InnoDB.\n"
- "InnoDB: MySQL system tables must be"
- " of the MyISAM type!\n",
- table->name);
-
- dict_mem_table_free(table);
- trx_commit_for_mysql(trx);
-
- return(DB_ERROR);
- }
-
- /* Check that no reserved column names are used. Only the user
- columns of the table are examined; no message is printed here. */
- for (i = 0; i < dict_table_get_n_user_cols(table); i++) {
- if (dict_col_name_is_reserved(
- dict_table_get_col_name(table, i))) {
-
- dict_mem_table_free(table);
- trx_commit_for_mysql(trx);
-
- return(DB_ERROR);
- }
- }
-
- trx_start_if_not_started(trx);
-
- /* The table name is prefixed with the database name and a '/'.
- Certain table names starting with 'innodb_' have their special
- meaning regardless of the database name. Thus, we need to
- ignore the database name prefix in the comparisons. */
- table_name = strchr(table->name, '/');
- ut_a(table_name); /* a name without '/' is treated as a fatal error */
- table_name++; /* step over the '/' itself */
- table_name_len = strlen(table_name) + 1; /* length including the terminating NUL */
-
- if (STR_EQ(table_name, table_name_len, S_innodb_monitor)) {
-
- /* Table equals "innodb_monitor":
- start monitor prints */
-
- srv_print_innodb_monitor = TRUE;
-
- /* The lock timeout monitor thread also takes care
- of InnoDB monitor prints */
-
- os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_lock_monitor)) {
-
- srv_print_innodb_monitor = TRUE;
- srv_print_innodb_lock_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_tablespace_monitor)) {
-
- srv_print_innodb_tablespace_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_table_monitor)) {
-
- srv_print_innodb_table_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_mem_validate)) {
- /* We define here a debugging feature intended for
- developers */
-
- fputs("Validating InnoDB memory:\n"
- "to use this feature you must compile InnoDB with\n"
- "UNIV_MEM_DEBUG defined in univ.i and"
- " the server must be\n"
- "quiet because allocation from a mem heap"
- " is not protected\n"
- "by any semaphore.\n", stderr);
-#ifdef UNIV_MEM_DEBUG
- ut_a(mem_validate());
- fputs("Memory validated\n", stderr);
-#else /* UNIV_MEM_DEBUG */
- fputs("Memory NOT validated (recompile with UNIV_MEM_DEBUG)\n",
- stderr);
-#endif /* UNIV_MEM_DEBUG */
- }
-
- heap = mem_heap_create(512);
-
- trx->dict_operation = TRUE;
-
- node = tab_create_graph_create(table, heap);
-
- thr = pars_complete_graph_for_exec(node, trx, heap);
-
- ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
- que_run_threads(thr);
-
- err = trx->error_state; /* outcome of running the creation graph */
-
- if (err != DB_SUCCESS) {
- /* We have special error handling here: the error state is
- cleared before the rollback is run, and once more at the end
- of this branch */
-
- trx->error_state = DB_SUCCESS;
-
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
-
- if (err == DB_OUT_OF_FILE_SPACE) {
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Warning: cannot create table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" because tablespace full\n", stderr);
-
- if (dict_table_get_low(table->name)) {
-
- row_drop_table_for_mysql(table->name, trx,
- FALSE);
- }
-
- } else if (err == DB_DUPLICATE_KEY) {
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" already exists in InnoDB internal\n"
- "InnoDB: data dictionary. Have you deleted"
- " the .frm file\n"
- "InnoDB: and not used DROP TABLE?"
- " Have you used DROP DATABASE\n"
- "InnoDB: for InnoDB tables in"
- " MySQL version <= 3.23.43?\n"
- "InnoDB: See the Restrictions section"
- " of the InnoDB manual.\n"
- "InnoDB: You can drop the orphaned table"
- " inside InnoDB by\n"
- "InnoDB: creating an InnoDB table with"
- " the same name in another\n"
- "InnoDB: database and copying the .frm file"
- " to the current database.\n"
- "InnoDB: Then MySQL thinks the table exists,"
- " and DROP TABLE will\n"
- "InnoDB: succeed.\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
- stderr);
- }
-
- /* We may also get err == DB_ERROR if the .ibd file for the
- table already exists */
-
- trx->error_state = DB_SUCCESS;
- }
-
- que_graph_free((que_t*) que_node_get_parent(thr));
-
- trx->op_info = "";
-
- return((int) err);
-}
-
-/*************************************************************************
-Does an index creation operation for MySQL. TODO: currently failure
-to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table.
-
-Before the index creation graph is run, two checks are made on the
-definition: no column name may occur twice among the index fields
-(DB_COL_APPEARS_TWICE_IN_INDEX), and for every field the larger of
-prefix_len and field_lengths[i] (when field_lengths is given) must be
-below DICT_MAX_INDEX_COL_LEN (DB_TOO_BIG_RECORD). Any error, from these
-checks or from running the graph, leads to a rollback of trx and to
-row_drop_table_for_mysql() on the table of the index.
-
-The debug assertions expect the caller to hold dict_operation_lock in X
-mode and dict_sys->mutex.
-
-NOTE(review): on the two validation failures the index object is never
-handed to the creation graph; whether anything frees it on that path is
-not visible in this function - confirm. */
-
-int
-row_create_index_for_mysql(
-/*=======================*/
- /* out: error number or DB_SUCCESS */
- dict_index_t* index, /* in: index definition */
- trx_t* trx, /* in: transaction handle */
- const ulint* field_lengths) /* in: if not NULL, must contain
- dict_index_get_n_fields(index)
- actual field lengths for the
- index columns, which are
- then checked for not being too
- large. */
-{
- ind_node_t* node;
- mem_heap_t* heap;
- que_thr_t* thr;
- ulint err;
- ulint i, j;
- ulint len;
- char* table_name;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
- trx->op_info = "creating index";
-
- /* Copy the table name because we may want to drop the
- table later, after the index object is freed (inside
- que_run_threads()) and thus index->table_name is not available.
- The copy is released with mem_free() at the end of the function. */
- table_name = mem_strdup(index->table_name);
-
- trx_start_if_not_started(trx);
-
- /* Check that the same column does not appear twice in the index.
- Starting from 4.0.14, InnoDB should be able to cope with that, but
- safer not to allow them. */
-
- for (i = 0; i < dict_index_get_n_fields(index); i++) {
- for (j = 0; j < i; j++) { /* compare field i with every earlier field */
- if (0 == ut_strcmp(
- dict_index_get_nth_field(index, j)->name,
- dict_index_get_nth_field(index, i)->name)) {
-
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: column ", stderr);
- ut_print_name(stderr, trx, FALSE,
- dict_index_get_nth_field(
- index, i)->name);
- fputs(" appears twice in ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: This is not allowed"
- " in InnoDB.\n", stderr);
-
- err = DB_COL_APPEARS_TWICE_IN_INDEX;
-
- goto error_handling;
- }
- }
-
- /* Check also that prefix_len and actual length
- < DICT_MAX_INDEX_COL_LEN */
-
- len = dict_index_get_nth_field(index, i)->prefix_len;
-
- if (field_lengths) {
- len = ut_max(len, field_lengths[i]);
- }
-
- if (len >= DICT_MAX_INDEX_COL_LEN) {
- err = DB_TOO_BIG_RECORD; /* no message is printed for this case */
-
- goto error_handling;
- }
- }
-
- heap = mem_heap_create(512);
-
- trx->dict_operation = TRUE;
-
- /* Note that the space id where we store the index is inherited from
- the table in dict_build_index_def_step() in dict0crea.c. */
-
- node = ind_create_graph_create(index, heap);
-
- thr = pars_complete_graph_for_exec(node, trx, heap);
-
- ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
- que_run_threads(thr);
-
- err = trx->error_state;
-
- que_graph_free((que_t*) que_node_get_parent(thr));
-
-error_handling:
- if (err != DB_SUCCESS) {
- /* We have special error handling here: clear the error
- state, roll back, drop the whole table by the copied name,
- and clear the error state again */
-
- trx->error_state = DB_SUCCESS;
-
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
-
- row_drop_table_for_mysql(table_name, trx, FALSE);
-
- trx->error_state = DB_SUCCESS;
- }
-
- trx->op_info = "";
-
- mem_free(table_name);
-
- return((int) err);
-}
-
-/*************************************************************************
-Scans a table create SQL string and adds the foreign key constraints
-declared in it to the data dictionary. Call this only after the indexes of
-the table have been created: each foreign key constraint must be
-accompanied with indexes in both participating tables (the indexes may
-contain more fields than the constraint mentions). Foreign key constraints
-which reference this table are checked as well. If any step fails, the
-transaction is rolled back and the table is dropped. */
-
-int
-row_table_add_foreign_constraints(
-/*==============================*/
-					/* out: error code or DB_SUCCESS */
-	trx_t*		trx,		/* in: transaction */
-	const char*	sql_string,	/* in: table create statement where
-					foreign keys are declared like:
-					FOREIGN KEY (a, b) REFERENCES
-					table2(c, d); table2 can also be
-					written with the database name
-					before it: test.table2 */
-	const char*	name,		/* in: table full name in the
-					normalized form
-					database_name/table_name */
-	ibool		reject_fks)	/* in: if TRUE, fail with error
-					code DB_CANNOT_ADD_CONSTRAINT if
-					any foreign keys are found. */
-{
-	ulint	error;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_a(sql_string);
-
-	trx->op_info = "adding foreign keys";
-
-	trx_start_if_not_started(trx);
-
-	trx->dict_operation = TRUE;
-
-	error = dict_create_foreign_constraints(trx, sql_string, name,
-						reject_fks);
-
-	if (DB_SUCCESS == error) {
-		/* The constraints of this table were stored; now check
-		the constraints that reference this table, too */
-		error = dict_load_foreigns(name, TRUE);
-	}
-
-	if (DB_SUCCESS == error) {
-
-		return((int) error);
-	}
-
-	/* Failure: clear the error state, undo the work of the
-	transaction, drop the table, and clear the error state again */
-
-	trx->error_state = DB_SUCCESS;
-
-	trx_general_rollback_for_mysql(trx, FALSE, NULL);
-
-	row_drop_table_for_mysql(name, trx, FALSE);
-
-	trx->error_state = DB_SUCCESS;
-
-	return((int) error);
-}
-
-/*************************************************************************
-Drops a table for MySQL as a background operation. In ALTER TABLE on Unix
-MySQL relies on the table handler not removing the table before all
-handles to it have been removed. Furthermore, MySQL's call to drop table
-must be non-blocking. Therefore the drop is done as a background
-operation, which is taken care of by the master thread in srv0srv.c.
-The drop runs in its own background transaction, which is committed and
-freed before returning. */
-static
-int
-row_drop_table_for_mysql_in_background(
-/*===================================*/
-				/* out: error code or DB_SUCCESS */
-	const char*	name)	/* in: table name */
-{
-	trx_t*	bg_trx = trx_allocate_for_background();
-	ulint	err;
-
-	/* The transaction which originally wanted to drop the table may
-	have been dropping a table referenced by foreign keys; foreign
-	key checking must be off here to be able to drop such a table */
-	bg_trx->check_foreigns = FALSE;
-
-	/* Attempt the drop inside InnoDB */
-	err = row_drop_table_for_mysql(name, bg_trx, FALSE);
-
-	/* With innodb_flush_log_at_trx_commit = 0 the .frm files and the
-	InnoDB data dictionary could get out-of-sync; flushing the log
-	here reduces the probability of that */
-	log_buffer_flush_to_disk();
-
-	trx_commit_for_mysql(bg_trx);
-	trx_free_for_background(bg_trx);
-
-	return((int) err);
-}
-
-/*************************************************************************
-Called regularly by the master thread in srv0srv.c to drop the tables that
-were queued for a background drop because queries on them had not yet
-ended. Such lazy dropping of tables is needed in ALTER TABLE on Unix.
-The queue is processed from its head until it is empty or until a drop
-fails; a failed entry stays in the queue so that a later call retries it. */
-
-ulint
-row_drop_tables_for_mysql_in_background(void)
-/*=========================================*/
-					/* out: how many tables dropped
-					+ remaining tables in list */
-{
-	row_mysql_drop_t*	entry;
-	dict_table_t*		table;
-	ulint			n_queued;
-	ulint			n_dropped = 0;
-
-	for (;;) {
-		/* Fetch the head of the queue and the queue length
-		under the kernel mutex */
-		mutex_enter(&kernel_mutex);
-
-		if (!row_mysql_drop_list_inited) {
-
-			UT_LIST_INIT(row_mysql_drop_list);
-			row_mysql_drop_list_inited = TRUE;
-		}
-
-		entry = UT_LIST_GET_FIRST(row_mysql_drop_list);
-
-		n_queued = UT_LIST_GET_LEN(row_mysql_drop_list);
-
-		mutex_exit(&kernel_mutex);
-
-		if (entry == NULL) {
-			/* Nothing left in the queue */
-
-			break;
-		}
-
-		mutex_enter(&(dict_sys->mutex));
-		table = dict_table_get_low(entry->table_name);
-		mutex_exit(&(dict_sys->mutex));
-
-		/* A table that some other mechanism has already dropped
-		is not dropped again; its queue entry is just removed */
-
-		if (table != NULL) {
-			if (row_drop_table_for_mysql_in_background(
-				    entry->table_name) != DB_SUCCESS) {
-				/* The drop failed: give up for now and
-				let the main thread retry later */
-
-				break;
-			}
-
-			n_dropped++;
-		}
-
-		mutex_enter(&kernel_mutex);
-
-		UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list,
-			       entry);
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Dropped table %s in background drop queue.\n",
-			entry->table_name);
-
-		mem_free(entry->table_name);
-
-		mem_free(entry);
-
-		mutex_exit(&kernel_mutex);
-	}
-
-	return(n_queued + n_dropped);
-}
-
-/*************************************************************************
-Returns the number of tables in the background drop list, initializing the
-list first if that has not been done yet. NOTE: the caller must own the
-kernel mutex! */
-
-ulint
-row_get_background_drop_list_len_low(void)
-/*======================================*/
-					/* out: how many tables in list */
-{
-	ut_ad(mutex_own(&kernel_mutex));
-
-	if (row_mysql_drop_list_inited) {
-
-		return(UT_LIST_GET_LEN(row_mysql_drop_list));
-	}
-
-	/* The list has not been used before: set it up now */
-	UT_LIST_INIT(row_mysql_drop_list);
-	row_mysql_drop_list_inited = TRUE;
-
-	return(UT_LIST_GET_LEN(row_mysql_drop_list));
-}
-
-/*************************************************************************
-Adds a table to the list of tables which the master thread drops in
-background, unless a table of the same name is in the list already. This is
-needed on Unix because in ALTER TABLE MySQL may call drop table even if the
-table has running queries on it. A table with running foreign key checks
-is dropped lazily in the same way. The list is searched and modified under
-the kernel mutex. */
-static
-ibool
-row_add_table_to_background_drop_list(
-/*==================================*/
-				/* out: TRUE if the table was not yet in the
-				drop list, and was added there */
-	dict_table_t*	table)	/* in: table */
-{
-	row_mysql_drop_t*	entry;
-	ibool			is_new;
-
-	mutex_enter(&kernel_mutex);
-
-	if (!row_mysql_drop_list_inited) {
-
-		UT_LIST_INIT(row_mysql_drop_list);
-		row_mysql_drop_list_inited = TRUE;
-	}
-
-	/* Search the list for an entry with the same table name; the
-	loop ends with entry == NULL if there is none */
-	for (entry = UT_LIST_GET_FIRST(row_mysql_drop_list);
-	     entry != NULL;
-	     entry = UT_LIST_GET_NEXT(row_mysql_drop_list, entry)) {
-
-		if (0 == strcmp(entry->table_name, table->name)) {
-
-			break;
-		}
-	}
-
-	if (entry != NULL) {
-		/* Already in the list */
-
-		is_new = FALSE;
-	} else {
-		entry = mem_alloc(sizeof(row_mysql_drop_t));
-
-		entry->table_name = mem_strdup(table->name);
-
-		UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list,
-				 entry);
-
-		is_new = TRUE;
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	return(is_new);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Discards the tablespace of a table which stored in an .ibd file. Discarding
-means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE.
-
-The function takes the exclusive data dictionary lock itself and releases
-it before returning. Every exit after the lock is taken goes through
-funct_exit, which commits trx, unlocks the dictionary and clears
-trx->op_info.
-
-The discard is refused if the table is not found (DB_TABLE_NOT_FOUND), if
-it lives in tablespace 0 (DB_ERROR), if a foreign key check is running on
-it (DB_ERROR), or if another table references it by a foreign key while
-trx->check_foreigns is set (DB_CANNOT_DROP_CONSTRAINT). */
-
-int
-row_discard_tablespace_for_mysql(
-/*=============================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx) /* in: transaction handle */
-{
- dict_foreign_t* foreign;
- dulint new_id;
- dict_table_t* table;
- ibool success;
- ulint err;
- pars_info_t* info = NULL;
-
- /* How do we prevent crashes caused by ongoing operations on
- the table? Old operations could try to access non-existent
- pages.
-
- 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
- MySQL table lock on the table before we can do DISCARD
- TABLESPACE. Then there are no running queries on the table.
-
- 2) Purge and rollback: we assign a new table id for the
- table. Since purge and rollback look for the table based on
- the table id, they see the table as 'dropped' and discard
- their operations.
-
- 3) Insert buffer: we remove all entries for the tablespace in
- the insert buffer tree; as long as the tablespace mem object
- does not exist, ongoing insert buffer page merges are
- discarded in buf0rea.c. If we recreate the tablespace mem
- object with IMPORT TABLESPACE later, then the tablespace will
- have the same id, but the tablespace_version field in the mem
- object is different, and ongoing old insert buffer page merges
- get discarded.
-
- 4) Linear readahead and random readahead: we use the same
- method as in 3) to discard ongoing operations.
-
- 5) FOREIGN KEY operations: if
- table->n_foreign_key_checks_running > 0, we do not allow the
- discard. We also reserve the data dictionary latch. */
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
- trx->op_info = "discarding tablespace";
- trx_start_if_not_started(trx);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- row_mysql_lock_data_dictionary(trx);
-
- table = dict_table_get_low(name);
-
- if (!table) {
- err = DB_TABLE_NOT_FOUND;
-
- goto funct_exit;
- }
-
- if (table->space == 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: is in the system tablespace 0"
- " which cannot be discarded\n", stderr);
- err = DB_ERROR;
-
- goto funct_exit;
- }
-
- if (table->n_foreign_key_checks_running > 0) {
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: You are trying to DISCARD table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: though there is a foreign key check"
- " running on it.\n"
- "InnoDB: Cannot discard the table.\n",
- stderr);
-
- err = DB_ERROR;
-
- goto funct_exit;
- }
-
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign && foreign->foreign_table == table) { /* skip self-references */
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- if (foreign && trx->check_foreigns) {
-
- FILE* ef = dict_foreign_err_file;
-
- /* We only allow discarding a referenced table if
- FOREIGN_KEY_CHECKS is set to 0 */
-
- err = DB_CANNOT_DROP_CONSTRAINT;
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef); /* the message is written from the start of the error file */
- ut_print_timestamp(ef);
-
- fputs(" Cannot DISCARD table ", ef);
- ut_print_name(ef, trx, TRUE, name);
- fputs("\n"
- "because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- goto funct_exit;
- }
-
- new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
-
- /* Remove all locks except the table-level S and X locks. */
- lock_remove_all_on_table(table, FALSE);
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "table_name", name);
- pars_info_add_dulint_literal(info, "new_id", new_id);
-
- err = que_eval_sql(info,
- "PROCEDURE DISCARD_TABLESPACE_PROC () IS\n"
- "old_id CHAR;\n"
- "BEGIN\n"
- "SELECT ID INTO old_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = :table_name\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " COMMIT WORK;\n"
- " RETURN;\n"
- "END IF;\n"
- "UPDATE SYS_TABLES SET ID = :new_id\n"
- " WHERE ID = old_id;\n"
- "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = old_id;\n"
- "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = old_id;\n"
- "COMMIT WORK;\n"
- "END;\n"
- , FALSE, trx);
-
- if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
- trx->error_state = DB_SUCCESS;
- } else {
- dict_table_change_id_in_cache(table, new_id);
-
- success = fil_discard_tablespace(table->space);
-
- if (!success) {
- /* NOTE(review): the table id in the cache was changed
- just above and the procedure ends in COMMIT WORK; what
- this rollback can still undo is not visible here -
- confirm */
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
- trx->error_state = DB_SUCCESS;
-
- err = DB_ERROR;
- } else {
- /* Set the flag which tells that now it is legal to
- IMPORT a tablespace for this table */
- table->tablespace_discarded = TRUE;
- table->ibd_file_missing = TRUE;
- }
- }
-
-funct_exit:
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx->op_info = "";
-
- return((int) err);
-}
-
-/*********************************************************************
-Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary.
-
-Steps, in order: fil_reset_too_high_lsns() is called with the current
-system lsn; the exclusive data dictionary lock is taken; the table is
-looked up and must exist, must not be in tablespace 0 and must have
-table->tablespace_discarded set; insert buffer entries for the space are
-deleted; the single-table tablespace is opened. On success
-table->ibd_file_missing and table->tablespace_discarded are cleared.
-
-Every exit goes through funct_exit, which commits trx, releases the data
-dictionary lock and clears trx->op_info. */
-
-int
-row_import_tablespace_for_mysql(
-/*============================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx) /* in: transaction handle */
-{
- dict_table_t* table;
- ibool success;
- dulint current_lsn;
- ulint err = DB_SUCCESS;
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
- trx_start_if_not_started(trx);
-
- trx->op_info = "importing tablespace";
-
- current_lsn = log_get_lsn();
-
- /* It is possible, though very improbable, that the lsn's in the
- tablespace to be imported have risen above the current system lsn, if
- a lengthy purge, ibuf merge, or rollback was performed on a backup
- taken with ibbackup. If that is the case, reset page lsn's in the
- file. We assume that mysqld was shut down after it performed these
- cleanup operations on the .ibd file, so that it stamped the latest lsn
- to the FIL_PAGE_FILE_FLUSH_LSN in the first page of the .ibd file.
-
- TODO: reset also the trx id's in clustered index records and write
- a new space id to each data page. That would allow us to import clean
- .ibd files from another MySQL installation. */
-
- success = fil_reset_too_high_lsns(name, current_lsn);
-
- if (!success) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: cannot reset lsn's in table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
- stderr);
-
- err = DB_ERROR;
-
- row_mysql_lock_data_dictionary(trx); /* taken here only because funct_exit
- unconditionally unlocks the dictionary */
-
- goto funct_exit;
- }
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- row_mysql_lock_data_dictionary(trx);
-
- table = dict_table_get_low(name);
-
- if (!table) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: does not exist in the InnoDB data dictionary\n"
- "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
- stderr);
-
- err = DB_TABLE_NOT_FOUND;
-
- goto funct_exit;
- }
-
- if (table->space == 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: is in the system tablespace 0"
- " which cannot be imported\n", stderr);
- err = DB_ERROR;
-
- goto funct_exit;
- }
-
- if (!table->tablespace_discarded) { /* flag is set by a successful DISCARD */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: you are trying to"
- " IMPORT a tablespace\n"
- "InnoDB: ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(", though you have not called DISCARD on it yet\n"
- "InnoDB: during the lifetime of the mysqld process!\n",
- stderr);
-
- err = DB_ERROR;
-
- goto funct_exit;
- }
-
- /* Play safe and remove all insert buffer entries, though we should
- have removed them already when DISCARD TABLESPACE was called */
-
- ibuf_delete_for_discarded_space(table->space);
-
- success = fil_open_single_table_tablespace(TRUE, table->space,
- table->name);
- if (success) {
- table->ibd_file_missing = FALSE;
- table->tablespace_discarded = FALSE;
- } else {
- if (table->ibd_file_missing) { /* the message is printed only when this flag is set */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: cannot find or open in the"
- " database directory the .ibd file of\n"
- "InnoDB: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
- stderr);
- }
-
- err = DB_ERROR;
- }
-
-funct_exit:
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx->op_info = "";
-
- return((int) err);
-}
-
-/*************************************************************************
-Truncates a table for MySQL. */
-
-int
-row_truncate_table_for_mysql(
-/*=========================*/
- /* out: error code or DB_SUCCESS */
- dict_table_t* table, /* in: table handle */
- trx_t* trx) /* in: transaction handle */
-{
- dict_foreign_t* foreign;
- ulint err;
- mem_heap_t* heap;
- byte* buf;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- mtr_t mtr;
- dulint new_id;
- pars_info_t* info = NULL;
-
- /* How do we prevent crashes caused by ongoing operations on
- the table? Old operations could try to access non-existent
- pages.
-
- 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
- MySQL table lock on the table before we can do TRUNCATE
- TABLE. Then there are no running queries on the table. This is
- guaranteed, because in ha_innobase::store_lock(), we do not
- weaken the TL_WRITE lock requested by MySQL when executing
- SQLCOM_TRUNCATE.
-
- 2) Purge and rollback: we assign a new table id for the
- table. Since purge and rollback look for the table based on
- the table id, they see the table as 'dropped' and discard
- their operations.
-
- 3) Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE,
- so we do not have to remove insert buffer records, as the
- insert buffer works at a low level. If a freed page is later
- reallocated, the allocator will remove the ibuf entries for
- it.
-
- TODO: when we truncate *.ibd files (analogous to DISCARD
- TABLESPACE), we will have to remove we remove all entries for
- the table in the insert buffer tree!
-
- 4) Linear readahead and random readahead: we use the same
- method as in 3) to discard ongoing operations. (This will only
- be relevant for TRUNCATE TABLE by DISCARD TABLESPACE.)
-
- 5) FOREIGN KEY operations: if
- table->n_foreign_key_checks_running > 0, we do not allow the
- TRUNCATE. We also reserve the data dictionary latch. */
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
- ut_ad(table);
-
- if (srv_created_new_raw) {
- fputs("InnoDB: A new raw disk partition was initialized:\n"
- "InnoDB: we do not allow database modifications"
- " by the user.\n"
- "InnoDB: Shut down mysqld and edit my.cnf so that newraw"
- " is replaced with raw.\n", stderr);
-
- return(DB_ERROR);
- }
-
- trx->op_info = "truncating table";
-
- trx_start_if_not_started(trx);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- ut_a(trx->dict_operation_lock_mode == 0);
- /* Prevent foreign key checks etc. while we are truncating the
- table */
-
- row_mysql_lock_data_dictionary(trx);
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign && foreign->foreign_table == table) {
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- if (foreign && trx->check_foreigns) {
- FILE* ef = dict_foreign_err_file;
-
- /* We only allow truncating a referenced table if
- FOREIGN_KEY_CHECKS is set to 0 */
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs(" Cannot truncate table ", ef);
- ut_print_name(ef, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- err = DB_ERROR;
- goto funct_exit;
- }
-
- /* TODO: could we replace the counter n_foreign_key_checks_running
- with lock checks on the table? Acquire here an exclusive lock on the
- table, and rewrite lock0lock.c and the lock wait in srv0srv.c so that
- they can cope with the table having been truncated here? Foreign key
- checks take an IS or IX lock on the table. */
-
- if (table->n_foreign_key_checks_running > 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Cannot truncate table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because there is a foreign key check"
- " running on it.\n",
- stderr);
- err = DB_ERROR;
-
- goto funct_exit;
- }
-
- /* Remove all locks except the table-level S and X locks. */
- lock_remove_all_on_table(table, FALSE);
-
- trx->table_id = table->id;
-
- /* scan SYS_INDEXES for all indexes of the table */
- heap = mem_heap_create(800);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = mem_heap_alloc(heap, 8);
- mach_write_to_8(buf, table->id);
-
- dfield_set_data(dfield, buf, 8);
- sys_index = dict_table_get_first_index(dict_sys->sys_indexes);
- dict_index_copy_types(tuple, sys_index, 1);
-
- mtr_start(&mtr);
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &pcur, &mtr);
- for (;;) {
- rec_t* rec;
- const byte* field;
- ulint len;
- ulint root_page_no;
-
- if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
- /* The end of SYS_INDEXES has been reached. */
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
-
- field = rec_get_nth_field_old(rec, 0, &len);
- ut_ad(len == 8);
-
- if (memcmp(buf, field, len) != 0) {
- /* End of indexes for the table (TABLE_ID mismatch). */
- break;
- }
-
- if (rec_get_deleted_flag(rec, FALSE)) {
- /* The index has been dropped. */
- goto next_rec;
- }
-
- /* This call may commit and restart mtr
- and reposition pcur. */
- root_page_no = dict_truncate_index_tree(table, &pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur);
-
- if (root_page_no != FIL_NULL) {
- page_rec_write_index_page_no(
- rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
- root_page_no, &mtr);
- /* We will need to commit and restart the
- mini-transaction in order to avoid deadlocks.
- The dict_truncate_index_tree() call has allocated
- a page in this mini-transaction, and the rest of
- this loop could latch another index page. */
- mtr_commit(&mtr);
- mtr_start(&mtr);
- btr_pcur_restore_position(BTR_MODIFY_LEAF,
- &pcur, &mtr);
- }
-
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- mem_heap_free(heap);
-
- new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
-
- info = pars_info_create();
-
- pars_info_add_dulint_literal(info, "old_id", table->id);
- pars_info_add_dulint_literal(info, "new_id", new_id);
-
- err = que_eval_sql(info,
- "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLES SET ID = :new_id\n"
- " WHERE ID = :old_id;\n"
- "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = :old_id;\n"
- "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = :old_id;\n"
- "COMMIT WORK;\n"
- "END;\n"
- , FALSE, trx);
-
- if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
- trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to assign a new identifier to table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: after truncating it. Background processes"
- " may corrupt the table!\n", stderr);
- err = DB_ERROR;
- } else {
- dict_table_change_id_in_cache(table, new_id);
- }
-
- /* MySQL calls ha_innobase::reset_auto_increment() which does
- the same thing. */
- dict_table_autoinc_lock(table);
- dict_table_autoinc_initialize(table, 1);
- dict_table_autoinc_unlock(table);
- dict_update_statistics(table);
-
- trx_commit_for_mysql(trx);
-
-funct_exit:
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx->op_info = "";
-
- srv_wake_master_thread();
-
- return((int) err);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************************
-Drops a table for MySQL. If the name of the dropped table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. */
-
-int
-row_drop_table_for_mysql(
-/*=====================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: table name */
- trx_t* trx, /* in: transaction handle */
- ibool drop_db)/* in: TRUE=dropping whole database */
-{
- dict_foreign_t* foreign;
- dict_table_t* table;
- ulint space_id;
- ulint err;
- const char* table_name;
- ulint namelen;
- ibool locked_dictionary = FALSE;
- pars_info_t* info = NULL;
-
- ut_a(name != NULL);
-
- if (srv_created_new_raw) {
- fputs("InnoDB: A new raw disk partition was initialized:\n"
- "InnoDB: we do not allow database modifications"
- " by the user.\n"
- "InnoDB: Shut down mysqld and edit my.cnf so that newraw"
- " is replaced with raw.\n", stderr);
-
- return(DB_ERROR);
- }
-
- trx->op_info = "dropping table";
-
- trx_start_if_not_started(trx);
-
- /* The table name is prefixed with the database name and a '/'.
- Certain table names starting with 'innodb_' have their special
- meaning regardless of the database name. Thus, we need to
- ignore the database name prefix in the comparisons. */
- table_name = strchr(name, '/');
- ut_a(table_name);
- table_name++;
- namelen = strlen(table_name) + 1;
-
- if (namelen == sizeof S_innodb_monitor
- && !memcmp(table_name, S_innodb_monitor,
- sizeof S_innodb_monitor)) {
-
- /* Table name equals "innodb_monitor":
- stop monitor prints */
-
- srv_print_innodb_monitor = FALSE;
- srv_print_innodb_lock_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_lock_monitor
- && !memcmp(table_name, S_innodb_lock_monitor,
- sizeof S_innodb_lock_monitor)) {
- srv_print_innodb_monitor = FALSE;
- srv_print_innodb_lock_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_tablespace_monitor
- && !memcmp(table_name, S_innodb_tablespace_monitor,
- sizeof S_innodb_tablespace_monitor)) {
-
- srv_print_innodb_tablespace_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_table_monitor
- && !memcmp(table_name, S_innodb_table_monitor,
- sizeof S_innodb_table_monitor)) {
-
- srv_print_innodb_table_monitor = FALSE;
- }
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- if (trx->dict_operation_lock_mode != RW_X_LATCH) {
- /* Prevent foreign key checks etc. while we are dropping the
- table */
-
- row_mysql_lock_data_dictionary(trx);
-
- locked_dictionary = TRUE;
- }
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- table = dict_table_get_low(name);
-
- if (!table) {
- err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(" does not exist in the InnoDB internal\n"
- "InnoDB: data dictionary though MySQL is"
- " trying to drop it.\n"
- "InnoDB: Have you copied the .frm file"
- " of the table to the\n"
- "InnoDB: MySQL database directory"
- " from another database?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
- stderr);
- goto funct_exit;
- }
-
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign && foreign->foreign_table == table) {
-check_next_foreign:
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- if (foreign && trx->check_foreigns
- && !(drop_db && dict_tables_have_same_db(
- name, foreign->foreign_table_name))) {
- FILE* ef = dict_foreign_err_file;
-
- /* We only allow dropping a referenced table if
- FOREIGN_KEY_CHECKS is set to 0 */
-
- err = DB_CANNOT_DROP_CONSTRAINT;
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs(" Cannot drop table ", ef);
- ut_print_name(ef, trx, TRUE, name);
- fputs("\n"
- "because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- goto funct_exit;
- }
-
- if (foreign && trx->check_foreigns) {
- goto check_next_foreign;
- }
-
- if (table->n_mysql_handles_opened > 0) {
- ibool added;
-
- added = row_add_table_to_background_drop_list(table);
-
- if (added) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: MySQL is"
- " trying to drop table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: though there are still"
- " open handles to it.\n"
- "InnoDB: Adding the table to the"
- " background drop queue.\n",
- stderr);
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
-
- err = DB_SUCCESS;
- } else {
- /* The table is already in the background drop list */
- err = DB_ERROR;
- }
-
- goto funct_exit;
- }
-
- /* TODO: could we replace the counter n_foreign_key_checks_running
- with lock checks on the table? Acquire here an exclusive lock on the
- table, and rewrite lock0lock.c and the lock wait in srv0srv.c so that
- they can cope with the table having been dropped here? Foreign key
- checks take an IS or IX lock on the table. */
-
- if (table->n_foreign_key_checks_running > 0) {
-
- ibool added;
-
- added = row_add_table_to_background_drop_list(table);
-
- if (added) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: You are trying to drop table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: though there is a"
- " foreign key check running on it.\n"
- "InnoDB: Adding the table to"
- " the background drop queue.\n",
- stderr);
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
-
- err = DB_SUCCESS;
- } else {
- /* The table is already in the background drop list */
- err = DB_ERROR;
- }
-
- goto funct_exit;
- }
-
- /* Remove all locks there are on the table or its records */
- lock_remove_all_on_table(table, TRUE);
-
- trx->dict_operation = TRUE;
- trx->table_id = table->id;
-
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in deleting the dictionary data from system
- tables in Innobase. Deleting a row from SYS_INDEXES table also
- frees the file segments of the B-tree associated with the index. */
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "table_name", name);
-
- err = que_eval_sql(info,
- "PROCEDURE DROP_TABLE_PROC () IS\n"
- "sys_foreign_id CHAR;\n"
- "table_id CHAR;\n"
- "index_id CHAR;\n"
- "foreign_id CHAR;\n"
- "found INT;\n"
- "BEGIN\n"
- "SELECT ID INTO table_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = :table_name\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " COMMIT WORK;\n"
- " RETURN;\n"
- "END IF;\n"
- "found := 1;\n"
- "SELECT ID INTO sys_foreign_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = 'SYS_FOREIGN'\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "IF (:table_name = 'SYS_FOREIGN') THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "IF (:table_name = 'SYS_FOREIGN_COLS') THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "WHILE found = 1 LOOP\n"
- " SELECT ID INTO foreign_id\n"
- " FROM SYS_FOREIGN\n"
- " WHERE FOR_NAME = :table_name\n"
- " AND TO_BINARY(FOR_NAME)\n"
- " = TO_BINARY(:table_name)\n"
- " LOCK IN SHARE MODE;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " DELETE FROM SYS_FOREIGN_COLS\n"
- " WHERE ID = foreign_id;\n"
- " DELETE FROM SYS_FOREIGN\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "found := 1;\n"
- "WHILE found = 1 LOOP\n"
- " SELECT ID INTO index_id\n"
- " FROM SYS_INDEXES\n"
- " WHERE TABLE_ID = table_id\n"
- " LOCK IN SHARE MODE;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " DELETE FROM SYS_FIELDS\n"
- " WHERE INDEX_ID = index_id;\n"
- " DELETE FROM SYS_INDEXES\n"
- " WHERE ID = index_id\n"
- " AND TABLE_ID = table_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "DELETE FROM SYS_COLUMNS\n"
- "WHERE TABLE_ID = table_id;\n"
- "DELETE FROM SYS_TABLES\n"
- "WHERE ID = table_id;\n"
- "COMMIT WORK;\n"
- "END;\n"
- , FALSE, trx);
-
- if (err != DB_SUCCESS) {
- ut_a(err == DB_OUT_OF_FILE_SPACE);
-
- err = DB_MUST_GET_MORE_FILE_SPACE;
-
- row_mysql_handle_errors(&err, trx, NULL, NULL);
-
- ut_error;
- } else {
- ibool is_path;
- const char* name_or_path;
- mem_heap_t* heap;
-
- heap = mem_heap_create(200);
-
- /* Clone the name, in case it has been allocated
- from table->heap, which will be freed by
- dict_table_remove_from_cache(table) below. */
- name = mem_heap_strdup(heap, name);
- space_id = table->space;
-
- if (table->dir_path_of_temp_table != NULL) {
- is_path = TRUE;
- name_or_path = mem_heap_strdup(
- heap, table->dir_path_of_temp_table);
- } else {
- is_path = FALSE;
- name_or_path = name;
- }
-
- dict_table_remove_from_cache(table);
-
- if (dict_load_table(name) != NULL) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: not able to remove table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(" from the dictionary cache!\n", stderr);
- err = DB_ERROR;
- }
-
- /* Do not drop possible .ibd tablespace if something went
- wrong: we do not want to delete valuable data of the user */
-
- if (err == DB_SUCCESS && space_id > 0) {
- if (!fil_space_for_table_exists_in_mem(space_id,
- name_or_path,
- is_path,
- FALSE, TRUE)) {
- err = DB_SUCCESS;
-
- fprintf(stderr,
- "InnoDB: We removed now the InnoDB"
- " internal data dictionary entry\n"
- "InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, ".\n");
- } else if (!fil_delete_tablespace(space_id)) {
- fprintf(stderr,
- "InnoDB: We removed now the InnoDB"
- " internal data dictionary entry\n"
- "InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, ".\n");
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: not able to"
- " delete tablespace %lu of table ",
- (ulong) space_id);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("!\n", stderr);
- err = DB_ERROR;
- }
- }
-
- mem_heap_free(heap);
- }
-funct_exit:
-
- trx_commit_for_mysql(trx);
-
- if (locked_dictionary) {
- row_mysql_unlock_data_dictionary(trx);
- }
-
- trx->op_info = "";
-
-#ifndef UNIV_HOTBACKUP
- srv_wake_master_thread();
-#endif /* !UNIV_HOTBACKUP */
-
- return((int) err);
-}
-
-/***********************************************************************
-Drop all foreign keys in a database, see Bug#18942.
-Called at the end of row_drop_database_for_mysql(). */
-static
-ulint
-drop_all_foreign_keys_in_db(
-/*========================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: database name which ends to '/' */
- trx_t* trx) /* in: transaction handle */
-{
- pars_info_t* pinfo;
- ulint err;
-
- ut_a(name[strlen(name) - 1] == '/');
-
- pinfo = pars_info_create();
-
- pars_info_add_str_literal(pinfo, "dbname", name);
-
-/* true if for_name is not prefixed with dbname */
-#define TABLE_NOT_IN_THIS_DB \
-"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname"
-
- err = que_eval_sql(pinfo,
- "PROCEDURE DROP_ALL_FOREIGN_KEYS_PROC () IS\n"
- "foreign_id CHAR;\n"
- "for_name CHAR;\n"
- "found INT;\n"
- "DECLARE CURSOR cur IS\n"
- "SELECT ID, FOR_NAME FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME >= :dbname\n"
- "LOCK IN SHARE MODE\n"
- "ORDER BY FOR_NAME;\n"
- "BEGIN\n"
- "found := 1;\n"
- "OPEN cur;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH cur INTO foreign_id, for_name;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSIF (" TABLE_NOT_IN_THIS_DB ") THEN\n"
- " found := 0;\n"
- " ELSIF (1=1) THEN\n"
- " DELETE FROM SYS_FOREIGN_COLS\n"
- " WHERE ID = foreign_id;\n"
- " DELETE FROM SYS_FOREIGN\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE cur;\n"
- "COMMIT WORK;\n"
- "END;\n",
- FALSE, /* do not reserve dict mutex,
- we are already holding it */
- trx);
-
- return(err);
-}
-
-/*************************************************************************
-Drops a database for MySQL. */
-
-int
-row_drop_database_for_mysql(
-/*========================*/
- /* out: error code or DB_SUCCESS */
- const char* name, /* in: database name which ends to '/' */
- trx_t* trx) /* in: transaction handle */
-{
- dict_table_t* table;
- char* table_name;
- int err = DB_SUCCESS;
- ulint namelen = strlen(name);
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
- ut_a(name != NULL);
- ut_a(name[namelen - 1] == '/');
-
- trx->op_info = "dropping database";
-
- trx_start_if_not_started(trx);
-loop:
- row_mysql_lock_data_dictionary(trx);
-
- while ((table_name = dict_get_first_table_name_in_db(name))) {
- ut_a(memcmp(table_name, name, namelen) == 0);
-
- table = dict_table_get_low(table_name);
-
- ut_a(table);
-
- /* Wait until MySQL does not have any queries running on
- the table */
-
- if (table->n_mysql_handles_opened > 0) {
- row_mysql_unlock_data_dictionary(trx);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: MySQL is trying to"
- " drop database ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: though there are still"
- " open handles to table ", stderr);
- ut_print_name(stderr, trx, TRUE, table_name);
- fputs(".\n", stderr);
-
- os_thread_sleep(1000000);
-
- mem_free(table_name);
-
- goto loop;
- }
-
- err = row_drop_table_for_mysql(table_name, trx, TRUE);
-
- if (err != DB_SUCCESS) {
- fputs("InnoDB: DROP DATABASE ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error %lu for table ",
- (ulint) err);
- ut_print_name(stderr, trx, TRUE, table_name);
- putc('\n', stderr);
- mem_free(table_name);
- break;
- }
-
- mem_free(table_name);
- }
-
- if (err == DB_SUCCESS) {
- /* after dropping all tables try to drop all leftover
- foreign keys in case orphaned ones exist */
- err = (int) drop_all_foreign_keys_in_db(name, trx);
-
- if (err != DB_SUCCESS) {
- fputs("InnoDB: DROP DATABASE ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error %d while "
- "dropping all foreign keys", err);
- }
- }
-
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*************************************************************************
-Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL. */
-static
-ibool
-row_is_mysql_tmp_table_name(
-/*========================*/
- /* out: TRUE if temporary table */
- const char* name) /* in: table name in the form
- 'database/tablename' */
-{
- return(strstr(name, "/#sql") != NULL);
- /* return(strstr(name, "/@0023sql") != NULL); */
-}
-
-/********************************************************************
-Delete a single constraint. */
-static
-int
-row_delete_constraint_low(
-/*======================*/
- /* out: error code or DB_SUCCESS */
- const char* id, /* in: constraint id */
- trx_t* trx) /* in: transaction handle */
-{
- pars_info_t* info = pars_info_create();
-
- pars_info_add_str_literal(info, "id", id);
-
- return((int) que_eval_sql(info,
- "PROCEDURE DELETE_CONSTRAINT () IS\n"
- "BEGIN\n"
- "DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n"
- "DELETE FROM SYS_FOREIGN WHERE ID = :id;\n"
- "END;\n"
- , FALSE, trx));
-}
-
-/********************************************************************
-Delete a single constraint. */
-static
-int
-row_delete_constraint(
-/*==================*/
- /* out: error code or DB_SUCCESS */
- const char* id, /* in: constraint id */
- const char* database_name, /* in: database name, with the
- trailing '/' */
- mem_heap_t* heap, /* in: memory heap */
- trx_t* trx) /* in: transaction handle */
-{
- ulint err;
-
- /* New format constraints have ids <databasename>/<constraintname>. */
- err = row_delete_constraint_low(
- mem_heap_strcat(heap, database_name, id), trx);
-
- if ((err == DB_SUCCESS) && !strchr(id, '/')) {
- /* Old format < 4.0.18 constraints have constraint ids
- <number>_<number>. We only try deleting them if the
- constraint name does not contain a '/' character, otherwise
- deleting a new format constraint named 'foo/bar' from
- database 'baz' would remove constraint 'bar' from database
- 'foo', if it existed. */
-
- err = row_delete_constraint_low(id, trx);
- }
-
- return((int) err);
-}
-
-/*************************************************************************
-Renames a table for MySQL. */
-
-int
-row_rename_table_for_mysql(
-/*=======================*/
- /* out: error code or DB_SUCCESS */
- const char* old_name, /* in: old table name */
- const char* new_name, /* in: new table name */
- trx_t* trx) /* in: transaction handle */
-{
- dict_table_t* table;
- ulint err;
- mem_heap_t* heap = NULL;
- const char** constraints_to_drop = NULL;
- ulint n_constraints_to_drop = 0;
- ibool old_is_tmp, new_is_tmp;
- pars_info_t* info = NULL;
-
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
- ut_a(old_name != NULL);
- ut_a(new_name != NULL);
-
- if (srv_created_new_raw || srv_force_recovery) {
- fputs("InnoDB: A new raw disk partition was initialized or\n"
- "InnoDB: innodb_force_recovery is on: we do not allow\n"
- "InnoDB: database modifications by the user. Shut down\n"
- "InnoDB: mysqld and edit my.cnf so that newraw"
- " is replaced\n"
- "InnoDB: with raw, and innodb_force_... is removed.\n",
- stderr);
-
- trx_commit_for_mysql(trx);
- return(DB_ERROR);
- }
-
- if (row_mysql_is_system_table(new_name)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to create a MySQL"
- " system table %s of type InnoDB.\n"
- "InnoDB: MySQL system tables must be"
- " of the MyISAM type!\n",
- new_name);
-
- trx_commit_for_mysql(trx);
- return(DB_ERROR);
- }
-
- trx->op_info = "renaming table";
- trx_start_if_not_started(trx);
-
- old_is_tmp = row_is_mysql_tmp_table_name(old_name);
- new_is_tmp = row_is_mysql_tmp_table_name(new_name);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- row_mysql_lock_data_dictionary(trx);
-
- table = dict_table_get_low(old_name);
-
- if (!table) {
- err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" does not exist in the InnoDB internal\n"
- "InnoDB: data dictionary though MySQL is"
- " trying to rename the table.\n"
- "InnoDB: Have you copied the .frm file"
- " of the table to the\n"
- "InnoDB: MySQL database directory"
- " from another database?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
- stderr);
- goto funct_exit;
- }
-
- if (table->ibd_file_missing) {
- err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" does not have an .ibd file"
- " in the database directory.\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n",
- stderr);
- goto funct_exit;
- }
-
- if (new_is_tmp) {
- /* MySQL is doing an ALTER TABLE command and it renames the
- original table to a temporary table name. We want to preserve
- the original foreign key constraint definitions despite the
- name change. An exception is those constraints for which
- the ALTER TABLE contained DROP FOREIGN KEY <foreign key id>.*/
-
- heap = mem_heap_create(100);
-
- err = dict_foreign_parse_drop_constraints(
- heap, trx, table, &n_constraints_to_drop,
- &constraints_to_drop);
-
- if (err != DB_SUCCESS) {
-
- goto funct_exit;
- }
- }
-
- /* We use the private SQL parser of Innobase to generate the query
- graphs needed in deleting the dictionary data from system tables in
- Innobase. Deleting a row from SYS_INDEXES table also frees the file
- segments of the B-tree associated with the index. */
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "new_table_name", new_name);
- pars_info_add_str_literal(info, "old_table_name", old_name);
-
- err = que_eval_sql(info,
- "PROCEDURE RENAME_TABLE () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLES SET NAME = :new_table_name\n"
- " WHERE NAME = :old_table_name;\n"
- "END;\n"
- , FALSE, trx);
-
- if (err != DB_SUCCESS) {
-
- goto end;
- }
-
- if (!new_is_tmp) {
- /* Rename all constraints. */
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "new_table_name", new_name);
- pars_info_add_str_literal(info, "old_table_name", old_name);
-
- err = que_eval_sql(
- info,
- "PROCEDURE RENAME_CONSTRAINT_IDS () IS\n"
- "gen_constr_prefix CHAR;\n"
- "new_db_name CHAR;\n"
- "foreign_id CHAR;\n"
- "new_foreign_id CHAR;\n"
- "old_db_name_len INT;\n"
- "old_t_name_len INT;\n"
- "new_db_name_len INT;\n"
- "id_len INT;\n"
- "found INT;\n"
- "BEGIN\n"
- "found := 1;\n"
- "old_db_name_len := INSTR(:old_table_name, '/')-1;\n"
- "new_db_name_len := INSTR(:new_table_name, '/')-1;\n"
- "new_db_name := SUBSTR(:new_table_name, 0,\n"
- " new_db_name_len);\n"
- "old_t_name_len := LENGTH(:old_table_name);\n"
- "gen_constr_prefix := CONCAT(:old_table_name,\n"
- " '_ibfk_');\n"
- "WHILE found = 1 LOOP\n"
- " SELECT ID INTO foreign_id\n"
- " FROM SYS_FOREIGN\n"
- " WHERE FOR_NAME = :old_table_name\n"
- " AND TO_BINARY(FOR_NAME)\n"
- " = TO_BINARY(:old_table_name)\n"
- " LOCK IN SHARE MODE;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " UPDATE SYS_FOREIGN\n"
- " SET FOR_NAME = :new_table_name\n"
- " WHERE ID = foreign_id;\n"
- " id_len := LENGTH(foreign_id);\n"
- " IF (INSTR(foreign_id, '/') > 0) THEN\n"
- " IF (INSTR(foreign_id,\n"
- " gen_constr_prefix) > 0)\n"
- " THEN\n"
- " new_foreign_id :=\n"
- " CONCAT(:new_table_name,\n"
- " SUBSTR(foreign_id, old_t_name_len,\n"
- " id_len - old_t_name_len));\n"
- " ELSE\n"
- " new_foreign_id :=\n"
- " CONCAT(new_db_name,\n"
- " SUBSTR(foreign_id,\n"
- " old_db_name_len,\n"
- " id_len - old_db_name_len));\n"
- " END IF;\n"
- " UPDATE SYS_FOREIGN\n"
- " SET ID = new_foreign_id\n"
- " WHERE ID = foreign_id;\n"
- " UPDATE SYS_FOREIGN_COLS\n"
- " SET ID = new_foreign_id\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- " END IF;\n"
- "END LOOP;\n"
- "UPDATE SYS_FOREIGN SET REF_NAME = :new_table_name\n"
- "WHERE REF_NAME = :old_table_name\n"
- " AND TO_BINARY(REF_NAME)\n"
- " = TO_BINARY(:old_table_name);\n"
- "END;\n"
- , FALSE, trx);
-
- } else if (n_constraints_to_drop > 0) {
- /* Drop some constraints of tmp tables. */
-
- ulint db_name_len = dict_get_db_name_len(old_name) + 1;
- char* db_name = mem_heap_strdupl(heap, old_name,
- db_name_len);
- ulint i;
-
- for (i = 0; i < n_constraints_to_drop; i++) {
- err = row_delete_constraint(constraints_to_drop[i],
- db_name, heap, trx);
-
- if (err != DB_SUCCESS) {
- break;
- }
- }
- }
-
-end:
- if (err != DB_SUCCESS) {
- if (err == DB_DUPLICATE_KEY) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error; possible reasons:\n"
- "InnoDB: 1) Table rename would cause"
- " two FOREIGN KEY constraints\n"
- "InnoDB: to have the same internal name"
- " in case-insensitive comparison.\n"
- "InnoDB: 2) table ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs(" exists in the InnoDB internal data\n"
- "InnoDB: dictionary though MySQL is"
- " trying to rename table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" to it.\n"
- "InnoDB: Have you deleted the .frm file"
- " and not used DROP TABLE?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n"
- "InnoDB: If table ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs(" is a temporary table #sql..., then"
- " it can be that\n"
- "InnoDB: there are still queries running"
- " on the table, and it will be\n"
- "InnoDB: dropped automatically when"
- " the queries end.\n"
- "InnoDB: You can drop the orphaned table"
- " inside InnoDB by\n"
- "InnoDB: creating an InnoDB table with"
- " the same name in another\n"
- "InnoDB: database and copying the .frm file"
- " to the current database.\n"
- "InnoDB: Then MySQL thinks the table exists,"
- " and DROP TABLE will\n"
- "InnoDB: succeed.\n", stderr);
- }
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
- trx->error_state = DB_SUCCESS;
- } else {
- /* The following call will also rename the .ibd data file if
- the table is stored in a single-table tablespace */
-
- ibool success = dict_table_rename_in_cache(table, new_name,
- !new_is_tmp);
-
- if (!success) {
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
- trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error in table rename,"
- " cannot rename ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" to ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- putc('\n', stderr);
- err = DB_ERROR;
-
- goto funct_exit;
- }
-
- /* We only want to switch off some of the type checking in
- an ALTER, not in a RENAME. */
-
- err = dict_load_foreigns(
- new_name, old_is_tmp ? trx->check_foreigns : TRUE);
-
- if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
-
- if (old_is_tmp) {
- fputs(" InnoDB: Error: in ALTER TABLE ",
- stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs("\n"
- "InnoDB: has or is referenced"
- " in foreign key constraints\n"
- "InnoDB: which are not compatible"
- " with the new table definition.\n",
- stderr);
- } else {
- fputs(" InnoDB: Error: in RENAME TABLE"
- " table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs("\n"
- "InnoDB: is referenced in"
- " foreign key constraints\n"
- "InnoDB: which are not compatible"
- " with the new table definition.\n",
- stderr);
- }
-
- ut_a(dict_table_rename_in_cache(table,
- old_name, FALSE));
- trx->error_state = DB_SUCCESS;
- trx_general_rollback_for_mysql(trx, FALSE, NULL);
- trx->error_state = DB_SUCCESS;
- }
- }
-
-funct_exit:
- trx_commit_for_mysql(trx);
- row_mysql_unlock_data_dictionary(trx);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- trx->op_info = "";
-
- return((int) err);
-}
-
-/*************************************************************************
-Checks that the index contains entries in an ascending order, unique
-constraint is not broken, and calculates the number of index entries
-in the read view of the current transaction. */
-static
-ibool
-row_scan_and_check_index(
-/*=====================*/
- /* out: TRUE if ok */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL */
- dict_index_t* index, /* in: index */
- ulint* n_rows) /* out: number of entries seen in the
- current consistent read */
-{
- dtuple_t* prev_entry = NULL;
- ulint matched_fields;
- ulint matched_bytes;
- byte* buf;
- ulint ret;
- rec_t* rec;
- ibool is_ok = TRUE;
- int cmp;
- ibool contains_null;
- ulint i;
- ulint cnt;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- *n_rows = 0;
-
- buf = mem_alloc(UNIV_PAGE_SIZE);
- heap = mem_heap_create(100);
-
- /* Make a dummy template in prebuilt, which we will use
- in scanning the index entries */
-
- prebuilt->index = index;
- prebuilt->sql_stat_start = TRUE;
- prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
- prebuilt->n_template = 0;
- prebuilt->need_to_access_clustered = FALSE;
-
- dtuple_set_n_fields(prebuilt->search_tuple, 0);
-
- prebuilt->select_lock_type = LOCK_NONE;
- cnt = 1000;
-
- ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0);
-loop:
- /* Check thd->killed every 1,000 scanned rows */
- if (--cnt == 0) {
- if (trx_is_interrupted(prebuilt->trx)) {
- goto func_exit;
- }
- cnt = 1000;
- }
- if (ret != DB_SUCCESS) {
-func_exit:
- mem_free(buf);
- mem_heap_free(heap);
-
- return(is_ok);
- }
-
- *n_rows = *n_rows + 1;
-
- /* row_search... returns the index record in buf, record origin offset
- within buf stored in the first 4 bytes, because we have built a dummy
- template */
-
- rec = buf + mach_read_from_4(buf);
-
- if (prev_entry != NULL) {
- matched_fields = 0;
- matched_bytes = 0;
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets,
- &matched_fields,
- &matched_bytes);
- contains_null = FALSE;
-
- /* In a unique secondary index we allow equal key values if
- they contain SQL NULLs */
-
- for (i = 0;
- i < dict_index_get_n_ordering_defined_by_user(index);
- i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(prev_entry, i))) {
-
- contains_null = TRUE;
- }
- }
-
- if (cmp > 0) {
- fputs("InnoDB: index records in a wrong order in ",
- stderr);
-not_ok:
- dict_index_name_print(stderr,
- prebuilt->trx, index);
- fputs("\n"
- "InnoDB: prev record ", stderr);
- dtuple_print(stderr, prev_entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- is_ok = FALSE;
- } else if ((index->type & DICT_UNIQUE)
- && !contains_null
- && matched_fields
- >= dict_index_get_n_ordering_defined_by_user(
- index)) {
-
- fputs("InnoDB: duplicate key in ", stderr);
- goto not_ok;
- }
- }
-
- mem_heap_empty(heap);
- offsets = offsets_;
-
- prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
-
- ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
-
- goto loop;
-}
-
-/*************************************************************************
-Checks a table for corruption. */
-
-ulint
-row_check_table_for_mysql(
-/*======================*/
- /* out: DB_ERROR or DB_SUCCESS */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
- handle */
-{
- dict_table_t* table = prebuilt->table;
- dict_index_t* index;
- ulint n_rows;
- ulint n_rows_in_table = ULINT_UNDEFINED;
- ulint ret = DB_SUCCESS;
- ulint old_isolation_level;
-
- if (prebuilt->table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
- return(DB_ERROR);
- }
-
- prebuilt->trx->op_info = "checking table";
-
- old_isolation_level = prebuilt->trx->isolation_level;
-
- /* We must run the index record counts at an isolation level
- >= READ COMMITTED, because a dirty read can see a wrong number
- of records in some index; to play safe, we use always
- REPEATABLE READ here */
-
- prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
- /* Enlarge the fatal lock wait timeout during CHECK TABLE. */
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- /* fputs("Validating index ", stderr);
- ut_print_name(stderr, trx, FALSE, index->name);
- putc('\n', stderr); */
-
- if (!btr_validate_index(index, prebuilt->trx)) {
- ret = DB_ERROR;
- } else {
- if (!row_scan_and_check_index(prebuilt,
- index, &n_rows)) {
- ret = DB_ERROR;
- }
-
- if (trx_is_interrupted(prebuilt->trx)) {
- break;
- }
-
- /* fprintf(stderr, "%lu entries in index %s\n", n_rows,
- index->name); */
-
- if (index == dict_table_get_first_index(table)) {
- n_rows_in_table = n_rows;
- } else if (n_rows != n_rows_in_table) {
-
- ret = DB_ERROR;
-
- fputs("Error: ", stderr);
- dict_index_name_print(stderr,
- prebuilt->trx, index);
- fprintf(stderr,
- " contains %lu entries,"
- " should be %lu\n",
- (ulong) n_rows,
- (ulong) n_rows_in_table);
- }
- }
-
- index = dict_table_get_next_index(index);
- }
-
- /* Restore the original isolation level */
- prebuilt->trx->isolation_level = old_isolation_level;
-
- /* We validate also the whole adaptive hash index for all tables
- at every CHECK TABLE */
-
- if (!btr_search_validate()) {
-
- ret = DB_ERROR;
- }
-
- /* Restore the fatal lock wait timeout after CHECK TABLE. */
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- prebuilt->trx->op_info = "";
-
- return(ret);
-}
-
-/*************************************************************************
-Determines if a table is a magic monitor table. */
-
-ibool
-row_is_magic_monitor_table(
-/*=======================*/
- /* out: TRUE if monitor table */
- const char* table_name) /* in: name of the table, in the
- form database/table_name */
-{
- const char* name; /* table_name without database/ */
- ulint len;
-
- name = strchr(table_name, '/');
- ut_a(name != NULL);
- name++;
- len = strlen(name) + 1;
-
- if (STR_EQ(name, len, S_innodb_monitor)
- || STR_EQ(name, len, S_innodb_lock_monitor)
- || STR_EQ(name, len, S_innodb_tablespace_monitor)
- || STR_EQ(name, len, S_innodb_table_monitor)
- || STR_EQ(name, len, S_innodb_mem_validate)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/innobase/row/row0purge.c b/storage/innobase/row/row0purge.c
deleted file mode 100644
index 1fef47da13f..00000000000
--- a/storage/innobase/row/row0purge.c
+++ /dev/null
@@ -1,673 +0,0 @@
-/******************************************************
-Purge obsolete records
-
-(c) 1997 Innobase Oy
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0purge.h"
-
-#ifdef UNIV_NONINL
-#include "row0purge.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "row0vers.h"
-#include "row0mysql.h"
-#include "log0log.h"
-
-/************************************************************************
-Creates a purge node to a query graph. */
-
-purge_node_t*
-row_purge_node_create(
-/*==================*/
- /* out, own: purge node */
- que_thr_t* parent, /* in: parent node, i.e., a thr node */
- mem_heap_t* heap) /* in: memory heap where created */
-{
- purge_node_t* node;
-
- ut_ad(parent && heap);
-
- node = mem_heap_alloc(heap, sizeof(purge_node_t));
-
- node->common.type = QUE_NODE_PURGE;
- node->common.parent = parent;
-
- node->heap = mem_heap_create(256);
-
- return(node);
-}
-
-/***************************************************************
-Repositions the pcur in the purge node on the clustered index record,
-if found. */
-static
-ibool
-row_purge_reposition_pcur(
-/*======================*/
- /* out: TRUE if the record was found */
- ulint mode, /* in: latching mode */
- purge_node_t* node, /* in: row purge node */
- mtr_t* mtr) /* in: mtr */
-{
- ibool found;
-
- if (node->found_clust) {
- found = btr_pcur_restore_position(mode, &(node->pcur), mtr);
-
- return(found);
- }
-
- found = row_search_on_row_ref(&(node->pcur), mode, node->table,
- node->ref, mtr);
- node->found_clust = found;
-
- if (found) {
- btr_pcur_store_position(&(node->pcur), mtr);
- }
-
- return(found);
-}
-
-/***************************************************************
-Removes a delete marked clustered index record if possible. */
-static
-ibool
-row_purge_remove_clust_if_poss_low(
-/*===============================*/
- /* out: TRUE if success, or if not found, or
- if modified after the delete marking */
- purge_node_t* node, /* in: row purge node */
- ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
- dict_index_t* index;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ibool success;
- ulint err;
- mtr_t mtr;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- index = dict_table_get_first_index(node->table);
-
- pcur = &(node->pcur);
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- mtr_start(&mtr);
-
- success = row_purge_reposition_pcur(mode, node, &mtr);
-
- if (!success) {
- /* The record is already removed */
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- return(TRUE);
- }
-
- rec = btr_pcur_get_rec(pcur);
-
- if (0 != ut_dulint_cmp(node->roll_ptr, row_get_rec_roll_ptr(
- rec, index, rec_get_offsets(
- rec, index, offsets_,
- ULINT_UNDEFINED, &heap)))) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- /* Someone else has modified the record later: do not remove */
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- return(TRUE);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr);
-
- if (err == DB_SUCCESS) {
- success = TRUE;
- } else if (err == DB_OUT_OF_FILE_SPACE) {
- success = FALSE;
- } else {
- ut_error;
- }
- }
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- return(success);
-}
-
-/***************************************************************
-Removes a clustered index record if it has not been modified after the delete
-marking. */
-static
-void
-row_purge_remove_clust_if_poss(
-/*===========================*/
- purge_node_t* node) /* in: row purge node */
-{
- ibool success;
- ulint n_tries = 0;
-
- /* fputs("Purge: Removing clustered record\n", stderr); */
-
- success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
- if (success) {
-
- return;
- }
-retry:
- success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- ut_a(success);
-}
-
-/***************************************************************
-Removes a secondary index entry if possible. */
-static
-ibool
-row_purge_remove_sec_if_poss_low(
-/*=============================*/
- /* out: TRUE if success or if not found */
- purge_node_t* node, /* in: row purge node */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint mode) /* in: latch mode BTR_MODIFY_LEAF or
- BTR_MODIFY_TREE */
-{
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- ibool success;
- ibool old_has = 0; /* remove warning */
- ibool found;
- ulint err;
- mtr_t mtr;
- mtr_t* mtr_vers;
-
- log_free_check();
- mtr_start(&mtr);
-
- found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
-
- if (!found) {
- /* Not found */
-
- /* fputs("PURGE:........sec entry not found\n", stderr); */
- /* dtuple_print(entry); */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(TRUE);
- }
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- /* We should remove the index record if no later version of the row,
- which cannot be purged yet, requires its existence. If some requires,
- we should do nothing. */
-
- mtr_vers = mem_alloc(sizeof(mtr_t));
-
- mtr_start(mtr_vers);
-
- success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr_vers);
-
- if (success) {
- old_has = row_vers_old_has_index_entry(
- TRUE, btr_pcur_get_rec(&(node->pcur)),
- mtr_vers, index, entry);
- }
-
- btr_pcur_commit_specify_mtr(&(node->pcur), mtr_vers);
-
- mem_free(mtr_vers);
-
- if (!success || !old_has) {
- /* Remove the index record */
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- FALSE, &mtr);
- if (err == DB_SUCCESS) {
- success = TRUE;
- } else if (err == DB_OUT_OF_FILE_SPACE) {
- success = FALSE;
- } else {
- ut_error;
- }
- }
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(success);
-}
-
-/***************************************************************
-Removes a secondary index entry if possible. */
-UNIV_INLINE
-void
-row_purge_remove_sec_if_poss(
-/*=========================*/
- purge_node_t* node, /* in: row purge node */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry */
-{
- ibool success;
- ulint n_tries = 0;
-
- /* fputs("Purge: Removing secondary record\n", stderr); */
-
- success = row_purge_remove_sec_if_poss_low(node, index, entry,
- BTR_MODIFY_LEAF);
- if (success) {
-
- return;
- }
-retry:
- success = row_purge_remove_sec_if_poss_low(node, index, entry,
- BTR_MODIFY_TREE);
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
-
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- ut_a(success);
-}
-
-/***************************************************************
-Purges a delete marking of a record. */
-static
-void
-row_purge_del_mark(
-/*===============*/
- purge_node_t* node) /* in: row purge node */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
-
- ut_ad(node);
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- /* Build the index entry */
- entry = row_build_index_entry(node->row, index, heap);
-
- row_purge_remove_sec_if_poss(node, index, entry);
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
- row_purge_remove_clust_if_poss(node);
-}
-
-/***************************************************************
-Purges an update of an existing record. Also purges an update of a delete
-marked record if that record contained an externally stored field. */
-static
-void
-row_purge_upd_exist_or_extern(
-/*==========================*/
- purge_node_t* node) /* in: row purge node */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- upd_field_t* ufield;
- ibool is_insert;
- ulint rseg_id;
- ulint page_no;
- ulint offset;
- ulint internal_offset;
- byte* data_field;
- ulint data_field_len;
- ulint i;
- mtr_t mtr;
-
- ut_ad(node);
-
- if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
-
- goto skip_secondaries;
- }
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- if (row_upd_changes_ord_field_binary(NULL, node->index,
- node->update)) {
- /* Build the older version of the index entry */
- entry = row_build_index_entry(node->row, index, heap);
-
- row_purge_remove_sec_if_poss(node, index, entry);
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
-skip_secondaries:
- /* Free possible externally stored fields */
- for (i = 0; i < upd_get_n_fields(node->update); i++) {
-
- ufield = upd_get_nth_field(node->update, i);
-
- if (ufield->extern_storage) {
- /* We use the fact that new_val points to
- node->undo_rec and get thus the offset of
- dfield data inside the unod record. Then we
- can calculate from node->roll_ptr the file
- address of the new_val data */
-
- internal_offset = ((byte*)ufield->new_val.data)
- - node->undo_rec;
-
- ut_a(internal_offset < UNIV_PAGE_SIZE);
-
- trx_undo_decode_roll_ptr(node->roll_ptr,
- &is_insert, &rseg_id,
- &page_no, &offset);
- mtr_start(&mtr);
-
- /* We have to acquire an X-latch to the clustered
- index tree */
-
- index = dict_table_get_first_index(node->table);
-
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- /* NOTE: we must also acquire an X-latch to the
- root page of the tree. We will need it when we
- free pages from the tree. If the tree is of height 1,
- the tree X-latch does NOT protect the root page,
- because it is also a leaf page. Since we will have a
- latch on an undo log page, we would break the
- latching order if we would only later latch the
- root page of such a tree! */
-
- btr_root_get(index, &mtr);
-
- /* We assume in purge of externally stored fields
- that the space id of the undo log record is 0! */
-
- data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
- + offset + internal_offset;
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(buf_frame_align(data_field),
- SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- data_field_len = ufield->new_val.len;
-
- btr_free_externally_stored_field(index, data_field,
- data_field_len,
- FALSE, &mtr);
- mtr_commit(&mtr);
- }
- }
-}
-
-/***************************************************************
-Parses the row reference and other info in a modify undo log record. */
-static
-ibool
-row_purge_parse_undo_rec(
-/*=====================*/
- /* out: TRUE if purge operation required:
- NOTE that then the CALLER must unfreeze
- data dictionary! */
- purge_node_t* node, /* in: row undo node */
- ibool* updated_extern,
- /* out: TRUE if an externally stored field
- was updated */
- que_thr_t* thr) /* in: query thread */
-{
- dict_index_t* clust_index;
- byte* ptr;
- trx_t* trx;
- dulint undo_no;
- dulint table_id;
- dulint trx_id;
- dulint roll_ptr;
- ulint info_bits;
- ulint type;
- ulint cmpl_info;
-
- ut_ad(node && thr);
-
- trx = thr_get_trx(thr);
-
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
- updated_extern, &undo_no, &table_id);
- node->rec_type = type;
-
- if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
-
- return(FALSE);
- }
-
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits);
- node->table = NULL;
-
- if (type == TRX_UNDO_UPD_EXIST_REC
- && cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
-
- /* Purge requires no changes to indexes: we may return */
-
- return(FALSE);
- }
-
- /* Prevent DROP TABLE etc. from running when we are doing the purge
- for this row */
-
- row_mysql_freeze_data_dictionary(trx);
-
- mutex_enter(&(dict_sys->mutex));
-
- node->table = dict_table_get_on_id_low(table_id);
-
- mutex_exit(&(dict_sys->mutex));
-
- if (node->table == NULL) {
- /* The table has been dropped: no need to do purge */
-
- row_mysql_unfreeze_data_dictionary(trx);
-
- return(FALSE);
- }
-
- if (node->table->ibd_file_missing) {
- /* We skip purge of missing .ibd files */
-
- node->table = NULL;
-
- row_mysql_unfreeze_data_dictionary(trx);
-
- return(FALSE);
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- if (clust_index == NULL) {
- /* The table was corrupt in the data dictionary */
-
- row_mysql_unfreeze_data_dictionary(trx);
-
- return(FALSE);
- }
-
- ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
- node->heap);
-
- ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, trx,
- node->heap, &(node->update));
-
- /* Read to the partial row the fields that occur in indexes */
-
- if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- ptr = trx_undo_rec_get_partial_row(ptr, clust_index,
- &(node->row), node->heap);
- }
-
- return(TRUE);
-}
-
-/***************************************************************
-Fetches an undo log record and does the purge for the recorded operation.
-If none left, or the current purge completed, returns the control to the
-parent node, which is always a query thread node. */
-static
-ulint
-row_purge(
-/*======*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code */
- purge_node_t* node, /* in: row purge node */
- que_thr_t* thr) /* in: query thread */
-{
- dulint roll_ptr;
- ibool purge_needed;
- ibool updated_extern;
- trx_t* trx;
-
- ut_ad(node && thr);
-
- trx = thr_get_trx(thr);
-
- node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr,
- &(node->reservation),
- node->heap);
- if (!node->undo_rec) {
- /* Purge completed for this query thread */
-
- thr->run_node = que_node_get_parent(node);
-
- return(DB_SUCCESS);
- }
-
- node->roll_ptr = roll_ptr;
-
- if (node->undo_rec == &trx_purge_dummy_rec) {
- purge_needed = FALSE;
- } else {
- purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
- thr);
- /* If purge_needed == TRUE, we must also remember to unfreeze
- data dictionary! */
- }
-
- if (purge_needed) {
- node->found_clust = FALSE;
-
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
-
- if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
- row_purge_del_mark(node);
-
- } else if (updated_extern
- || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
-
- row_purge_upd_exist_or_extern(node);
- }
-
- if (node->found_clust) {
- btr_pcur_close(&(node->pcur));
- }
-
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- /* Do some cleanup */
- trx_purge_rec_release(node->reservation);
- mem_heap_empty(node->heap);
-
- thr->run_node = node;
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************
-Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph. */
-
-que_thr_t*
-row_purge_step(
-/*===========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- purge_node_t* node;
- ulint err;
-
- ut_ad(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
-
- err = row_purge(node, thr);
-
- ut_ad(err == DB_SUCCESS);
-
- return(thr);
-}
diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c
deleted file mode 100644
index 08e50817db9..00000000000
--- a/storage/innobase/row/row0row.c
+++ /dev/null
@@ -1,726 +0,0 @@
-/******************************************************
-General row routines
-
-(c) 1996 Innobase Oy
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "row0row.h"
-
-#ifdef UNIV_NONINL
-#include "row0row.ic"
-#endif
-
-#include "dict0dict.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "rem0cmp.h"
-#include "read0read.h"
-
-/*************************************************************************
-Reads the trx id or roll ptr field from a clustered index record: this function
-is slower than the specialized inline functions. */
-
-dulint
-row_get_rec_sys_field(
-/*==================*/
- /* out: value of the field */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
-{
- ulint pos;
- byte* field;
- ulint len;
-
- ut_ad(index->type & DICT_CLUSTERED);
-
- pos = dict_index_get_sys_col_pos(index, type);
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
-
- if (type == DATA_TRX_ID) {
-
- return(trx_read_trx_id(field));
- } else {
- ut_ad(type == DATA_ROLL_PTR);
-
- return(trx_read_roll_ptr(field));
- }
-}
-
-/*************************************************************************
-Sets the trx id or roll ptr field in a clustered index record: this function
-is slower than the specialized inline functions. */
-
-void
-row_set_rec_sys_field(
-/*==================*/
- /* out: value of the field */
- ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
- rec_t* rec, /* in: record */
- dict_index_t* index, /* in: clustered index */
- const ulint* offsets,/* in: rec_get_offsets(rec, index) */
- dulint val) /* in: value to set */
-{
- ulint pos;
- byte* field;
- ulint len;
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- pos = dict_index_get_sys_col_pos(index, type);
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
-
- if (type == DATA_TRX_ID) {
-
- trx_write_trx_id(field, val);
- } else {
- ut_ad(type == DATA_ROLL_PTR);
-
- trx_write_roll_ptr(field, val);
- }
-}
-
-/*********************************************************************
-When an insert to a table is performed, this function builds the entry which
-has to be inserted to an index on the table. */
-
-dtuple_t*
-row_build_index_entry(
-/*==================*/
- /* out: index entry which should be inserted */
- dtuple_t* row, /* in: row which should be inserted to the
- table */
- dict_index_t* index, /* in: index on the table */
- mem_heap_t* heap) /* in: memory heap from which the memory for
- the index entry is allocated */
-{
- dtuple_t* entry;
- ulint entry_len;
- dict_field_t* ind_field;
- dfield_t* dfield;
- dfield_t* dfield2;
- ulint i;
- ulint storage_len;
-
- ut_ad(row && index && heap);
- ut_ad(dtuple_check_typed(row));
-
- entry_len = dict_index_get_n_fields(index);
- entry = dtuple_create(heap, entry_len);
-
- if (index->type & DICT_UNIVERSAL) {
- dtuple_set_n_fields_cmp(entry, entry_len);
- } else {
- dtuple_set_n_fields_cmp(
- entry, dict_index_get_n_unique_in_tree(index));
- }
-
- for (i = 0; i < entry_len; i++) {
- const dict_col_t* col;
- ind_field = dict_index_get_nth_field(index, i);
- col = ind_field->col;
-
- dfield = dtuple_get_nth_field(entry, i);
-
- dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col));
-
- dfield_copy(dfield, dfield2);
-
- /* If a column prefix index, take only the prefix */
- if (ind_field->prefix_len > 0
- && dfield_get_len(dfield2) != UNIV_SQL_NULL) {
-
- storage_len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminlen, col->mbmaxlen,
- ind_field->prefix_len,
- dfield_get_len(dfield2), dfield2->data);
-
- dfield_set_len(dfield, storage_len);
- }
- }
-
- ut_ad(dtuple_check_typed(entry));
-
- return(entry);
-}
-
-/***********************************************************************
-An inverse function to dict_row_build_index_entry. Builds a row from a
-record in a clustered index. */
-
-dtuple_t*
-row_build(
-/*======*/
- /* out, own: row built; see the NOTE below! */
- ulint type, /* in: ROW_COPY_POINTERS or ROW_COPY_DATA;
- the latter copies also the data fields to
- heap while the first only places pointers to
- data fields on the index page, and thus is
- more efficient */
- dict_index_t* index, /* in: clustered index */
- rec_t* rec, /* in: record in the clustered index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row dtuple is used! */
- const ulint* offsets,/* in: rec_get_offsets(rec, index)
- or NULL, in which case this function
- will invoke rec_get_offsets() */
- mem_heap_t* heap) /* in: memory heap from which the memory
- needed is allocated */
-{
- dtuple_t* row;
- dict_table_t* table;
- dict_field_t* ind_field;
- dfield_t* dfield;
- ulint n_fields;
- byte* field;
- ulint len;
- ulint row_len;
- byte* buf;
- ulint i;
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(index && rec && heap);
- ut_ad(index->type & DICT_CLUSTERED);
-
- if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &tmp_heap);
- } else {
- ut_ad(rec_offs_validate(rec, index, offsets));
- }
-
- if (type != ROW_COPY_POINTERS) {
- /* Take a copy of rec to heap */
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, (ulint*) offsets);
- }
-
- table = index->table;
- row_len = dict_table_get_n_cols(table);
-
- row = dtuple_create(heap, row_len);
-
- dtuple_set_info_bits(row, rec_get_info_bits(
- rec, dict_table_is_comp(table)));
-
- n_fields = rec_offs_n_fields(offsets);
-
- dict_table_copy_types(row, table);
-
- for (i = 0; i < n_fields; i++) {
- ind_field = dict_index_get_nth_field(index, i);
-
- if (ind_field->prefix_len == 0) {
-
- const dict_col_t* col
- = dict_field_get_col(ind_field);
-
- dfield = dtuple_get_nth_field(row,
- dict_col_get_no(col));
- field = rec_get_nth_field(rec, offsets, i, &len);
-
- dfield_set_data(dfield, field, len);
- }
- }
-
- ut_ad(dtuple_check_typed(row));
-
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- return(row);
-}
-
-/***********************************************************************
-Converts an index record to a typed data tuple. NOTE that externally
-stored (often big) fields are NOT copied to heap. */
-
-dtuple_t*
-row_rec_to_index_entry(
-/*===================*/
- /* out, own: index entry built; see the
- NOTE below! */
- ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap as the latter only places pointers to
- data fields on the index page */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the dtuple is used! */
- mem_heap_t* heap) /* in: memory heap from which the memory
- needed is allocated */
-{
- dtuple_t* entry;
- dfield_t* dfield;
- ulint i;
- byte* field;
- ulint len;
- ulint rec_len;
- byte* buf;
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(rec && heap && index);
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &tmp_heap);
-
- if (type == ROW_COPY_DATA) {
- /* Take a copy of rec to heap */
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, offsets);
- }
-
- rec_len = rec_offs_n_fields(offsets);
-
- entry = dtuple_create(heap, rec_len);
-
- dtuple_set_n_fields_cmp(entry,
- dict_index_get_n_unique_in_tree(index));
- ut_ad(rec_len == dict_index_get_n_fields(index));
-
- dict_index_copy_types(entry, index, rec_len);
-
- dtuple_set_info_bits(entry,
- rec_get_info_bits(rec, rec_offs_comp(offsets)));
-
- for (i = 0; i < rec_len; i++) {
-
- dfield = dtuple_get_nth_field(entry, i);
- field = rec_get_nth_field(rec, offsets, i, &len);
-
- dfield_set_data(dfield, field, len);
- }
-
- ut_ad(dtuple_check_typed(entry));
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- return(entry);
-}
-
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
-dtuple_t*
-row_build_row_ref(
-/*==============*/
- /* out, own: row reference built; see the
- NOTE below! */
- ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap, whereas the latter only places pointers
- to data fields on the index page */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- mem_heap_t* heap) /* in: memory heap from which the memory
- needed is allocated */
-{
- dict_table_t* table;
- dict_index_t* clust_index;
- dfield_t* dfield;
- dtuple_t* ref;
- byte* field;
- ulint len;
- ulint ref_len;
- ulint pos;
- byte* buf;
- ulint clust_col_prefix_len;
- ulint i;
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(index && rec && heap);
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &tmp_heap);
-
- if (type == ROW_COPY_DATA) {
- /* Take a copy of rec to heap */
-
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
-
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, offsets);
- }
-
- table = index->table;
-
- clust_index = dict_table_get_first_index(table);
-
- ref_len = dict_index_get_n_unique(clust_index);
-
- ref = dtuple_create(heap, ref_len);
-
- dict_index_copy_types(ref, clust_index, ref_len);
-
- for (i = 0; i < ref_len; i++) {
- dfield = dtuple_get_nth_field(ref, i);
-
- pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
- ut_a(pos != ULINT_UNDEFINED);
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
-
- dfield_set_data(dfield, field, len);
-
- /* If the primary key contains a column prefix, then the
- secondary index may contain a longer prefix of the same
- column, or the full column, and we must adjust the length
- accordingly. */
-
- clust_col_prefix_len = dict_index_get_nth_field(
- clust_index, i)->prefix_len;
-
- if (clust_col_prefix_len > 0) {
- if (len != UNIV_SQL_NULL) {
-
- const dtype_t* dtype
- = dfield_get_type(dfield);
-
- dfield_set_len(dfield,
- dtype_get_at_most_n_mbchars(
- dtype->prtype,
- dtype->mbminlen,
- dtype->mbmaxlen,
- clust_col_prefix_len,
- len, (char*) field));
- }
- }
- }
-
- ut_ad(dtuple_check_typed(ref));
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- return(ref);
-}
-
-/***********************************************************************
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-
-void
-row_build_row_ref_in_tuple(
-/*=======================*/
- dtuple_t* ref, /* in/out: row reference built; see the
- NOTE below! */
- dict_index_t* index, /* in: index */
- rec_t* rec, /* in: record in the index;
- NOTE: the data fields in ref will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- trx_t* trx) /* in: transaction */
-{
- dict_index_t* clust_index;
- dfield_t* dfield;
- byte* field;
- ulint len;
- ulint ref_len;
- ulint pos;
- ulint clust_col_prefix_len;
- ulint i;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_a(ref);
- ut_a(index);
- ut_a(rec);
-
- if (UNIV_UNLIKELY(!index->table)) {
- fputs("InnoDB: table ", stderr);
-notfound:
- ut_print_name(stderr, trx, TRUE, index->table_name);
- fputs(" for index ", stderr);
- ut_print_name(stderr, trx, FALSE, index->name);
- fputs(" not found\n", stderr);
- ut_error;
- }
-
- clust_index = dict_table_get_first_index(index->table);
-
- if (!clust_index) {
- fputs("InnoDB: clust index for table ", stderr);
- goto notfound;
- }
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- ref_len = dict_index_get_n_unique(clust_index);
-
- ut_ad(ref_len == dtuple_get_n_fields(ref));
-
- dict_index_copy_types(ref, clust_index, ref_len);
-
- for (i = 0; i < ref_len; i++) {
- dfield = dtuple_get_nth_field(ref, i);
-
- pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
- ut_a(pos != ULINT_UNDEFINED);
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
-
- dfield_set_data(dfield, field, len);
-
- /* If the primary key contains a column prefix, then the
- secondary index may contain a longer prefix of the same
- column, or the full column, and we must adjust the length
- accordingly. */
-
- clust_col_prefix_len = dict_index_get_nth_field(
- clust_index, i)->prefix_len;
-
- if (clust_col_prefix_len > 0) {
- if (len != UNIV_SQL_NULL) {
-
- const dtype_t* dtype
- = dfield_get_type(dfield);
-
- dfield_set_len(dfield,
- dtype_get_at_most_n_mbchars(
- dtype->prtype,
- dtype->mbminlen,
- dtype->mbmaxlen,
- clust_col_prefix_len,
- len, (char*) field));
- }
- }
- }
-
- ut_ad(dtuple_check_typed(ref));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***********************************************************************
-From a row build a row reference with which we can search the clustered
-index record. */
-
-void
-row_build_row_ref_from_row(
-/*=======================*/
- dtuple_t* ref, /* in/out: row reference built; see the
- NOTE below! ref must have the right number
- of fields! */
- dict_table_t* table, /* in: table */
- dtuple_t* row) /* in: row
- NOTE: the data fields in ref will point
- directly into data of this row */
-{
- dict_index_t* clust_index;
- ulint ref_len;
- ulint i;
-
- ut_ad(ref && table && row);
-
- clust_index = dict_table_get_first_index(table);
-
- ref_len = dict_index_get_n_unique(clust_index);
-
- ut_ad(ref_len == dtuple_get_n_fields(ref));
-
- for (i = 0; i < ref_len; i++) {
- const dict_col_t* col;
- dict_field_t* field;
- dfield_t* dfield;
- dfield_t* dfield2;
-
- dfield = dtuple_get_nth_field(ref, i);
-
- field = dict_index_get_nth_field(clust_index, i);
-
- col = dict_field_get_col(field);
-
- dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col));
-
- dfield_copy(dfield, dfield2);
-
- if (field->prefix_len > 0
- && dfield->len != UNIV_SQL_NULL) {
-
- dfield->len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminlen, col->mbmaxlen,
- field->prefix_len, dfield->len, dfield->data);
- }
- }
-
- ut_ad(dtuple_check_typed(ref));
-}
-
-/*******************************************************************
-Searches the clustered index record for a row, if we have the row reference. */
-
-ibool
-row_search_on_row_ref(
-/*==================*/
- /* out: TRUE if found */
- btr_pcur_t* pcur, /* in/out: persistent cursor, which must
- be closed by the caller */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- dict_table_t* table, /* in: table */
- dtuple_t* ref, /* in: row reference */
- mtr_t* mtr) /* in: mtr */
-{
- ulint low_match;
- rec_t* rec;
- dict_index_t* index;
-
- ut_ad(dtuple_check_typed(ref));
-
- index = dict_table_get_first_index(table);
-
- ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index));
-
- btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr);
-
- low_match = btr_pcur_get_low_match(pcur);
-
- rec = btr_pcur_get_rec(pcur);
-
- if (page_rec_is_infimum(rec)) {
-
- return(FALSE);
- }
-
- if (low_match != dtuple_get_n_fields(ref)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*************************************************************************
-Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved. */
-
-rec_t*
-row_get_clust_rec(
-/*==============*/
- /* out: record or NULL, if no record found */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index, /* in: secondary index */
- dict_index_t** clust_index,/* out: clustered index */
- mtr_t* mtr) /* in: mtr */
-{
- mem_heap_t* heap;
- dtuple_t* ref;
- dict_table_t* table;
- btr_pcur_t pcur;
- ibool found;
- rec_t* clust_rec;
-
- ut_ad((index->type & DICT_CLUSTERED) == 0);
-
- table = index->table;
-
- heap = mem_heap_create(256);
-
- ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap);
-
- found = row_search_on_row_ref(&pcur, mode, table, ref, mtr);
-
- clust_rec = found ? btr_pcur_get_rec(&pcur) : NULL;
-
- mem_heap_free(heap);
-
- btr_pcur_close(&pcur);
-
- *clust_index = dict_table_get_first_index(table);
-
- return(clust_rec);
-}
-
-/*******************************************************************
-Searches an index record. */
-
-ibool
-row_search_index_entry(
-/*===================*/
- /* out: TRUE if found */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint mode, /* in: BTR_MODIFY_LEAF, ... */
- btr_pcur_t* pcur, /* in/out: persistent cursor, which must
- be closed by the caller */
- mtr_t* mtr) /* in: mtr */
-{
- ulint n_fields;
- ulint low_match;
- rec_t* rec;
-
- ut_ad(dtuple_check_typed(entry));
-
- btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
- low_match = btr_pcur_get_low_match(pcur);
-
- rec = btr_pcur_get_rec(pcur);
-
- n_fields = dtuple_get_n_fields(entry);
-
- if (page_rec_is_infimum(rec)) {
-
- return(FALSE);
- }
-
- if (low_match != n_fields) {
- /* Not found */
-
- return(FALSE);
- }
-
- return(TRUE);
-}
diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
deleted file mode 100644
index 29efb2861b7..00000000000
--- a/storage/innobase/row/row0sel.c
+++ /dev/null
@@ -1,4640 +0,0 @@
-/*******************************************************
-Select
-
-(c) 1997 Innobase Oy
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0sel.h"
-
-#ifdef UNIV_NONINL
-#include "row0sel.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "trx0undo.h"
-#include "trx0trx.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "mach0data.h"
-#include "que0que.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "row0vers.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "eval0eval.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-#include "row0mysql.h"
-#include "read0read.h"
-#include "buf0lru.h"
-#include "ha_prototypes.h"
-
-/* Maximum number of rows to prefetch; MySQL interface has another parameter.
-This is also the number of sel_buf_t slots allocated for each column in
-sel_col_prefetch_buf_alloc(). */
-#define SEL_MAX_N_PREFETCH 16
-
-/* Number of rows fetched, after which to start prefetching; MySQL interface
-has another parameter */
-#define SEL_PREFETCH_LIMIT 1
-
-/* When a select has accessed about this many pages, it returns control back
-to que_run_threads: this is to allow canceling runaway queries */
-
-#define SEL_COST_LIMIT 100
-
-/* Flags for search shortcut: the possible return values of
-row_sel_try_search_shortcut() */
-#define SEL_FOUND 0
-#define SEL_EXHAUSTED 1
-#define SEL_RETRY 2
-
-/************************************************************************
-Checks whether the user-defined ordering columns of a secondary index record
-compare as equal to the corresponding columns of a clustered index record.
-NOTE: this is NOT a binary comparison; the fields are compared with
-cmp_data_data(), i.e., character fields are compared with collation! For a
-column prefix index, only the prefix of the clustered field is compared. */
-static
-ibool
-row_sel_sec_rec_is_for_clust_rec(
-/*=============================*/
-					/* out: TRUE if the secondary
-					record is equal to the corresponding
-					fields in the clustered record,
-					when compared with collation */
-	rec_t*		sec_rec,	/* in: secondary index record */
-	dict_index_t*	sec_index,	/* in: secondary index */
-	rec_t*		clust_rec,	/* in: clustered index record */
-	dict_index_t*	clust_index)	/* in: clustered index */
-{
-	mem_heap_t*	heap		= NULL;
-	ulint		clust_offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint		sec_offsets_[REC_OFFS_SMALL_SIZE];
-	ulint*		clust_offs;
-	ulint*		sec_offs;
-	ulint		n_user_fields;
-	ulint		pos;
-	ibool		matches		= TRUE;
-
-	/* The first element of each offsets array holds its capacity */
-	clust_offsets_[0] = sizeof(clust_offsets_) / sizeof(clust_offsets_[0]);
-	sec_offsets_[0] = sizeof(sec_offsets_) / sizeof(sec_offsets_[0]);
-
-	clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offsets_,
-				     ULINT_UNDEFINED, &heap);
-	sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offsets_,
-				   ULINT_UNDEFINED, &heap);
-
-	n_user_fields = dict_index_get_n_ordering_defined_by_user(sec_index);
-
-	for (pos = 0; pos < n_user_fields; pos++) {
-		const dict_field_t*	ifield;
-		const dict_col_t*	col;
-		byte*			clust_field;
-		ulint			clust_len;
-		byte*			sec_field;
-		ulint			sec_len;
-
-		ifield = dict_index_get_nth_field(sec_index, pos);
-		col = dict_field_get_col(ifield);
-
-		clust_field = rec_get_nth_field(
-			clust_rec, clust_offs,
-			dict_col_get_clust_pos(col, clust_index), &clust_len);
-		sec_field = rec_get_nth_field(sec_rec, sec_offs, pos,
-					      &sec_len);
-
-		if (ifield->prefix_len > 0 && clust_len != UNIV_SQL_NULL) {
-			/* Column prefix index: shorten the clustered
-			field length to the indexed prefix */
-
-			clust_len = dtype_get_at_most_n_mbchars(
-				col->prtype, col->mbminlen, col->mbmaxlen,
-				ifield->prefix_len,
-				clust_len, (char*) clust_field);
-		}
-
-		if (cmp_data_data(col->mtype, col->prtype,
-				  clust_field, clust_len,
-				  sec_field, sec_len) != 0) {
-			matches = FALSE;
-
-			break;
-		}
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	return(matches);
-}
-
-/*************************************************************************
-Allocates a select node from the given heap and sets its initial state:
-node type QUE_NODE_SELECT, state SEL_NODE_OPEN, no plans, no update, and
-latch mode BTR_SEARCH_LEAF. Other fields are left as allocated. */
-
-sel_node_t*
-sel_node_create(
-/*============*/
-				/* out, own: select node struct */
-	mem_heap_t*	heap)	/* in: memory heap where created */
-{
-	sel_node_t*	sel;
-
-	sel = mem_heap_alloc(heap, sizeof(sel_node_t));
-
-	sel->common.type = QUE_NODE_SELECT;
-	sel->state = SEL_NODE_OPEN;
-	sel->plans = NULL;
-
-	sel->latch_mode = BTR_SEARCH_LEAF;
-	sel->select_will_do_update = FALSE;
-
-	return(sel);
-}
-
-/*************************************************************************
-Releases the resources private to a select node when a query graph is freed:
-closes both persistent cursors of every plan and frees each plan's old
-version heap, if any. The heap where the node itself was created is not
-freed here. */
-
-void
-sel_node_free_private(
-/*==================*/
-	sel_node_t*	node)	/* in: select node struct */
-{
-	ulint	nth;
-
-	if (node->plans == NULL) {
-
-		return;
-	}
-
-	for (nth = 0; nth < node->n_tables; nth++) {
-		plan_t*	plan = sel_node_get_nth_plan(node, nth);
-
-		btr_pcur_close(&(plan->pcur));
-		btr_pcur_close(&(plan->clust_pcur));
-
-		if (plan->old_vers_heap) {
-			mem_heap_free(plan->old_vers_heap);
-		}
-	}
-}
-
-/*************************************************************************
-Evaluates every expression in the select list of a select node. If there
-are aggregate functions, their argument value is added to the aggregate
-total. */
-UNIV_INLINE
-void
-sel_eval_select_list(
-/*=================*/
-	sel_node_t*	node)	/* in: select node */
-{
-	que_node_t*	item;
-
-	for (item = node->select_list;
-	     item;
-	     item = que_node_get_next(item)) {
-
-		eval_exp(item);
-	}
-}
-
-/*************************************************************************
-Copies the values of the select list expressions, in order, to the
-into-variables of SELECT ... INTO ... Does nothing if there are no
-into-variables. */
-UNIV_INLINE
-void
-sel_assign_into_var_values(
-/*=======================*/
-	sym_node_t*	var,	/* in: first variable in a list of variables */
-	sel_node_t*	node)	/* in: select node */
-{
-	que_node_t*	item;
-
-	if (var != NULL) {
-		item = node->select_list;
-
-		do {
-			/* There must be a select list expression for
-			each into-variable */
-			ut_ad(item);
-
-			eval_node_copy_val(var->alias, item);
-
-			item = que_node_get_next(item);
-			var = que_node_get_next(var);
-		} while (var);
-	}
-}
-
-/*************************************************************************
-Sets the integer value of every node in the select list of an aggregate
-type query to zero and clears the aggregate_already_fetched flag. */
-UNIV_INLINE
-void
-sel_reset_aggregate_vals(
-/*=====================*/
-	sel_node_t*	node)	/* in: select node */
-{
-	func_node_t*	aggr;
-
-	ut_ad(node->is_aggregate);
-
-	for (aggr = node->select_list;
-	     aggr;
-	     aggr = que_node_get_next(aggr)) {
-
-		eval_node_set_int_val(aggr, 0);
-	}
-
-	node->aggregate_already_fetched = FALSE;
-}
-
-/*************************************************************************
-Copies the input variable values when an explicit cursor is opened: each
-variable in node->copy_variables gets the value of its alias, and its
-indirection is cleared. */
-UNIV_INLINE
-void
-row_sel_copy_input_variable_vals(
-/*=============================*/
-	sel_node_t*	node)	/* in: select node */
-{
-	sym_node_t*	sym;
-
-	for (sym = UT_LIST_GET_FIRST(node->copy_variables);
-	     sym;
-	     sym = UT_LIST_GET_NEXT(col_var_list, sym)) {
-
-		eval_node_copy_val(sym, sym->alias);
-
-		sym->indirection = NULL;
-	}
-}
-
-/*************************************************************************
-Fetches the column values from a record into the column nodes of a column
-list. Columns which have no field position in this type of index
-(ULINT_UNDEFINED) are skipped. An externally stored field is always copied;
-other fields are copied only if column->copy_val is set, otherwise the
-column value is set to point to the field in the record. */
-static
-void
-row_sel_fetch_columns(
-/*==================*/
-	dict_index_t*	index,	/* in: record index */
-	rec_t*		rec,	/* in: record in a clustered or non-clustered
-				index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	sym_node_t*	column)	/* in: first column in a column list, or
-				NULL */
-{
-	ulint	which_field_no;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	which_field_no = (index->type & DICT_CLUSTERED)
-		? SYM_CLUST_FIELD_NO
-		: SYM_SEC_FIELD_NO;
-
-	for (; column; column = UT_LIST_GET_NEXT(col_var_list, column)) {
-		mem_heap_t*	tmp_heap	= NULL;
-		ibool		must_copy;
-		ulint		field_no;
-		byte*		field_data;
-		ulint		field_len;
-
-		field_no = column->field_nos[which_field_no];
-
-		if (field_no == ULINT_UNDEFINED) {
-
-			continue;
-		}
-
-		if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) {
-
-			/* Copy an externally stored field to the
-			temporary heap */
-
-			tmp_heap = mem_heap_create(1);
-
-			field_data = btr_rec_copy_externally_stored_field(
-				rec, offsets, field_no, &field_len, tmp_heap);
-
-			ut_a(field_len != UNIV_SQL_NULL);
-
-			must_copy = TRUE;
-		} else {
-			field_data = rec_get_nth_field(rec, offsets,
-						       field_no, &field_len);
-
-			must_copy = column->copy_val;
-		}
-
-		if (!must_copy) {
-			/* Let the column value point straight to the
-			field data */
-
-			dfield_set_data(que_node_get_val(column),
-					field_data, field_len);
-		} else {
-			eval_node_copy_and_alloc_val(column, field_data,
-						     field_len);
-		}
-
-		/* The temporary heap can go only after the value has been
-		copied out of it */
-
-		if (UNIV_LIKELY_NULL(tmp_heap)) {
-			mem_heap_free(tmp_heap);
-		}
-	}
-}
-
-/*************************************************************************
-Allocates the prefetch buffer of a column when prefetching is done for the
-first time: an array of SEL_MAX_N_PREFETCH slots, each of them initially
-empty (no data, value buffer size 0). */
-static
-void
-sel_col_prefetch_buf_alloc(
-/*=======================*/
-	sym_node_t*	column)	/* in: symbol table node for a column */
-{
-	sel_buf_t*	slot;
-	sel_buf_t*	slots_end;
-
-	ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
-
-	column->prefetch_buf = mem_alloc(SEL_MAX_N_PREFETCH
-					 * sizeof(sel_buf_t));
-
-	slots_end = column->prefetch_buf + SEL_MAX_N_PREFETCH;
-
-	for (slot = column->prefetch_buf; slot < slots_end; slot++) {
-		slot->data = NULL;
-		slot->val_buf_size = 0;
-	}
-}
-
-/*************************************************************************
-Frees the dynamically allocated data stored in the slots of a column
-prefetch buffer.
-NOTE(review): only the data buffers of the individual slots are freed
-here; the prefetch_buf array itself is not released in this function,
-although the parameter is marked as owned. Confirm that the caller frees
-the array. */
-
-void
-sel_col_prefetch_buf_free(
-/*======================*/
-	sel_buf_t*	prefetch_buf)	/* in, own: prefetch buffer */
-{
-	sel_buf_t*	slot;
-	sel_buf_t*	slots_end = prefetch_buf + SEL_MAX_N_PREFETCH;
-
-	for (slot = prefetch_buf; slot < slots_end; slot++) {
-
-		/* A slot owns a data buffer only if its recorded buffer
-		size is positive */
-
-		if (slot->val_buf_size > 0) {
-
-			mem_free(slot->data);
-		}
-	}
-}
-
-/*************************************************************************
-Pops the column values of the first prefetched, cached row from the column
-prefetch buffers and places them to the val fields in the column nodes.
-Columns which are not copied (copy_val not set) have nothing stored in a
-prefetch buffer and are skipped. */
-static
-void
-sel_pop_prefetched_row(
-/*===================*/
-	plan_t*	plan)	/* in: plan node for a table */
-{
-	sym_node_t*	col_node;
-
-	ut_ad(plan->n_rows_prefetched > 0);
-
-	for (col_node = UT_LIST_GET_FIRST(plan->columns);
-	     col_node;
-	     col_node = UT_LIST_GET_NEXT(col_var_list, col_node)) {
-
-		dfield_t*	val = que_node_get_val(col_node);
-		sel_buf_t*	slot;
-		byte*		cached_data;
-		ulint		cached_len;
-		ulint		cached_buf_size;
-
-		if (!col_node->copy_val) {
-			/* We did not really push any value for the
-			column */
-
-			ut_ad(!col_node->prefetch_buf);
-			ut_ad(que_node_get_val_buf_size(col_node) == 0);
-#ifdef UNIV_DEBUG
-			dfield_set_data(val, NULL, 0);
-#endif
-			continue;
-		}
-
-		ut_ad(col_node->prefetch_buf);
-
-		slot = col_node->prefetch_buf + plan->first_prefetched;
-
-		cached_data = slot->data;
-		cached_len = slot->len;
-		cached_buf_size = slot->val_buf_size;
-
-		/* The memory allocated for column values must stay
-		reachable so that it can be freed later: therefore the
-		contents of the slot and of val are swapped */
-
-		slot->data = dfield_get_data(val);
-		slot->len = dfield_get_len(val);
-		slot->val_buf_size = que_node_get_val_buf_size(col_node);
-
-		dfield_set_data(val, cached_data, cached_len);
-		que_node_set_val_buf_size(col_node, cached_buf_size);
-	}
-
-	plan->n_rows_prefetched--;
-
-	plan->first_prefetched++;
-}
-
-/*************************************************************************
-Pushes the column values of a row to the column prefetch buffers from the
-val fields in the column nodes. The row goes to the slot following the rows
-already prefetched. Columns which are not copied (copy_val not set) are
-skipped; a prefetch buffer is allocated for a column on its first push. */
-UNIV_INLINE
-void
-sel_push_prefetched_row(
-/*====================*/
-	plan_t*	plan)	/* in: plan node for a table */
-{
-	sym_node_t*	col_node;
-	ulint		slot_no;
-
-	slot_no = plan->n_rows_prefetched;
-
-	if (slot_no == 0) {
-		plan->first_prefetched = 0;
-	} else {
-		/* We have the convention that pushing new rows starts only
-		after the prefetch stack has been emptied: */
-
-		ut_ad(plan->first_prefetched == 0);
-	}
-
-	plan->n_rows_prefetched++;
-
-	ut_ad(slot_no < SEL_MAX_N_PREFETCH);
-
-	for (col_node = UT_LIST_GET_FIRST(plan->columns);
-	     col_node;
-	     col_node = UT_LIST_GET_NEXT(col_var_list, col_node)) {
-
-		sel_buf_t*	slot;
-		dfield_t*	val;
-		byte*		cur_data;
-		ulint		cur_len;
-		ulint		cur_buf_size;
-
-		if (!col_node->copy_val) {
-			/* There is no sense to push pointers to database
-			page fields when we do not keep latch on the page! */
-
-			continue;
-		}
-
-		if (!col_node->prefetch_buf) {
-			/* Allocate a new prefetch buffer */
-
-			sel_col_prefetch_buf_alloc(col_node);
-		}
-
-		slot = col_node->prefetch_buf + slot_no;
-
-		val = que_node_get_val(col_node);
-
-		cur_data = dfield_get_data(val);
-		cur_len = dfield_get_len(val);
-		cur_buf_size = que_node_get_val_buf_size(col_node);
-
-		/* The memory allocated for column values must stay
-		reachable so that it can be freed later: therefore the
-		contents of the slot and of val are swapped */
-
-		dfield_set_data(val, slot->data, slot->len);
-		que_node_set_val_buf_size(col_node, slot->val_buf_size);
-
-		slot->data = cur_data;
-		slot->len = cur_len;
-		slot->val_buf_size = cur_buf_size;
-	}
-}
-
-/*************************************************************************
-Builds a previous version of a clustered index record for a consistent read.
-The old version heap is created on first use and emptied on later calls, so
-an old version built by an earlier call does not survive this call. */
-static
-ulint
-row_sel_build_prev_vers(
-/*====================*/
-					/* out: DB_SUCCESS or error code */
-	read_view_t*	read_view,	/* in: read view */
-	dict_index_t*	index,		/* in: index of rec (presumably the
-					clustered index; the caller passes
-					it together with a clustered index
-					record) */
-	rec_t*		rec,		/* in: record in a clustered index */
-	ulint**		offsets,	/* in/out: offsets returned by
-					rec_get_offsets(rec, plan->index) */
-	mem_heap_t**	offset_heap,	/* in/out: memory heap from which
-					the offsets are allocated */
-	mem_heap_t**	old_vers_heap,	/* in/out: old version heap to use;
-					created here if *old_vers_heap is
-					NULL */
-	rec_t**		old_vers,	/* out: old version, or NULL if the
-					record does not exist in the view:
-					i.e., it was freshly inserted
-					afterwards */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	if (*old_vers_heap == NULL) {
-		*old_vers_heap = mem_heap_create(512);
-	} else {
-		mem_heap_empty(*old_vers_heap);
-	}
-
-	return(row_vers_build_for_consistent_read(
-		       rec, mtr, index, offsets, read_view, offset_heap,
-		       *old_vers_heap, old_vers));
-}
-
-/*************************************************************************
-Builds the last committed version of a clustered index record for a
-semi-consistent read. The old version heap of the prebuilt struct is created
-on first use and emptied on later calls. */
-static
-ulint
-row_sel_build_committed_vers_for_mysql(
-/*===================================*/
-					/* out: DB_SUCCESS or error code */
-	dict_index_t*	clust_index,	/* in: clustered index */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
-	rec_t*		rec,		/* in: record in a clustered index */
-	ulint**		offsets,	/* in/out: offsets returned by
-					rec_get_offsets(rec, clust_index) */
-	mem_heap_t**	offset_heap,	/* in/out: memory heap from which
-					the offsets are allocated */
-	rec_t**		old_vers,	/* out: old version, or NULL if the
-					record does not exist in the view:
-					i.e., it was freshly inserted
-					afterwards */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	if (prebuilt->old_vers_heap == NULL) {
-		prebuilt->old_vers_heap = mem_heap_create(200);
-	} else {
-		mem_heap_empty(prebuilt->old_vers_heap);
-	}
-
-	return(row_vers_build_for_semi_consistent_read(
-		       rec, mtr, clust_index, offsets, offset_heap,
-		       prebuilt->old_vers_heap, old_vers));
-}
-
-/*************************************************************************
-Tests the conditions which determine when the index segment we are searching
-through has been exhausted. Evaluation stops at the first condition which
-does not hold. */
-UNIV_INLINE
-ibool
-row_sel_test_end_conds(
-/*===================*/
-				/* out: TRUE if row passed the tests */
-	plan_t*	plan)		/* in: plan for the table; the column values
-				must already have been retrieved and the
-				right sides of comparisons evaluated */
-{
-	func_node_t*	cmp;
-
-	/* Every condition in end_conds compares a column to an
-	expression */
-
-	for (cmp = UT_LIST_GET_FIRST(plan->end_conds);
-	     cmp;
-	     cmp = UT_LIST_GET_NEXT(cond_list, cmp)) {
-
-		/* Evaluate the left side of the comparison, i.e., get the
-		column value if there is an indirection */
-
-		eval_sym(cmp->args);
-
-		/* Then do the comparison itself */
-
-		if (!eval_cmp(cmp)) {
-
-			return(FALSE);
-		}
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Tests the other conditions of a plan (plan->other_conds): each condition is
-evaluated in turn, stopping at the first one whose value is not true. */
-UNIV_INLINE
-ibool
-row_sel_test_other_conds(
-/*=====================*/
-				/* out: TRUE if row passed the tests */
-	plan_t*	plan)		/* in: plan for the table; the column values
-				must already have been retrieved */
-{
-	func_node_t*	test;
-
-	for (test = UT_LIST_GET_FIRST(plan->other_conds);
-	     test;
-	     test = UT_LIST_GET_NEXT(cond_list, test)) {
-
-		eval_exp(test);
-
-		if (!eval_node_get_ibool_val(test)) {
-
-			return(FALSE);
-		}
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Retrieves the clustered index record corresponding to a record in a
-non-clustered index. Does the necessary locking.
-
-*out_rec is set to NULL at entry and is assigned only when a usable
-clustered record (or old version of it) was found and its columns were
-fetched. DB_SUCCESS with *out_rec == NULL therefore means that the caller
-should ignore this secondary index record. */
-static
-ulint
-row_sel_get_clust_rec(
-/*==================*/
- /* out: DB_SUCCESS or error code */
- sel_node_t* node, /* in: select_node */
- plan_t* plan, /* in: plan node for table */
- rec_t* rec, /* in: record in a non-clustered index */
- que_thr_t* thr, /* in: query thread */
- rec_t** out_rec,/* out: clustered record or an old version of
- it, NULL if the old version did not exist
- in the read view, i.e., it was a fresh
- inserted version */
- mtr_t* mtr) /* in: mtr used to get access to the
- non-clustered record; the same mtr is used to
- access the clustered index */
-{
- dict_index_t* index;
- rec_t* clust_rec;
- rec_t* old_vers;
- ulint err;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- *out_rec = NULL;
-
- /* Offsets of the secondary index record, needed for building the
- row reference */
-
- offsets = rec_get_offsets(rec,
- btr_pcur_get_btr_cur(&plan->pcur)->index,
- offsets, ULINT_UNDEFINED, &heap);
-
- row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
-
- index = dict_table_get_first_index(plan->table);
-
- /* Position the clustered index cursor of the plan using the row
- reference */
-
- btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
- node->latch_mode, &(plan->clust_pcur),
- 0, mtr);
-
- clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
-
- /* Note: only if the search ends up on a non-infimum record is the
- low_match value the real match to the search tuple */
-
- if (!page_rec_is_user_rec(clust_rec)
- || btr_pcur_get_low_match(&(plan->clust_pcur))
- < dict_index_get_n_unique(index)) {
-
- ut_a(rec_get_deleted_flag(rec,
- dict_table_is_comp(plan->table)));
- ut_a(node->read_view);
-
- /* In a rare case it is possible that no clust rec is found
- for a delete-marked secondary index record: if in row0umod.c
- in row_undo_mod_remove_clust_low() we have already removed
- the clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case we know that the
- clustered index record did not exist in the read view of
- trx. */
-
- goto func_exit;
- }
-
- /* From here on, offsets describes clust_rec, not rec */
-
- offsets = rec_get_offsets(clust_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (!node->read_view) {
- /* Try to place a lock on the index record */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using READ COMMITTED isolation level
- we lock only the record, i.e., next-key locking is
- not used. */
- ulint lock_type;
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED) {
- lock_type = LOCK_REC_NOT_GAP;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- err = lock_clust_rec_read_check_and_lock(
- 0, clust_rec, index, offsets,
- node->row_lock_mode, lock_type, thr);
-
- if (err != DB_SUCCESS) {
-
- goto err_exit;
- }
- } else {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- old_vers = NULL;
-
- if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
- node->read_view)) {
-
- err = row_sel_build_prev_vers(
- node->read_view, index, clust_rec,
- &offsets, &heap, &plan->old_vers_heap,
- &old_vers, mtr);
-
- if (err != DB_SUCCESS) {
-
- goto err_exit;
- }
-
- clust_rec = old_vers;
-
- if (clust_rec == NULL) {
- goto func_exit;
- }
- }
-
- /* If we had to go to an earlier version of row or the
- secondary index record is delete marked, then it may be that
- the secondary index record corresponding to clust_rec
- (or old_vers) is not rec; in that case we must ignore
- such row because in our snapshot rec would not have existed.
- Remember that from rec we cannot see directly which transaction
- id corresponds to it: we have to go to the clustered index
- record. A query where we want to fetch all rows where
- the secondary index value is in some interval would return
- a wrong result if we would not drop rows which we come to
- visit through secondary index records that would not really
- exist in our snapshot. */
-
- if ((old_vers
- || rec_get_deleted_flag(rec, dict_table_is_comp(
- plan->table)))
- && !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
- clust_rec, index)) {
- goto func_exit;
- }
- }
-
- /* Fetch the columns needed in test conditions */
-
- row_sel_fetch_columns(index, clust_rec, offsets,
- UT_LIST_GET_FIRST(plan->columns));
- *out_rec = clust_rec;
-func_exit:
- /* Reached both when a record was found and when the row is to be
- ignored; in the latter case *out_rec is still NULL */
- err = DB_SUCCESS;
-err_exit:
- /* The error paths jump here with err already set */
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/*************************************************************************
-Sets a lock on a record, using the clustered or the secondary index record
-lock function depending on the type of the index. If the transaction
-already holds more than 10000 locks and buf_LRU_buf_pool_running_out()
-reports true, no lock is requested and DB_LOCK_TABLE_FULL is returned. */
-UNIV_INLINE
-ulint
-sel_set_rec_lock(
-/*=============*/
-				/* out: DB_SUCCESS or error code */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	ulint		mode,	/* in: lock mode */
-	ulint		type,	/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	trx_t*	trx = thr_get_trx(thr);
-
-	if (UT_LIST_GET_LEN(trx->trx_locks) > 10000
-	    && buf_LRU_buf_pool_running_out()) {
-
-		return(DB_LOCK_TABLE_FULL);
-	}
-
-	if (index->type & DICT_CLUSTERED) {
-
-		return(lock_clust_rec_read_check_and_lock(
-			       0, rec, index, offsets, mode, type, thr));
-	}
-
-	return(lock_sec_rec_read_check_and_lock(
-		       0, rec, index, offsets, mode, type, thr));
-}
-
-/*************************************************************************
-Opens the persistent cursor of a plan to its index. If the plan has a search
-tuple, the tuple is filled in from the evaluated tuple expressions and the
-cursor is positioned with it; otherwise the cursor is opened at the start
-or the end of the index, depending on plan->asc. */
-static
-void
-row_sel_open_pcur(
-/*==============*/
-	sel_node_t*	node,		/* in: select node */
-	plan_t*		plan,		/* in: table plan */
-	ibool		search_latch_locked,
-					/* in: TRUE if the thread currently
-					has the search latch locked in
-					s-mode */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	dict_index_t*	index		= plan->index;
-	/* RW_S_LATCH or 0 */
-	ulint		latch_held	= search_latch_locked
-					? RW_S_LATCH : 0;
-	func_node_t*	end_cond;
-	ulint		n_fields;
-	ulint		pos;
-
-	/* Calculate the value of the search tuple: the exact match columns
-	get their expressions evaluated when we evaluate the right sides of
-	end_conds */
-
-	for (end_cond = UT_LIST_GET_FIRST(plan->end_conds);
-	     end_cond;
-	     end_cond = UT_LIST_GET_NEXT(cond_list, end_cond)) {
-
-		eval_exp(que_node_get_next(end_cond->args));
-	}
-
-	if (plan->tuple == NULL) {
-		/* Open the cursor to the start or the end of the index
-		(FALSE: no init) */
-
-		btr_pcur_open_at_index_side(plan->asc, index,
-					    node->latch_mode,
-					    &(plan->pcur), FALSE, mtr);
-	} else {
-		n_fields = dtuple_get_n_fields(plan->tuple);
-
-		if (plan->n_exact_match < n_fields) {
-			/* There is a non-exact match field which must be
-			evaluated separately */
-
-			eval_exp(plan->tuple_exps[n_fields - 1]);
-		}
-
-		for (pos = 0; pos < n_fields; pos++) {
-			que_node_t*	tuple_exp = plan->tuple_exps[pos];
-
-			dfield_copy_data(
-				dtuple_get_nth_field(plan->tuple, pos),
-				que_node_get_val(tuple_exp));
-		}
-
-		/* Open pcur to the index */
-
-		btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
-					   node->latch_mode, &(plan->pcur),
-					   latch_held, mtr);
-	}
-
-	ut_ad(plan->n_rows_prefetched == 0);
-	ut_ad(plan->n_rows_fetched == 0);
-	ut_ad(plan->cursor_at_end == FALSE);
-
-	plan->pcur_is_open = TRUE;
-}
-
-/*************************************************************************
-Restores a stored pcur position to a table index.
-
-The stored relative position is read before the position is restored, and
-the return value is decided from that stored value, from whether the restore
-ended on an equal record, and from the direction of the cursor (plan->asc). */
-static
-ibool
-row_sel_restore_pcur_pos(
-/*=====================*/
- /* out: TRUE if the cursor should be moved to
- the next record after we return from this
- function (moved to the previous, in the case
- of a descending cursor) without processing
- again the current cursor record */
- sel_node_t* node, /* in: select node */
- plan_t* plan, /* in: table plan */
- mtr_t* mtr) /* in: mtr */
-{
- ibool equal_position;
- ulint relative_position;
-
- ut_ad(!plan->cursor_at_end);
-
- relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
-
- equal_position = btr_pcur_restore_position(node->latch_mode,
- &(plan->pcur), mtr);
-
- /* If the cursor is traveling upwards, and relative_position is
-
- (1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
- yet on the successor of the page infimum;
- (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
- first record GREATER than the predecessor of a page supremum; we have
- not yet processed the cursor record: no need to move the cursor to the
- next record;
- (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
- last record LESS or EQUAL to the old stored user record; (a) if
- equal_position is FALSE, this means that the cursor is now on a record
- less than the old user record, and we must move to the next record;
- (b) if equal_position is TRUE, then if
- plan->stored_cursor_rec_processed is TRUE, we must move to the next
- record, else there is no need to move the cursor. */
-
- if (plan->asc) {
- if (relative_position == BTR_PCUR_ON) {
-
- if (equal_position) {
-
- return(plan->stored_cursor_rec_processed);
- }
-
- return(TRUE);
- }
-
- /* Case (1) above is caught only by this debug assertion; any
- position other than BTR_PCUR_ON returns FALSE here */
-
- ut_ad(relative_position == BTR_PCUR_AFTER
- || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
-
- return(FALSE);
- }
-
- /* If the cursor is traveling downwards, and relative_position is
-
- (1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
- the last record LESS than the successor of a page infimum; we have not
- processed the cursor record: no need to move the cursor;
- (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
- first record GREATER than the predecessor of a page supremum; we have
- processed the cursor record: we should move the cursor to the previous
- record;
- (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
- last record LESS or EQUAL to the old stored user record; (a) if
- equal_position is FALSE, this means that the cursor is now on a record
- less than the old user record, and we need not move to the previous
- record; (b) if equal_position is TRUE, then if
- plan->stored_cursor_rec_processed is TRUE, we must move to the previous
- record, else there is no need to move the cursor. */
-
- if (relative_position == BTR_PCUR_BEFORE
- || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
-
- return(FALSE);
- }
-
- if (relative_position == BTR_PCUR_ON) {
-
- if (equal_position) {
-
- return(plan->stored_cursor_rec_processed);
- }
-
- return(FALSE);
- }
-
- /* Only the AFTER positions remain for a descending cursor */
-
- ut_ad(relative_position == BTR_PCUR_AFTER
- || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
-
- return(TRUE);
-}
-
-/*************************************************************************
-Resets a plan cursor to a closed state: the row counters are zeroed and the
-cursor is marked as neither open nor at the end of the result set. */
-UNIV_INLINE
-void
-plan_reset_cursor(
-/*==============*/
-	plan_t*	plan)	/* in: plan */
-{
-	plan->n_rows_prefetched = 0;
-	plan->n_rows_fetched = 0;
-
-	plan->cursor_at_end = FALSE;
-	plan->pcur_is_open = FALSE;
-}
-
-/*************************************************************************
-Tries to do a shortcut to fetch a clustered index record with a unique key,
-using the hash index if possible (not always).
-
-Return values:
-SEL_FOUND: the record was found, passed the other conditions, and
-plan->n_rows_fetched was incremented;
-SEL_EXHAUSTED: no row qualifies (too few fields matched, the record is
-delete-marked, or the other conditions failed);
-SEL_RETRY: the cursor did not end up on a user record, or the read view
-does not see the record as such. */
-static
-ulint
-row_sel_try_search_shortcut(
-/*========================*/
- /* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
- sel_node_t* node, /* in: select node for a consistent read */
- plan_t* plan, /* in: plan for a unique search in clustered
- index */
- mtr_t* mtr) /* in: mtr */
-{
- dict_index_t* index;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ulint ret;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- index = plan->index;
-
- ut_ad(node->read_view);
- ut_ad(plan->unique_search);
- ut_ad(!plan->must_get_clust);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* TRUE: this thread holds the search latch in s-mode */
-
- row_sel_open_pcur(node, plan, TRUE, mtr);
-
- rec = btr_pcur_get_rec(&(plan->pcur));
-
- /* The two early returns below happen before rec_get_offsets() is
- called, so heap is still NULL and there is nothing to free */
-
- if (!page_rec_is_user_rec(rec)) {
-
- return(SEL_RETRY);
- }
-
- ut_ad(plan->mode == PAGE_CUR_GE);
-
- /* As the cursor is now placed on a user record after a search with
- the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
- fields in the user record matched to the search tuple */
-
- if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
-
- return(SEL_EXHAUSTED);
- }
-
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- /* No previous version is built in this function: if the read view
- does not see the record, SEL_RETRY is returned instead */
-
- if (index->type & DICT_CLUSTERED) {
- if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
- node->read_view)) {
- ret = SEL_RETRY;
- goto func_exit;
- }
- } else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) {
-
- ret = SEL_RETRY;
- goto func_exit;
- }
-
- /* Test deleted flag. Fetch the columns needed in test conditions. */
-
- row_sel_fetch_columns(index, rec, offsets,
- UT_LIST_GET_FIRST(plan->columns));
-
- if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
-
- ret = SEL_EXHAUSTED;
- goto func_exit;
- }
-
- /* Test the rest of search conditions */
-
- if (!row_sel_test_other_conds(plan)) {
-
- ret = SEL_EXHAUSTED;
- goto func_exit;
- }
-
- ut_ad(plan->pcur.latch_mode == node->latch_mode);
-
- plan->n_rows_fetched++;
- ret = SEL_FOUND;
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(ret);
-}
-
-/*************************************************************************
-Performs a select step. Drives the join over the plans of the select node:
-the table loop picks the plan for node->fetch_table, the record loop scans
-that plan's cursor, control moves to the next table when a qualifying row
-is found and backtracks to the previous table when the cursor is exhausted.
-On a DB_SUCCESS return thr->run_node has been set to the parent of node
-when a result row (or the aggregate totals) was delivered or when
-node->state was set to SEL_NODE_NO_MORE_ROWS; it is left pointing to node
-when the function only stopped for a while because cost_counter exceeded
-SEL_COST_LIMIT. On a lock wait or an error the cursor position is stored,
-&mtr is committed and the error code is returned. */
-static
-ulint
-row_sel(
-/*====*/
- /* out: DB_SUCCESS or error code */
- sel_node_t* node, /* in: select node */
- que_thr_t* thr) /* in: query thread */
-{
- dict_index_t* index;
- plan_t* plan;
- mtr_t mtr;
- ibool moved;
- rec_t* rec;
- rec_t* old_vers;
- rec_t* clust_rec;
- ibool search_latch_locked;
- ibool consistent_read;
-
- /* The following flag becomes TRUE when we are doing a
- consistent read from a non-clustered index and we must look
- at the clustered index to find out the previous delete mark
- state of the non-clustered record: */
-
- ibool cons_read_requires_clust_rec = FALSE;
- ulint cost_counter = 0;
- ibool cursor_just_opened;
- ibool must_go_to_next;
- ibool leaf_contains_updates = FALSE;
- /* TRUE if select_will_do_update is
- TRUE and the current clustered index
- leaf page has been updated during
- the current mtr: mtr must be committed
- at the same time as the leaf x-latch
- is released */
- ibool mtr_has_extra_clust_latch = FALSE;
- /* TRUE if the search was made using
- a non-clustered index, and we had to
- access the clustered record: now &mtr
- contains a clustered index latch, and
- &mtr must be committed before we move
- to the next non-clustered record */
- ulint found_flag;
- ulint err;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(thr->run_node == node);
-
- search_latch_locked = FALSE;
-
- if (node->read_view) {
- /* In consistent reads, we try to do with the hash index and
- not to use the buffer page get. This is to reduce memory bus
- load resulting from semaphore operations. The search latch
- will be s-locked when we access an index with a unique search
- condition, but not locked when we access an index with a
- less selective search condition. */
-
- consistent_read = TRUE;
- } else {
- consistent_read = FALSE;
- }
-
-table_loop:
- /* TABLE LOOP
- ----------
- This is the outer major loop in calculating a join. We come here when
- node->fetch_table changes, and after adding a row to aggregate totals
- and, of course, when this function is called. We also come here from
- commit_mtr_for_a_while, after &mtr has been committed and the cursor
- position stored. */
-
- ut_ad(leaf_contains_updates == FALSE);
- ut_ad(mtr_has_extra_clust_latch == FALSE);
-
- plan = sel_node_get_nth_plan(node, node->fetch_table);
- index = plan->index;
-
- if (plan->n_rows_prefetched > 0) {
- sel_pop_prefetched_row(plan);
-
- goto next_table_no_mtr;
- }
-
- if (plan->cursor_at_end) {
- /* The cursor has already reached the result set end: no more
- rows to process for this table cursor, as also the prefetch
- stack was empty */
-
- ut_ad(plan->pcur_is_open);
-
- goto table_exhausted_no_mtr;
- }
-
- /* Open a cursor to index, or restore an open cursor position */
-
- mtr_start(&mtr);
-
- if (consistent_read && plan->unique_search && !plan->pcur_is_open
- && !plan->must_get_clust
- && !plan->table->big_rows) {
- if (!search_latch_locked) {
- rw_lock_s_lock(&btr_search_latch);
-
- search_latch_locked = TRUE;
- } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
-
- /* There is an x-latch request waiting: release the
- s-latch for a moment; as an s-latch here is often
- kept for some 10 searches before being released,
- a waiting x-latch request would block other threads
- from acquiring an s-latch for a long time, lowering
- performance significantly in multiprocessors. */
-
- rw_lock_s_unlock(&btr_search_latch);
- rw_lock_s_lock(&btr_search_latch);
- }
-
- found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
-
- if (found_flag == SEL_FOUND) {
-
- goto next_table;
-
- } else if (found_flag == SEL_EXHAUSTED) {
-
- goto table_exhausted;
- }
-
- ut_ad(found_flag == SEL_RETRY);
-
- plan_reset_cursor(plan);
-
- mtr_commit(&mtr);
- mtr_start(&mtr);
- }
-
- if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
-
- search_latch_locked = FALSE;
- }
-
- if (!plan->pcur_is_open) {
- /* Evaluate the expressions to build the search tuple and
- open the cursor; note that search_latch_locked is always FALSE
- at this point, because the latch was released just above */
-
- row_sel_open_pcur(node, plan, search_latch_locked, &mtr);
-
- cursor_just_opened = TRUE;
-
- /* A new search was made: increment the cost counter */
- cost_counter++;
- } else {
- /* Restore pcur position to the index */
-
- must_go_to_next = row_sel_restore_pcur_pos(node, plan, &mtr);
-
- cursor_just_opened = FALSE;
-
- if (must_go_to_next) {
- /* We have already processed the cursor record: move
- to the next */
-
- goto next_rec;
- }
- }
-
-rec_loop:
- /* RECORD LOOP
- -----------
- In this loop we use pcur and try to fetch a qualifying row, and
- also fill the prefetch buffer for this table if n_rows_fetched has
- exceeded a threshold. While we are inside this loop, the following
- holds:
- (1) &mtr is started,
- (2) pcur is positioned and open.
-
- NOTE that if cursor_just_opened is TRUE here, it means that we came
- to this point right after row_sel_open_pcur. */
-
- ut_ad(mtr_has_extra_clust_latch == FALSE);
-
- rec = btr_pcur_get_rec(&(plan->pcur));
-
- /* PHASE 1: Set a lock if specified */
-
- if (!node->asc && cursor_just_opened
- && !page_rec_is_supremum(rec)) {
-
- /* When we open a cursor for a descending search, we must set
- a next-key lock on the successor record: otherwise it would
- be possible to insert new records next to the cursor position,
- and it might be that these new records should appear in the
- search result set, resulting in the phantom problem. */
-
- if (!consistent_read) {
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using READ COMMITTED isolation
- level, we lock only the record, i.e., next-key
- locking is not used. */
-
- rec_t* next_rec = page_rec_get_next(rec);
- ulint lock_type;
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- offsets = rec_get_offsets(next_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level
- == TRX_ISO_READ_COMMITTED) {
-
- if (page_rec_is_supremum(next_rec)) {
-
- goto skip_lock;
- }
-
- lock_type = LOCK_REC_NOT_GAP;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- err = sel_set_rec_lock(next_rec, index, offsets,
- node->row_lock_mode,
- lock_type, thr);
-
- if (err != DB_SUCCESS) {
- /* Note that in this case we will store in pcur
- the PREDECESSOR of the record we are waiting
- the lock for */
-
- goto lock_wait_or_error;
- }
- }
- }
-
-skip_lock:
- if (page_rec_is_infimum(rec)) {
-
- /* The infimum record on a page cannot be in the result set,
- and neither can a record lock be placed on it: we skip such
- a record. We also increment the cost counter as we may have
- processed yet another page of index. */
-
- cost_counter++;
-
- goto next_rec;
- }
-
- if (!consistent_read) {
- /* Try to place a lock on the index record */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using READ COMMITTED isolation level,
- we lock only the record, i.e., next-key locking is
- not used. */
-
- ulint lock_type;
- trx_t* trx;
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- trx = thr_get_trx(thr);
-
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED) {
-
- if (page_rec_is_supremum(rec)) {
-
- goto next_rec;
- }
-
- lock_type = LOCK_REC_NOT_GAP;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- err = sel_set_rec_lock(rec, index, offsets,
- node->row_lock_mode, lock_type, thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
-
- if (page_rec_is_supremum(rec)) {
-
- /* A page supremum record cannot be in the result set: skip
- it now when we have placed a possible lock on it */
-
- goto next_rec;
- }
-
- ut_ad(page_rec_is_user_rec(rec));
-
- if (cost_counter > SEL_COST_LIMIT) {
-
- /* Now that we have placed the necessary locks, we can stop
- for a while and store the cursor position; NOTE that if we
- would store the cursor position BEFORE placing a record lock,
- it might happen that the cursor would jump over some records
- that another transaction could meanwhile insert adjacent to
- the cursor: this would result in the phantom problem. */
-
- goto stop_for_a_while;
- }
-
- /* PHASE 2: Check a mixed index mix id if needed */
-
- if (plan->unique_search && cursor_just_opened) {
-
- ut_ad(plan->mode == PAGE_CUR_GE);
-
- /* As the cursor is now placed on a user record after a search
- with the mode PAGE_CUR_GE, the up_match field in the cursor
- tells how many fields in the user record matched to the search
- tuple */
-
- if (btr_pcur_get_up_match(&(plan->pcur))
- < plan->n_exact_match) {
- goto table_exhausted;
- }
-
- /* Ok, no need to test end_conds or mix id */
-
- }
-
- /* We are ready to look at a possible new index entry in the result
- set: the cursor is now placed on a user record */
-
- /* PHASE 3: Get previous version in a consistent read */
-
- cons_read_requires_clust_rec = FALSE;
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- if (consistent_read) {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- if (index->type & DICT_CLUSTERED) {
-
- if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
- node->read_view)) {
-
- err = row_sel_build_prev_vers(
- node->read_view, index, rec,
- &offsets, &heap, &plan->old_vers_heap,
- &old_vers, &mtr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- if (old_vers == NULL) {
- offsets = rec_get_offsets(
- rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- row_sel_fetch_columns(
- index, rec, offsets,
- UT_LIST_GET_FIRST(
- plan->columns));
-
- if (!row_sel_test_end_conds(plan)) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- rec = old_vers;
- }
- } else if (!lock_sec_rec_cons_read_sees(rec, index,
- node->read_view)) {
- cons_read_requires_clust_rec = TRUE;
- }
- }
-
- /* PHASE 4: Test search end conditions and deleted flag */
-
- /* Fetch the columns needed in test conditions */
-
- row_sel_fetch_columns(index, rec, offsets,
- UT_LIST_GET_FIRST(plan->columns));
-
- /* Test the selection end conditions: these can only contain columns
- which already are found in the index, even though the index might be
- non-clustered */
-
- if (plan->unique_search && cursor_just_opened) {
-
- /* No test necessary: the test was already made above */
-
- } else if (!row_sel_test_end_conds(plan)) {
-
- goto table_exhausted;
- }
-
- if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))
- && !cons_read_requires_clust_rec) {
-
- /* The record is delete marked: we can skip it if this is
- not a consistent read which might see an earlier version
- of a non-clustered index record */
-
- if (plan->unique_search) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- /* PHASE 5: Get the clustered index record, if needed and if we did
- not do the search using the clustered index */
-
- if (plan->must_get_clust || cons_read_requires_clust_rec) {
-
- /* It was a non-clustered index and we must fetch also the
- clustered index record */
-
- err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec,
- &mtr);
- mtr_has_extra_clust_latch = TRUE;
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- /* Retrieving the clustered record required a search:
- increment the cost counter */
-
- cost_counter++;
-
- if (clust_rec == NULL) {
- /* The record did not exist in the read view */
- ut_ad(consistent_read);
-
- goto next_rec;
- }
-
- if (rec_get_deleted_flag(clust_rec,
- dict_table_is_comp(plan->table))) {
-
- /* The record is delete marked: we can skip it */
-
- goto next_rec;
- }
-
- if (node->can_get_updated) {
-
- btr_pcur_store_position(&(plan->clust_pcur), &mtr);
- }
- }
-
- /* PHASE 6: Test the rest of search conditions */
-
- if (!row_sel_test_other_conds(plan)) {
-
- if (plan->unique_search) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- /* PHASE 7: We found a new qualifying row for the current table; push
- the row if prefetch is on, or move to the next table in the join */
-
- plan->n_rows_fetched++;
-
- ut_ad(plan->pcur.latch_mode == node->latch_mode);
-
- if (node->select_will_do_update) {
- /* This is a searched update and we can do the update in-place,
- saving CPU time */
-
- row_upd_in_place_in_select(node, thr, &mtr);
-
- leaf_contains_updates = TRUE;
-
- /* When the database is in the online backup mode, the number
- of log records for a single mtr should be small: increment the
- cost counter to ensure it */
-
- cost_counter += 1 + (SEL_COST_LIMIT / 8);
-
- if (plan->unique_search) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
- || plan->unique_search || plan->no_prefetch
- || plan->table->big_rows) {
-
- /* No prefetch in operation: go to the next table */
-
- goto next_table;
- }
-
- sel_push_prefetched_row(plan);
-
- if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) {
-
- /* The prefetch buffer is now full */
-
- sel_pop_prefetched_row(plan);
-
- goto next_table;
- }
-
-next_rec:
- ut_ad(!search_latch_locked);
-
- if (mtr_has_extra_clust_latch) {
-
- /* We must commit &mtr if we are moving to the next
- non-clustered index record, because we could break the
- latching order if we would access a different clustered
- index page right away without releasing the previous. */
-
- goto commit_mtr_for_a_while;
- }
-
- if (leaf_contains_updates
- && btr_pcur_is_after_last_on_page(&(plan->pcur), &mtr)) {
-
- /* We must commit &mtr if we are moving to a different page,
- because we have done updates to the x-latched leaf page, and
- the latch would be released in btr_pcur_move_to_next, without
- &mtr getting committed there */
-
- ut_ad(node->asc);
-
- goto commit_mtr_for_a_while;
- }
-
- if (node->asc) {
- moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
- } else {
- moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr);
- }
-
- if (!moved) {
-
- goto table_exhausted;
- }
-
- cursor_just_opened = FALSE;
-
- /* END OF RECORD LOOP
- ------------------ */
- goto rec_loop;
-
-next_table:
- /* We found a record which satisfies the conditions: we can move to
- the next table or return a row in the result set */
-
- ut_ad(btr_pcur_is_on_user_rec(&(plan->pcur), &mtr));
-
- if (plan->unique_search && !node->can_get_updated) {
-
- plan->cursor_at_end = TRUE;
- } else {
- ut_ad(!search_latch_locked);
-
- plan->stored_cursor_rec_processed = TRUE;
-
- btr_pcur_store_position(&(plan->pcur), &mtr);
- }
-
- mtr_commit(&mtr);
-
- leaf_contains_updates = FALSE;
- mtr_has_extra_clust_latch = FALSE;
-
-next_table_no_mtr:
- /* If we use 'goto' to this label, it means that the row was popped
- from the prefetched rows stack, and &mtr is already committed */
-
- if (node->fetch_table + 1 == node->n_tables) {
-
- sel_eval_select_list(node);
-
- if (node->is_aggregate) {
-
- goto table_loop;
- }
-
- sel_assign_into_var_values(node->into_list, node);
-
- thr->run_node = que_node_get_parent(node);
-
- if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
- }
-
- err = DB_SUCCESS;
- goto func_exit;
- }
-
- node->fetch_table++;
-
- /* When we move to the next table, we first reset the plan cursor:
- we do not care about resetting it when we backtrack from a table */
-
- plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table));
-
- goto table_loop;
-
-table_exhausted:
- /* The table cursor pcur reached the result set end: backtrack to the
- previous table in the join if we do not have cached prefetched rows */
-
- plan->cursor_at_end = TRUE;
-
- mtr_commit(&mtr);
-
- leaf_contains_updates = FALSE;
- mtr_has_extra_clust_latch = FALSE;
-
- if (plan->n_rows_prefetched > 0) {
- /* The table became exhausted during a prefetch */
-
- sel_pop_prefetched_row(plan);
-
- goto next_table_no_mtr;
- }
-
-table_exhausted_no_mtr:
- if (node->fetch_table == 0) {
- err = DB_SUCCESS;
-
- if (node->is_aggregate && !node->aggregate_already_fetched) {
-
- node->aggregate_already_fetched = TRUE;
-
- sel_assign_into_var_values(node->into_list, node);
-
- thr->run_node = que_node_get_parent(node);
-
- if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
- }
-
- goto func_exit;
- }
-
- node->state = SEL_NODE_NO_MORE_ROWS;
-
- thr->run_node = que_node_get_parent(node);
-
- if (search_latch_locked) {
- rw_lock_s_unlock(&btr_search_latch);
- }
-
- goto func_exit;
- }
-
- node->fetch_table--;
-
- goto table_loop;
-
-stop_for_a_while:
- /* Return control for a while to que_run_threads, so that runaway
- queries can be canceled. NOTE that when we come here, we must, in a
- locking read, have placed the necessary (possibly waiting request)
- record lock on the cursor record or its successor: when we reposition
- the cursor, this record lock guarantees that nobody can meanwhile have
- inserted new records which should have appeared in the result set,
- which would result in the phantom problem. */
-
- ut_ad(!search_latch_locked);
-
- plan->stored_cursor_rec_processed = FALSE;
- btr_pcur_store_position(&(plan->pcur), &mtr);
-
- mtr_commit(&mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_gen(TRUE));
-#endif /* UNIV_SYNC_DEBUG */
- err = DB_SUCCESS;
- goto func_exit;
-
-commit_mtr_for_a_while:
- /* Stores the cursor position and commits &mtr; this is used if
- &mtr may contain latches which would break the latching order if
- &mtr would not be committed and the latches released. */
-
- plan->stored_cursor_rec_processed = TRUE;
-
- ut_ad(!search_latch_locked);
- btr_pcur_store_position(&(plan->pcur), &mtr);
-
- mtr_commit(&mtr);
-
- leaf_contains_updates = FALSE;
- mtr_has_extra_clust_latch = FALSE;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_gen(TRUE));
-#endif /* UNIV_SYNC_DEBUG */
-
- goto table_loop;
-
-lock_wait_or_error:
- /* See the note at stop_for_a_while: the same holds for this case.
- The variable err already holds the code that was assigned just before
- the jump to this label; after storing the cursor position and
- committing &mtr, control falls through to func_exit. */
-
- ut_ad(!btr_pcur_is_before_first_on_page(&(plan->pcur), &mtr)
- || !node->asc);
- ut_ad(!search_latch_locked);
-
- plan->stored_cursor_rec_processed = FALSE;
- btr_pcur_store_position(&(plan->pcur), &mtr);
-
- mtr_commit(&mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_gen(TRUE));
-#endif /* UNIV_SYNC_DEBUG */
-
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/**************************************************************************
-Executes one step of a select node in an SQL execution graph. If the node
-is in the SEL_NODE_OPEN state, the step first starts the transaction if
-needed, resets the cursor of the first plan and either assigns a read view
-or requests table intention locks; after that row_sel() is called once. */
-
-que_thr_t*
-row_sel_step(
-/*=========*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	sel_node_t*	sel;
-	sym_node_t*	tab;
-	ulint		intention_mode;
-	ulint		err;
-
-	ut_ad(thr);
-
-	sel = thr->run_node;
-
-	ut_ad(que_node_get_type(sel) == QUE_NODE_SELECT);
-
-	/* A select which has an INTO list is reopened each time control
-	arrives from its parent node */
-
-	if (sel->into_list && thr->prev_node == que_node_get_parent(sel)) {
-
-		sel->state = SEL_NODE_OPEN;
-	}
-
-	if (sel->state == SEL_NODE_OPEN) {
-
-		/* The session may not have a running transaction yet,
-		or the previous one may have been committed */
-
-		trx_start_if_not_started(thr_get_trx(thr));
-
-		plan_reset_cursor(sel_node_get_nth_plan(sel, 0));
-
-		if (sel->consistent_read) {
-			/* A consistent read needs a read view but no
-			table locks */
-			sel->read_view = trx_assign_read_view(
-				thr_get_trx(thr));
-		} else {
-			/* A locking read: request an intention lock on
-			every table in the table list; give up at the
-			first request that does not succeed */
-
-			intention_mode = sel->set_x_locks
-				? LOCK_IX : LOCK_IS;
-
-			for (tab = sel->table_list;
-			     tab;
-			     tab = que_node_get_next(tab)) {
-
-				err = lock_table(0, tab->table,
-						 intention_mode, thr);
-
-				if (err != DB_SUCCESS) {
-					thr_get_trx(thr)->error_state = err;
-
-					return(NULL);
-				}
-			}
-		}
-
-		/* For an explicit cursor which has variables to copy,
-		take a copy of the stored procedure variable values, so
-		that the values cannot change between fetches */
-
-		if (sel->explicit_cursor
-		    && UT_LIST_GET_FIRST(sel->copy_variables)) {
-
-			row_sel_copy_input_variable_vals(sel);
-		}
-
-		sel->state = SEL_NODE_FETCH;
-		sel->fetch_table = 0;
-
-		if (sel->is_aggregate) {
-			/* Start the aggregate totals from scratch */
-			sel_reset_aggregate_vals(sel);
-		}
-	}
-
-	err = row_sel(sel, thr);
-
-	/* NOTE! if queries are parallelized, the following assignment may
-	have problems; the assignment should be made only if thr is the
-	only top-level thr in the graph: */
-
-	thr->graph->last_sel_node = sel;
-
-	if (err == DB_SUCCESS) {
-
-		return(thr);
-	}
-
-	thr_get_trx(thr)->error_state = err;
-
-	return(NULL);
-}
-
-/**************************************************************************
-Performs a fetch for a cursor. When control arrives from the parent node,
-the select node of the cursor is scheduled to run next; when control comes
-back from elsewhere, the selected row (if any) is delivered to the INTO
-list or to the user callback and control is passed on to the parent. */
-
-que_thr_t*
-fetch_step(
-/*=======*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	fetch_node_t*	fetch;
-	sel_node_t*	cursor;
-
-	ut_ad(thr);
-
-	fetch = thr->run_node;
-	cursor = fetch->cursor_def;
-
-	ut_ad(que_node_get_type(fetch) == QUE_NODE_FETCH);
-
-	if (thr->prev_node != que_node_get_parent(fetch)) {
-
-		if (cursor->state == SEL_NODE_NO_MORE_ROWS) {
-
-			/* There is no row to deliver */
-
-		} else if (fetch->into_list) {
-
-			sel_assign_into_var_values(fetch->into_list, cursor);
-
-		} else if (!fetch->func->func(cursor, fetch->func->arg)) {
-
-			/* The callback returned NULL: mark the cursor
-			as having no more rows */
-
-			cursor->state = SEL_NODE_NO_MORE_ROWS;
-		}
-
-		thr->run_node = que_node_get_parent(fetch);
-
-		return(thr);
-	}
-
-	/* For the duration of the fetch the fetch node acts as the parent
-	of the cursor definition: in this way execution returns to this
-	fetch node once a row has been selected, or once it is known that
-	no row is left */
-
-	cursor->common.parent = fetch;
-
-	if (cursor->state == SEL_NODE_CLOSED) {
-		fprintf(stderr,
-			"InnoDB: Error: fetch called on a closed cursor\n");
-
-		thr_get_trx(thr)->error_state = DB_ERROR;
-
-		return(NULL);
-	}
-
-	thr->run_node = cursor;
-
-	return(thr);
-}
-
-/********************************************************************
-Sample fetch callback: prints to stderr the type and the value of every
-column in the select list of the fetched row. */
-
-void*
-row_fetch_print(
-/*============*/
-				/* out: always returns non-NULL */
-	void*	row,		/* in: sel_node_t* */
-	void*	user_arg)	/* in: not used */
-{
-	sel_node_t*	sel = row;
-	que_node_t*	col;
-	ulint		col_no;
-
-	UT_NOT_USED(user_arg);
-
-	fprintf(stderr, "row_fetch_print: row %p\n", row);
-
-	for (col = sel->select_list, col_no = 0;
-	     col;
-	     col = que_node_get_next(col), col_no++) {
-
-		dfield_t*	value = que_node_get_val(col);
-		dtype_t*	value_type = dfield_get_type(value);
-
-		fprintf(stderr, " column %lu:\n", (ulong)col_no);
-
-		dtype_print(value_type);
-		fprintf(stderr, "\n");
-
-		if (dfield_get_len(value) == UNIV_SQL_NULL) {
-			fprintf(stderr, " <NULL>;");
-		} else {
-			ut_print_buf(stderr, dfield_get_data(value),
-				     dfield_get_len(value));
-		}
-
-		fprintf(stderr, "\n");
-	}
-
-	/* Any non-NULL value will do as the return value */
-
-	return((void*)42);
-}
-
-/********************************************************************
-Fetch callback which reads the first column of the select list as an
-unsigned 4 byte integer and stores it to the location passed in user_arg.
-The type of the column must be DATA_INT, DATA_UNSIGNED, length = 4: this
-is enforced with assertions. */
-
-void*
-row_fetch_store_uint4(
-/*==================*/
-				/* out: always returns NULL */
-	void*	row,		/* in: sel_node_t* */
-	void*	user_arg)	/* in: data pointer */
-{
-	sel_node_t*	sel = row;
-	ib_uint32_t*	out = user_arg;
-	dfield_t*	value;
-	dtype_t*	value_type;
-	ulint		value_len;
-
-	value = que_node_get_val(sel->select_list);
-	value_type = dfield_get_type(value);
-	value_len = dfield_get_len(value);
-
-	ut_a(dtype_get_mtype(value_type) == DATA_INT);
-	ut_a(dtype_get_prtype(value_type) & DATA_UNSIGNED);
-	ut_a(value_len == 4);
-
-	*out = (ib_uint32_t) mach_read_from_4(dfield_get_data(value));
-
-	return(NULL);
-}
-
-/***************************************************************
-Prints a row in a select result. When control arrives from the parent
-node the select node is reopened and run; otherwise the current row is
-printed to stderr and the select node is run again, or, if the select
-node is no longer in the fetch state, control is passed to the parent. */
-
-que_thr_t*
-row_printf_step(
-/*============*/
-				/* out: query thread to run next or NULL */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	row_printf_node_t*	printf_node;
-	sel_node_t*		sel;
-	que_node_t*		col;
-
-	ut_ad(thr);
-
-	printf_node = thr->run_node;
-	sel = printf_node->sel_node;
-
-	ut_ad(que_node_get_type(printf_node) == QUE_NODE_ROW_PRINTF);
-
-	if (thr->prev_node == que_node_get_parent(printf_node)) {
-
-		/* We came from the parent: reset the cursor and fetch
-		the first row to print */
-
-		sel->state = SEL_NODE_OPEN;
-
-		thr->run_node = sel;
-
-		return(thr);
-	}
-
-	if (sel->state != SEL_NODE_FETCH) {
-
-		ut_ad(sel->state == SEL_NODE_NO_MORE_ROWS);
-
-		/* Nothing more to print: continue from the parent */
-
-		thr->run_node = que_node_get_parent(printf_node);
-
-		return(thr);
-	}
-
-	for (col = sel->select_list; col; col = que_node_get_next(col)) {
-
-		dfield_print_also_hex(que_node_get_val(col));
-
-		fputs(" ::: ", stderr);
-	}
-
-	putc('\n', stderr);
-
-	/* Go and fetch the next row to print */
-
-	thr->run_node = sel;
-
-	return(thr);
-}
-
-/********************************************************************
-Converts a key value stored in MySQL format to an Innobase dtuple. The last
-field of the key value may be just a prefix of a fixed length field: hence
-the parameter key_len. But currently we do not allow search keys where the
-last field is only a prefix of the full key field len and print a warning if
-such appears. A counterpart of this function is
-ha_innobase::store_key_val_for_row() in ha_innodb.cc.
-
-Each converted non-NULL field consumes data_len bytes of buf. The space
-left in buf is asserted BEFORE a field is stored, so that a too small
-conversion buffer is caught before anything is written past its end. */
-
-void
-row_sel_convert_mysql_key_to_innobase(
-/*==================================*/
-	dtuple_t*	tuple,		/* in: tuple where to build;
-					NOTE: we assume that the type info
-					in the tuple is already according
-					to index! */
-	byte*		buf,		/* in: buffer to use in field
-					conversions */
-	ulint		buf_len,	/* in: buffer length */
-	dict_index_t*	index,		/* in: index of the key value */
-	byte*		key_ptr,	/* in: MySQL key value */
-	ulint		key_len,	/* in: MySQL key value length */
-	trx_t*		trx)		/* in: transaction */
-{
-	byte*		original_buf	= buf;
-	byte*		original_key_ptr = key_ptr;
-	dict_field_t*	field;
-	dfield_t*	dfield;
-	ulint		data_offset;
-	ulint		data_len;
-	ulint		data_field_len;
-	ibool		is_null;
-	byte*		key_end;
-	ulint		n_fields = 0;
-	ulint		type;
-
-	/* For documentation of the key value storage format in MySQL, see
-	ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
-
-	key_end = key_ptr + key_len;
-
-	/* Permit us to access any field in the tuple (ULINT_MAX): */
-
-	dtuple_set_n_fields(tuple, ULINT_MAX);
-
-	dfield = dtuple_get_nth_field(tuple, 0);
-	field = dict_index_get_nth_field(index, 0);
-
-	if (dfield_get_type(dfield)->mtype == DATA_SYS) {
-		/* A special case: we are looking for a position in the
-		generated clustered index which InnoDB automatically added
-		to a table with no primary key: the first and the only
-		ordering column is ROW_ID which InnoDB stored to the key_ptr
-		buffer. */
-
-		ut_a(key_len == DATA_ROW_ID_LEN);
-
-		dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN);
-
-		dtuple_set_n_fields(tuple, 1);
-
-		return;
-	}
-
-	while (key_ptr < key_end) {
-
-		ut_a(field->col->mtype == dfield_get_type(dfield)->mtype);
-
-		data_offset = 0;
-		is_null = FALSE;
-
-		if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
-			/* The first byte in the field tells if this is
-			an SQL NULL value */
-
-			data_offset = 1;
-
-			if (*key_ptr != 0) {
-				dfield_set_data(dfield, NULL, UNIV_SQL_NULL);
-
-				is_null = TRUE;
-			}
-		}
-
-		type = dfield_get_type(dfield)->mtype;
-
-		/* Calculate data length and data field total length */
-
-		if (type == DATA_BLOB) {
-			/* The key field is a column prefix of a BLOB or
-			TEXT */
-
-			ut_a(field->prefix_len > 0);
-
-			/* MySQL stores the actual data length to the first 2
-			bytes after the optional SQL NULL marker byte. The
-			storage format is little-endian, that is, the most
-			significant byte at a higher address. In UTF-8, MySQL
-			seems to reserve field->prefix_len bytes for
-			storing this field in the key value buffer, even
-			though the actual value only takes data_len bytes
-			from the start. */
-
-			data_len = key_ptr[data_offset]
-				+ 256 * key_ptr[data_offset + 1];
-			data_field_len = data_offset + 2 + field->prefix_len;
-
-			data_offset += 2;
-
-			/* Now that we know the length, we store the column
-			value like it would be a fixed char field */
-
-		} else if (field->prefix_len > 0) {
-			/* Looks like MySQL pads unused end bytes in the
-			prefix with space. Therefore, also in UTF-8, it is ok
-			to compare with a prefix containing full prefix_len
-			bytes, and no need to take at most prefix_len / 3
-			UTF-8 characters from the start.
-			If the prefix is used as the upper end of a LIKE
-			'abc%' query, then MySQL pads the end with chars
-			0xff. TODO: in that case does it any harm to compare
-			with the full prefix_len bytes. How do characters
-			0xff in UTF-8 behave? */
-
-			data_len = field->prefix_len;
-			data_field_len = data_offset + data_len;
-		} else {
-			data_len = dfield_get_type(dfield)->len;
-			data_field_len = data_offset + data_len;
-		}
-
-		if (dtype_get_mysql_type(dfield_get_type(dfield))
-		    == DATA_MYSQL_TRUE_VARCHAR
-		    && dfield_get_type(dfield)->mtype != DATA_INT) {
-			/* In a MySQL key value format, a true VARCHAR is
-			always preceded by 2 bytes of a length field.
-			dfield_get_type(dfield)->len returns the maximum
-			'payload' len in bytes. That does not include the
-			2 bytes that tell the actual data length.
-
-			We added the check != DATA_INT to make sure we do
-			not treat MySQL ENUM or SET as a true VARCHAR! */
-
-			data_len += 2;
-			data_field_len += 2;
-		}
-
-		/* Storing may use at most data_len bytes of buf */
-
-		if (!is_null) {
-			/* Make sure that the field fits in what is left of
-			buf before storing it: checking only after the loop
-			would notice an overrun when the memory after buf
-			has already been overwritten */
-
-			ut_a(buf + data_len <= original_buf + buf_len);
-
-			row_mysql_store_col_in_innobase_format(
-				dfield, buf,
-				FALSE, /* MySQL key value format col */
-				key_ptr + data_offset, data_len,
-				dict_table_is_comp(index->table));
-			buf += data_len;
-		}
-
-		key_ptr += data_field_len;
-
-		if (key_ptr > key_end) {
-			/* The last field in key was not a complete key field
-			but a prefix of it.
-
-			Print a warning about this! HA_READ_PREFIX_LAST does
-			not currently work in InnoDB with partial-field key
-			value prefixes. Since MySQL currently uses a padding
-			trick to calculate LIKE 'abc%' type queries there
-			should never be partial-field prefixes in searches. */
-
-			ut_print_timestamp(stderr);
-
-			fputs(" InnoDB: Warning: using a partial-field"
-			      " key prefix in search.\n"
-			      "InnoDB: ", stderr);
-			dict_index_name_print(stderr, trx, index);
-			fprintf(stderr, ". Last data field length %lu bytes,\n"
-				"InnoDB: key ptr now exceeds"
-				" key end by %lu bytes.\n"
-				"InnoDB: Key value in the MySQL format:\n",
-				(ulong) data_field_len,
-				(ulong) (key_ptr - key_end));
-			fflush(stderr);
-			ut_print_buf(stderr, original_key_ptr, key_len);
-			fprintf(stderr, "\n");
-
-			if (!is_null) {
-				/* Shorten the stored field by the number of
-				bytes which were missing from the key value */
-
-				dfield->len -= (ulint)(key_ptr - key_end);
-			}
-		}
-
-		n_fields++;
-		field++;
-		dfield++;
-	}
-
-	ut_a(buf <= original_buf + buf_len);
-
-	/* We set the length of tuple to n_fields: we assume that the memory
-	area allocated for it is big enough (usually bigger than n_fields). */
-
-	dtuple_set_n_fields(tuple, n_fields);
-}
-
-/******************************************************************
-Copies the row id system column of an index record to prebuilt->row_id.
-If the length of the field is not DATA_ROW_ID_LEN, diagnostics are printed
-to stderr and ut_error is raised. */
-static
-void
-row_sel_store_row_id_to_prebuilt(
-/*=============================*/
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt */
-	rec_t*		index_rec,	/* in: record */
-	dict_index_t*	index,		/* in: index of the record */
-	const ulint*	offsets)	/* in: rec_get_offsets
-					(index_rec, index) */
-{
-	ulint	row_id_pos;
-	ulint	row_id_len;
-	byte*	row_id;
-
-	ut_ad(rec_offs_validate(index_rec, index, offsets));
-
-	row_id_pos = dict_index_get_sys_col_pos(index, DATA_ROW_ID);
-
-	row_id = rec_get_nth_field(index_rec, offsets, row_id_pos,
-				   &row_id_len);
-
-	if (row_id_len == DATA_ROW_ID_LEN) {
-
-		ut_memcpy(prebuilt->row_id, row_id, row_id_len);
-
-		return;
-	}
-
-	/* The field has an unexpected length: report it and stop */
-
-	fprintf(stderr,
-		"InnoDB: Error: Row id field is"
-		" wrong length %lu in ", (ulong) row_id_len);
-	dict_index_name_print(stderr, prebuilt->trx, index);
-	fprintf(stderr, "\n"
-		"InnoDB: Field number %lu, record:\n",
-		(ulong) row_id_pos);
-	rec_print_new(stderr, index_rec, offsets);
-	putc('\n', stderr);
-	ut_error;
-
-	ut_memcpy(prebuilt->row_id, row_id, row_id_len);
-}
-
-/******************************************************************
-Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
-function is row_mysql_store_col_in_innobase_format() in row0mysql.c.
-The conversion which is applied is chosen by templ->type. */
-static
-void
-row_sel_field_store_in_mysql_format(
-/*================================*/
-	byte*	dest,	/* in/out: buffer where to store; NOTE that BLOBs
-			are not in themselves stored here: the caller must
-			allocate and copy the BLOB into buffer before, and pass
-			the pointer to the BLOB in 'data' */
-	const mysql_row_templ_t* templ,	/* in: MySQL column template.
-			Its following fields are referenced:
-			type, is_unsigned, mysql_col_len, mbminlen, mbmaxlen */
-	byte*	data,	/* in: data to store */
-	ulint	len)	/* in: length of the data */
-{
-	byte*	out;
-	byte*	col_end;
-	byte*	pad;
-
-	ut_ad(len != UNIV_SQL_NULL);
-
-	switch (templ->type) {
-	case DATA_INT:
-		/* The bytes of an integer are stored in the reverse order
-		in the two formats: copy them from the last byte of dest
-		towards the first */
-
-		out = dest + len;
-
-		do {
-			out--;
-			*out = *data++;
-		} while (out != dest);
-
-		if (!templ->is_unsigned) {
-			/* Restore the sign bit to normal */
-			dest[len - 1] = (byte) (dest[len - 1] ^ 128);
-		}
-
-		ut_ad(templ->mysql_col_len == len);
-		break;
-
-	case DATA_VARCHAR:
-	case DATA_VARMYSQL:
-	case DATA_BINARY:
-		col_end = dest + templ->mysql_col_len;
-
-		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
-			/* A >= 5.0.3 type true VARCHAR: the data length
-			goes to the first byte or the first two bytes of
-			dest, and the data follows the length */
-
-			dest = row_mysql_store_true_var_len(
-				dest, len, templ->mysql_length_bytes);
-		}
-
-		ut_memcpy(dest, data, len);
-
-		/* Fill the rest of the field with spaces. This is done
-		also for the unused end of a >= 5.0.3 true VARCHAR column,
-		just in case MySQL expects its contents to be
-		deterministic. */
-
-		pad = dest + len;
-
-		ut_ad(templ->mbminlen <= templ->mbmaxlen);
-
-		if (templ->mbminlen != 2) {
-			ut_ad(templ->mbminlen == 1);
-			/* space=0x20 */
-
-			memset(pad, 0x20, col_end - pad);
-			break;
-		}
-
-		/* UCS2: a space char is the two bytes 0x0020 */
-
-		if ((len & 1) && pad < col_end) {
-			/* A 0x20 has been stripped from the column:
-			put it back */
-
-			*pad++ = 0x20;
-		}
-
-		while (pad < col_end) {
-			*pad++ = 0x00;
-			*pad++ = 0x20;
-		}
-		break;
-
-	case DATA_BLOB:
-		/* Only a reference to the BLOB buffer is stored to dest:
-		the BLOB itself was already copied to the buffer in
-		row_sel_store_mysql_rec */
-
-		row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
-					 len);
-		break;
-
-	case DATA_MYSQL:
-		memcpy(dest, data, len);
-
-		ut_ad(templ->mysql_col_len >= len);
-		ut_ad(templ->mbmaxlen >= templ->mbminlen);
-
-		ut_ad(templ->mbmaxlen > templ->mbminlen
-		      || templ->mysql_col_len == len);
-		/* The following assertion would fail for old tables
-		containing UTF-8 ENUM columns due to Bug #9526. */
-		ut_ad(!templ->mbmaxlen
-		      || !(templ->mysql_col_len % templ->mbmaxlen));
-		ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len);
-
-		if (templ->mbminlen != templ->mbmaxlen) {
-			/* Put back the spaces which were stripped in
-			row0mysql.ic, function
-			row_mysql_store_col_in_innobase_format(). */
-
-			memset(dest + len, 0x20, templ->mysql_col_len - len);
-		}
-		break;
-
-	default:
-		ut_ad(templ->type == DATA_CHAR
-		      || templ->type == DATA_FIXBINARY
-		      /*|| templ->type == DATA_SYS_CHILD
-		      || templ->type == DATA_SYS*/
-		      || templ->type == DATA_FLOAT
-		      || templ->type == DATA_DOUBLE
-		      || templ->type == DATA_DECIMAL);
-		ut_ad(templ->mysql_col_len == len);
-
-		memcpy(dest, data, len);
-	}
-}
-
-/******************************************************************
-Convert a row in the Innobase format to a row in the MySQL format.
-Note that the template in prebuilt may advise us to copy only a few
-columns to mysql_rec; the other columns are left blank. Not all columns
-may be needed in the query. */
-static
-ibool
-row_sel_store_mysql_rec(
-/*====================*/
- /* out: TRUE if success, FALSE if
- could not allocate memory for a BLOB
- (though we may also assert in that
- case) */
- byte* mysql_rec, /* out: row in the MySQL format */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct */
- rec_t* rec, /* in: Innobase record in the index
- which was described in prebuilt's
- template */
- const ulint* offsets) /* in: array returned by
- rec_get_offsets() */
-{
- mysql_row_templ_t* templ;
- mem_heap_t* extern_field_heap = NULL;
- mem_heap_t* heap;
- byte* data;
- ulint len;
- ulint i;
-
- ut_ad(prebuilt->mysql_template);
- ut_ad(prebuilt->default_rec);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
- mem_heap_free(prebuilt->blob_heap);
- prebuilt->blob_heap = NULL;
- }
-
- for (i = 0; i < prebuilt->n_template; i++) {
-
- templ = prebuilt->mysql_template + i;
-
- if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
- templ->rec_field_no))) {
-
- /* Copy an externally stored field to the temporary
- heap */
-
- ut_a(!prebuilt->trx->has_search_latch);
-
- if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
- if (prebuilt->blob_heap == NULL) {
- prebuilt->blob_heap = mem_heap_create(
- UNIV_PAGE_SIZE);
- }
-
- heap = prebuilt->blob_heap;
- } else {
- extern_field_heap
- = mem_heap_create(UNIV_PAGE_SIZE);
-
- heap = extern_field_heap;
- }
-
- /* NOTE: if we are retrieving a big BLOB, we may
- already run out of memory in the next call, which
- causes an assert */
-
- data = btr_rec_copy_externally_stored_field(
- rec, offsets, templ->rec_field_no,
- &len, heap);
-
- ut_a(len != UNIV_SQL_NULL);
- } else {
- /* Field is stored in the row. */
-
- data = rec_get_nth_field(rec, offsets,
- templ->rec_field_no, &len);
-
- if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
- && len != UNIV_SQL_NULL) {
-
- /* It is a BLOB field locally stored in the
- InnoDB record: we MUST copy its contents to
- prebuilt->blob_heap here because later code
- assumes all BLOB values have been copied to a
- safe place. */
-
- if (prebuilt->blob_heap == NULL) {
- prebuilt->blob_heap = mem_heap_create(
- UNIV_PAGE_SIZE);
- }
-
- data = memcpy(mem_heap_alloc(
- prebuilt->blob_heap, len),
- data, len);
- }
- }
-
- if (len != UNIV_SQL_NULL) {
- row_sel_field_store_in_mysql_format(
- mysql_rec + templ->mysql_col_offset,
- templ, data, len);
-
- /* Cleanup */
- if (extern_field_heap) {
- mem_heap_free(extern_field_heap);
- extern_field_heap = NULL;
- }
-
- if (templ->mysql_null_bit_mask) {
- /* It is a nullable column with a non-NULL
- value */
- mysql_rec[templ->mysql_null_byte_offset]
- &= ~(byte) templ->mysql_null_bit_mask;
- }
- } else {
- /* MySQL assumes that the field for an SQL
- NULL value is set to the default value. */
-
- mysql_rec[templ->mysql_null_byte_offset]
- |= (byte) templ->mysql_null_bit_mask;
- memcpy(mysql_rec + templ->mysql_col_offset,
- prebuilt->default_rec + templ->mysql_col_offset,
- templ->mysql_col_len);
- }
- }
-
- return(TRUE);
-}
-
-/*************************************************************************
-Builds a previous version of a clustered index record for a consistent read */
-static
-ulint
-row_sel_build_prev_vers_for_mysql(
-/*==============================*/
- /* out: DB_SUCCESS or error code */
- read_view_t* read_view, /* in: read view */
- dict_index_t* clust_index, /* in: clustered index */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct */
- rec_t* rec, /* in: record in a clustered index */
- ulint** offsets, /* in/out: offsets returned by
- rec_get_offsets(rec, clust_index) */
- mem_heap_t** offset_heap, /* in/out: memory heap from which
- the offsets are allocated */
- rec_t** old_vers, /* out: old version, or NULL if the
- record does not exist in the view:
- i.e., it was freshly inserted
- afterwards */
- mtr_t* mtr) /* in: mtr */
-{
- ulint err;
-
- if (prebuilt->old_vers_heap) {
- mem_heap_empty(prebuilt->old_vers_heap);
- } else {
- prebuilt->old_vers_heap = mem_heap_create(200);
- }
-
- err = row_vers_build_for_consistent_read(
- rec, mtr, clust_index, offsets, read_view, offset_heap,
- prebuilt->old_vers_heap, old_vers);
- return(err);
-}
-
-/*************************************************************************
-Retrieves the clustered index record corresponding to a record in a
-non-clustered index. Does the necessary locking. Used in the MySQL
-interface. */
-static
-ulint
-row_sel_get_clust_rec_for_mysql(
-/*============================*/
- /* out: DB_SUCCESS or error code */
- row_prebuilt_t* prebuilt,/* in: prebuilt struct in the handle */
- dict_index_t* sec_index,/* in: secondary index where rec resides */
- rec_t* rec, /* in: record in a non-clustered index; if
- this is a locking read, then rec is not
- allowed to be delete-marked, and that would
- not make sense either */
- que_thr_t* thr, /* in: query thread */
- rec_t** out_rec,/* out: clustered record or an old version of
- it, NULL if the old version did not exist
- in the read view, i.e., it was a freshly
- inserted version */
- ulint** offsets,/* out: offsets returned by
- rec_get_offsets(out_rec, clust_index) */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
- the offsets are allocated */
- mtr_t* mtr) /* in: mtr used to get access to the
- non-clustered record; the same mtr is used to
- access the clustered index */
-{
- dict_index_t* clust_index;
- rec_t* clust_rec;
- rec_t* old_vers;
- ulint err;
- trx_t* trx;
-
- *out_rec = NULL;
- trx = thr_get_trx(thr);
-
- row_build_row_ref_in_tuple(prebuilt->clust_ref, sec_index, rec, trx);
-
- clust_index = dict_table_get_first_index(sec_index->table);
-
- btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
- PAGE_CUR_LE, BTR_SEARCH_LEAF,
- prebuilt->clust_pcur, 0, mtr);
-
- clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
-
- prebuilt->clust_pcur->trx_if_known = trx;
-
- /* Note: only if the search ends up on a non-infimum record is the
- low_match value the real match to the search tuple */
-
- if (!page_rec_is_user_rec(clust_rec)
- || btr_pcur_get_low_match(prebuilt->clust_pcur)
- < dict_index_get_n_unique(clust_index)) {
-
- /* In a rare case it is possible that no clust rec is found
- for a delete-marked secondary index record: if in row0umod.c
- in row_undo_mod_remove_clust_low() we have already removed
- the clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case we know that the
- clustered index record did not exist in the read view of
- trx. */
-
- if (!rec_get_deleted_flag(rec,
- dict_table_is_comp(sec_index->table))
- || prebuilt->select_lock_type != LOCK_NONE) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: error clustered record"
- " for sec rec not found\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, sec_index);
- fputs("\n"
- "InnoDB: sec index record ", stderr);
- rec_print(stderr, rec, sec_index);
- fputs("\n"
- "InnoDB: clust index record ", stderr);
- rec_print(stderr, clust_rec, clust_index);
- putc('\n', stderr);
- trx_print(stderr, trx, 600);
-
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- }
-
- clust_rec = NULL;
-
- goto func_exit;
- }
-
- *offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
- ULINT_UNDEFINED, offset_heap);
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
- /* Try to place a lock on the index record; we are searching
- the clust rec with a unique condition, hence
- we set a LOCK_REC_NOT_GAP type lock */
-
- err = lock_clust_rec_read_check_and_lock(
- 0, clust_rec, clust_index, *offsets,
- prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr);
- if (err != DB_SUCCESS) {
-
- goto err_exit;
- }
- } else {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- old_vers = NULL;
-
- /* If the isolation level allows reading of uncommitted data,
- then we never look for an earlier version */
-
- if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
- && !lock_clust_rec_cons_read_sees(
- clust_rec, clust_index, *offsets,
- trx->read_view)) {
-
- /* The following call returns 'offsets' associated with
- 'old_vers' */
- err = row_sel_build_prev_vers_for_mysql(
- trx->read_view, clust_index, prebuilt,
- clust_rec, offsets, offset_heap, &old_vers,
- mtr);
-
- if (err != DB_SUCCESS) {
-
- goto err_exit;
- }
-
- clust_rec = old_vers;
- }
-
- /* If we had to go to an earlier version of row or the
- secondary index record is delete marked, then it may be that
- the secondary index record corresponding to clust_rec
- (or old_vers) is not rec; in that case we must ignore
- such row because in our snapshot rec would not have existed.
- Remember that from rec we cannot see directly which transaction
- id corresponds to it: we have to go to the clustered index
- record. A query where we want to fetch all rows where
- the secondary index value is in some interval would return
- a wrong result if we would not drop rows which we come to
- visit through secondary index records that would not really
- exist in our snapshot. */
-
- if (clust_rec && (old_vers || rec_get_deleted_flag(
- rec,
- dict_table_is_comp(
- sec_index->table)))
- && !row_sel_sec_rec_is_for_clust_rec(
- rec, sec_index, clust_rec, clust_index)) {
- clust_rec = NULL;
- } else {
-#ifdef UNIV_SEARCH_DEBUG
- ut_a(clust_rec == NULL
- || row_sel_sec_rec_is_for_clust_rec(
- rec, sec_index, clust_rec, clust_index));
-#endif
- }
- }
-
-func_exit:
- *out_rec = clust_rec;
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
- /* We may use the cursor in update or in unlock_row():
- store its position */
-
- btr_pcur_store_position(prebuilt->clust_pcur, mtr);
- }
-
- err = DB_SUCCESS;
-err_exit:
- return(err);
-}
-
-/************************************************************************
-Restores cursor position after it has been stored. We have to take into
-account that the record the cursor was positioned on may have been deleted.
-Then we may have to move the cursor one step up or down. */
-static
-ibool
-sel_restore_position_for_mysql(
-/*===========================*/
- /* out: TRUE if we may need to
- process the record the cursor is
- now positioned on (i.e. we should
- not go to the next record yet) */
- ibool* same_user_rec, /* out: TRUE if we were able to restore
- the cursor on a user record with the
- same ordering prefix in the
- B-tree index */
- ulint latch_mode, /* in: latch mode wished in
- restoration */
- btr_pcur_t* pcur, /* in: cursor whose position
- has been stored */
- ibool moves_up, /* in: TRUE if the cursor moves up
- in the index */
- mtr_t* mtr) /* in: mtr; CAUTION: may commit
- mtr temporarily! */
-{
- ibool success;
- ulint relative_position;
-
- relative_position = pcur->rel_pos;
-
- success = btr_pcur_restore_position(latch_mode, pcur, mtr);
-
- *same_user_rec = success;
-
- if (relative_position == BTR_PCUR_ON) {
- if (success) {
- return(FALSE);
- }
-
- if (moves_up) {
- btr_pcur_move_to_next(pcur, mtr);
- }
-
- return(TRUE);
- }
-
- if (relative_position == BTR_PCUR_AFTER
- || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) {
-
- if (moves_up) {
- return(TRUE);
- }
-
- if (btr_pcur_is_on_user_rec(pcur, mtr)) {
- btr_pcur_move_to_prev(pcur, mtr);
- }
-
- return(TRUE);
- }
-
- ut_ad(relative_position == BTR_PCUR_BEFORE
- || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);
-
- if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) {
- btr_pcur_move_to_next(pcur, mtr);
- }
-
- return(TRUE);
-}
-
-/************************************************************************
-Pops a cached row for MySQL from the fetch cache. */
-UNIV_INLINE
-void
-row_sel_pop_cached_row_for_mysql(
-/*=============================*/
- byte* buf, /* in/out: buffer where to copy the
- row */
- row_prebuilt_t* prebuilt) /* in: prebuilt struct */
-{
- ulint i;
- mysql_row_templ_t* templ;
- byte* cached_rec;
- ut_ad(prebuilt->n_fetch_cached > 0);
- ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
-
- if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
- /* Copy cache record field by field, don't touch fields that
- are not covered by current key */
- cached_rec = prebuilt->fetch_cache[
- prebuilt->fetch_cache_first];
-
- for (i = 0; i < prebuilt->n_template; i++) {
- templ = prebuilt->mysql_template + i;
- ut_memcpy(buf + templ->mysql_col_offset,
- cached_rec + templ->mysql_col_offset,
- templ->mysql_col_len);
- /* Copy NULL bit of the current field from cached_rec
- to buf */
- if (templ->mysql_null_bit_mask) {
- buf[templ->mysql_null_byte_offset]
- ^= (buf[templ->mysql_null_byte_offset]
- ^ cached_rec[templ->mysql_null_byte_offset])
- & (byte)templ->mysql_null_bit_mask;
- }
- }
- }
- else {
- ut_memcpy(buf,
- prebuilt->fetch_cache[prebuilt->fetch_cache_first],
- prebuilt->mysql_prefix_len);
- }
- prebuilt->n_fetch_cached--;
- prebuilt->fetch_cache_first++;
-
- if (prebuilt->n_fetch_cached == 0) {
- prebuilt->fetch_cache_first = 0;
- }
-}
-
-/************************************************************************
-Pushes a row for MySQL to the fetch cache. */
-UNIV_INLINE
-void
-row_sel_push_cache_row_for_mysql(
-/*=============================*/
- row_prebuilt_t* prebuilt, /* in: prebuilt struct */
- rec_t* rec, /* in: record to push */
- const ulint* offsets) /* in: rec_get_offsets() */
-{
- byte* buf;
- ulint i;
-
- ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_a(!prebuilt->templ_contains_blob);
-
- if (prebuilt->fetch_cache[0] == NULL) {
- /* Allocate memory for the fetch cache */
-
- for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
-
- /* A user has reported memory corruption in these
- buffers in Linux. Put magic numbers there to help
- to track a possible bug. */
-
- buf = mem_alloc(prebuilt->mysql_row_len + 8);
-
- prebuilt->fetch_cache[i] = buf + 4;
-
- mach_write_to_4(buf, ROW_PREBUILT_FETCH_MAGIC_N);
- mach_write_to_4(buf + 4 + prebuilt->mysql_row_len,
- ROW_PREBUILT_FETCH_MAGIC_N);
- }
- }
-
- ut_ad(prebuilt->fetch_cache_first == 0);
-
- if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
- prebuilt->fetch_cache[
- prebuilt->n_fetch_cached],
- prebuilt, rec, offsets))) {
- ut_error;
- }
-
- prebuilt->n_fetch_cached++;
-}
-
-/*************************************************************************
-Tries to do a shortcut to fetch a clustered index record with a unique key,
-using the hash index if possible (not always). We assume that the search
-mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
-btr search latch has been locked in S-mode. */
-static
-ulint
-row_sel_try_search_shortcut_for_mysql(
-/*==================================*/
- /* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
- rec_t** out_rec,/* out: record if found */
- row_prebuilt_t* prebuilt,/* in: prebuilt struct */
- ulint** offsets,/* in/out: for rec_get_offsets(*out_rec) */
- mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */
- mtr_t* mtr) /* in: started mtr */
-{
- dict_index_t* index = prebuilt->index;
- dtuple_t* search_tuple = prebuilt->search_tuple;
- btr_pcur_t* pcur = prebuilt->pcur;
- trx_t* trx = prebuilt->trx;
- rec_t* rec;
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(!prebuilt->templ_contains_blob);
-
- btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, pcur,
-#ifndef UNIV_SEARCH_DEBUG
- RW_S_LATCH,
-#else
- 0,
-#endif
- mtr);
- rec = btr_pcur_get_rec(pcur);
-
- if (!page_rec_is_user_rec(rec)) {
-
- return(SEL_RETRY);
- }
-
- /* As the cursor is now placed on a user record after a search with
- the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
- fields in the user record matched to the search tuple */
-
- if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {
-
- return(SEL_EXHAUSTED);
- }
-
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- *offsets = rec_get_offsets(rec, index, *offsets,
- ULINT_UNDEFINED, heap);
-
- if (!lock_clust_rec_cons_read_sees(rec, index,
- *offsets, trx->read_view)) {
-
- return(SEL_RETRY);
- }
-
- if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
-
- return(SEL_EXHAUSTED);
- }
-
- *out_rec = rec;
-
- return(SEL_FOUND);
-}
-
-/************************************************************************
-Searches for rows in the database. This is used in the interface to
-MySQL. This function opens a cursor, and also implements fetch next
-and fetch prev. NOTE that if we do a search with a full key value
-from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried on the cursor! */
-
-ulint
-row_search_for_mysql(
-/*=================*/
- /* out: DB_SUCCESS,
- DB_RECORD_NOT_FOUND,
- DB_END_OF_INDEX, DB_DEADLOCK,
- DB_LOCK_TABLE_FULL, DB_CORRUPTION,
- or DB_TOO_BIG_RECORD */
- byte* buf, /* in/out: buffer for the fetched
- row in the MySQL format */
- ulint mode, /* in: search mode PAGE_CUR_L, ... */
- row_prebuilt_t* prebuilt, /* in: prebuilt struct for the
- table handle; this contains the info
- of search_tuple, index; if search
- tuple contains 0 fields then we
- position the cursor at the start or
- the end of the index, depending on
- 'mode' */
- ulint match_mode, /* in: 0 or ROW_SEL_EXACT or
- ROW_SEL_EXACT_PREFIX */
- ulint direction) /* in: 0 or ROW_SEL_NEXT or
- ROW_SEL_PREV; NOTE: if this is != 0,
- then prebuilt must have a pcur
- with stored position! In opening of a
- cursor 'direction' should be 0. */
-{
- dict_index_t* index = prebuilt->index;
- ibool comp = dict_table_is_comp(index->table);
- dtuple_t* search_tuple = prebuilt->search_tuple;
- btr_pcur_t* pcur = prebuilt->pcur;
- trx_t* trx = prebuilt->trx;
- dict_index_t* clust_index;
- que_thr_t* thr;
- rec_t* rec;
- rec_t* result_rec;
- rec_t* clust_rec;
- ulint err = DB_SUCCESS;
- ibool unique_search = FALSE;
- ibool unique_search_from_clust_index = FALSE;
- ibool mtr_has_extra_clust_latch = FALSE;
- ibool moves_up = FALSE;
- ibool set_also_gap_locks = TRUE;
- /* if the query is a plain locking SELECT, and the isolation level
- is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */
- ibool did_semi_consistent_read = FALSE;
- /* if the returned record was locked and we did a semi-consistent
- read (fetch the newest committed version), then this is set to
- TRUE */
-#ifdef UNIV_SEARCH_DEBUG
- ulint cnt = 0;
-#endif /* UNIV_SEARCH_DEBUG */
- ulint next_offs;
- ibool same_user_rec;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(index && pcur && search_tuple);
- ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
-
- if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you used"
- " DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
-
- return(DB_ERROR);
- }
-
- if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
-#if 0
- /* August 19, 2005 by Heikki: temporarily disable this error
- print until the cursor lock count is done correctly.
- See bugs #12263 and #12456!*/
-
- if (trx->n_mysql_tables_in_use == 0
- && UNIV_UNLIKELY(prebuilt->select_lock_type == LOCK_NONE)) {
- /* Note that if MySQL uses an InnoDB temp table that it
- created inside LOCK TABLES, then n_mysql_tables_in_use can
- be zero; in that case select_lock_type is set to LOCK_X in
- ::start_stmt. */
-
- fputs("InnoDB: Error: MySQL is trying to perform a SELECT\n"
- "InnoDB: but it has not locked"
- " any tables in ::external_lock()!\n",
- stderr);
- trx_print(stderr, trx, 600);
- fputc('\n', stderr);
- }
-#endif
-
-#if 0
- fprintf(stderr, "Match mode %lu\n search tuple ",
- (ulong) match_mode);
- dtuple_print(search_tuple);
- fprintf(stderr, "N tables locked %lu\n",
- (ulong) trx->mysql_n_tables_locked);
-#endif
- /*-------------------------------------------------------------*/
- /* PHASE 0: Release a possible s-latch we are holding on the
- adaptive hash index latch if there is someone waiting behind */
-
- if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
- && trx->has_search_latch) {
-
- /* There is an x-latch request on the adaptive hash index:
- release the s-latch to reduce starvation and wait for
- BTR_SEA_TIMEOUT rounds before trying to keep it again over
- calls from MySQL */
-
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
-
- trx->search_latch_timeout = BTR_SEA_TIMEOUT;
- }
-
- /* Reset the new record lock info if srv_locks_unsafe_for_binlog
- is set or the session is using a READ COMMITTED isolation level. Then
- we are able to remove the record locks set here on an individual
- row. */
- prebuilt->new_rec_locks = 0;
-
- /*-------------------------------------------------------------*/
- /* PHASE 1: Try to pop the row from the prefetch cache */
-
- if (UNIV_UNLIKELY(direction == 0)) {
- trx->op_info = "starting index read";
-
- prebuilt->n_rows_fetched = 0;
- prebuilt->n_fetch_cached = 0;
- prebuilt->fetch_cache_first = 0;
-
- if (prebuilt->sel_graph == NULL) {
- /* Build a dummy select query graph */
- row_prebuild_sel_graph(prebuilt);
- }
- } else {
- trx->op_info = "fetching rows";
-
- if (prebuilt->n_rows_fetched == 0) {
- prebuilt->fetch_direction = direction;
- }
-
- if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) {
- if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) {
- ut_error;
- /* TODO: scrollable cursor: restore cursor to
- the place of the latest returned row,
- or better: prevent caching for a scroll
- cursor! */
- }
-
- prebuilt->n_rows_fetched = 0;
- prebuilt->n_fetch_cached = 0;
- prebuilt->fetch_cache_first = 0;
-
- } else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) {
- row_sel_pop_cached_row_for_mysql(buf, prebuilt);
-
- prebuilt->n_rows_fetched++;
-
- srv_n_rows_read++;
- err = DB_SUCCESS;
- goto func_exit;
- }
-
- if (prebuilt->fetch_cache_first > 0
- && prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) {
-
- /* The previous returned row was popped from the fetch
- cache, but the cache was not full at the time of the
- popping: no more rows can exist in the result set */
-
- err = DB_RECORD_NOT_FOUND;
- goto func_exit;
- }
-
- prebuilt->n_rows_fetched++;
-
- if (prebuilt->n_rows_fetched > 1000000000) {
- /* Prevent wrap-over */
- prebuilt->n_rows_fetched = 500000000;
- }
-
- mode = pcur->search_mode;
- }
-
- /* In a search where at most one record in the index may match, we
- can use a LOCK_REC_NOT_GAP type record lock when locking a
- non-delete-marked matching record.
-
- Note that in a unique secondary index there may be different
- delete-marked versions of a record where only the primary key
- values differ: thus in a secondary index we must use next-key
- locks when locking delete-marked records. */
-
- if (match_mode == ROW_SEL_EXACT
- && index->type & DICT_UNIQUE
- && dtuple_get_n_fields(search_tuple)
- == dict_index_get_n_unique(index)
- && (index->type & DICT_CLUSTERED
- || !dtuple_contains_null(search_tuple))) {
-
- /* Note above that a UNIQUE secondary index can contain many
- rows with the same key value if one of the columns is the SQL
- null. A clustered index under MySQL can never contain null
- columns because we demand that all the columns in primary key
- are non-null. */
-
- unique_search = TRUE;
-
- /* Even if the condition is unique, MySQL seems to try to
- retrieve also a second row if a primary key contains more than
- 1 column. Return immediately if this is not a HANDLER
- command. */
-
- if (UNIV_UNLIKELY(direction != 0
- && !prebuilt->used_in_HANDLER)) {
-
- err = DB_RECORD_NOT_FOUND;
- goto func_exit;
- }
- }
-
- mtr_start(&mtr);
-
- /*-------------------------------------------------------------*/
- /* PHASE 2: Try fast adaptive hash index search if possible */
-
- /* Next test if this is the special case where we can use the fast
- adaptive hash index to try the search. Since we must release the
- search system latch when we retrieve an externally stored field, we
- cannot use the adaptive hash index in a search in the case the row
- may be long and there may be externally stored fields */
-
- if (UNIV_UNLIKELY(direction == 0)
- && unique_search
- && index->type & DICT_CLUSTERED
- && !prebuilt->templ_contains_blob
- && !prebuilt->used_in_HANDLER
- && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
-
- mode = PAGE_CUR_GE;
-
- unique_search_from_clust_index = TRUE;
-
- if (trx->mysql_n_tables_locked == 0
- && prebuilt->select_lock_type == LOCK_NONE
- && trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
- && trx->read_view) {
-
- /* This is a SELECT query done as a consistent read,
- and the read view has already been allocated:
- let us try a search shortcut through the hash
- index.
- NOTE that we must also test that
- mysql_n_tables_locked == 0, because this might
- also be INSERT INTO ... SELECT ... or
- CREATE TABLE ... SELECT ... . Our algorithm is
- NOT prepared for inserts interleaved with the SELECT,
- and if we try that, we can deadlock on the adaptive
- hash index semaphore! */
-
-#ifndef UNIV_SEARCH_DEBUG
- if (!trx->has_search_latch) {
- rw_lock_s_lock(&btr_search_latch);
- trx->has_search_latch = TRUE;
- }
-#endif
- switch (row_sel_try_search_shortcut_for_mysql(
- &rec, prebuilt, &offsets, &heap,
- &mtr)) {
- case SEL_FOUND:
-#ifdef UNIV_SEARCH_DEBUG
- ut_a(0 == cmp_dtuple_rec(search_tuple,
- rec, offsets));
-#endif
- if (!row_sel_store_mysql_rec(buf, prebuilt,
- rec, offsets)) {
- err = DB_TOO_BIG_RECORD;
-
- /* We let the main loop do the
- error handling */
- goto shortcut_fails_too_big_rec;
- }
-
- mtr_commit(&mtr);
-
- /* ut_print_name(stderr, index->name);
- fputs(" shortcut\n", stderr); */
-
- srv_n_rows_read++;
-
- if (trx->search_latch_timeout > 0
- && trx->has_search_latch) {
-
- trx->search_latch_timeout--;
-
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
- }
-
- /* NOTE that we do NOT store the cursor
- position */
- err = DB_SUCCESS;
- goto func_exit;
-
- case SEL_EXHAUSTED:
- mtr_commit(&mtr);
-
- /* ut_print_name(stderr, index->name);
- fputs(" record not found 2\n", stderr); */
-
- if (trx->search_latch_timeout > 0
- && trx->has_search_latch) {
-
- trx->search_latch_timeout--;
-
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
- }
-
- /* NOTE that we do NOT store the cursor
- position */
-
- err = DB_RECORD_NOT_FOUND;
- goto func_exit;
- }
-shortcut_fails_too_big_rec:
- mtr_commit(&mtr);
- mtr_start(&mtr);
- }
- }
-
- /*-------------------------------------------------------------*/
- /* PHASE 3: Open or restore index cursor position */
-
- if (trx->has_search_latch) {
- rw_lock_s_unlock(&btr_search_latch);
- trx->has_search_latch = FALSE;
- }
-
- trx_start_if_not_started(trx);
-
- if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && prebuilt->select_lock_type != LOCK_NONE
- && trx->mysql_thd != NULL
- && thd_is_select(trx->mysql_thd)) {
- /* It is a plain locking SELECT and the isolation
- level is low: do not lock gaps */
-
- set_also_gap_locks = FALSE;
- }
-
- /* Note that if the search mode was GE or G, then the cursor
- naturally moves upward (in fetch next) in alphabetical order,
- otherwise downward */
-
- if (UNIV_UNLIKELY(direction == 0)) {
- if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
- moves_up = TRUE;
- }
- } else if (direction == ROW_SEL_NEXT) {
- moves_up = TRUE;
- }
-
- thr = que_fork_get_first_thr(prebuilt->sel_graph);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
- clust_index = dict_table_get_first_index(index->table);
-
- if (UNIV_LIKELY(direction != 0)) {
- ibool need_to_process = sel_restore_position_for_mysql(
- &same_user_rec, BTR_SEARCH_LEAF,
- pcur, moves_up, &mtr);
-
- if (UNIV_UNLIKELY(need_to_process)) {
- if (UNIV_UNLIKELY(prebuilt->row_read_type
- == ROW_READ_DID_SEMI_CONSISTENT)) {
- /* We did a semi-consistent read,
- but the record was removed in
- the meantime. */
- prebuilt->row_read_type
- = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- } else if (UNIV_LIKELY(prebuilt->row_read_type
- != ROW_READ_DID_SEMI_CONSISTENT)) {
-
- /* The cursor was positioned on the record
- that we returned previously. If we need
- to repeat a semi-consistent read as a
- pessimistic locking read, the record
- cannot be skipped. */
-
- goto next_rec;
- }
-
- } else if (dtuple_get_n_fields(search_tuple) > 0) {
-
- btr_pcur_open_with_no_init(index, search_tuple, mode,
- BTR_SEARCH_LEAF,
- pcur, 0, &mtr);
-
- pcur->trx_if_known = trx;
-
- rec = btr_pcur_get_rec(pcur);
-
- if (!moves_up
- && !page_rec_is_supremum(rec)
- && set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a gap lock on the next index record
- to prevent phantoms in ORDER BY ... DESC queries */
-
- offsets = rec_get_offsets(page_rec_get_next(rec),
- index, offsets,
- ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(page_rec_get_next(rec),
- index, offsets,
- prebuilt->select_lock_type,
- LOCK_GAP, thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
- } else {
- if (mode == PAGE_CUR_G) {
- btr_pcur_open_at_index_side(
- TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE,
- &mtr);
- } else if (mode == PAGE_CUR_L) {
- btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE,
- &mtr);
- }
- }
-
- if (!prebuilt->sql_stat_start) {
- /* No need to set an intention lock or assign a read view */
-
- if (trx->read_view == NULL
- && prebuilt->select_lock_type == LOCK_NONE) {
-
- fputs("InnoDB: Error: MySQL is trying to"
- " perform a consistent read\n"
- "InnoDB: but the read view is not assigned!\n",
- stderr);
- trx_print(stderr, trx, 600);
- fputc('\n', stderr);
- ut_a(0);
- }
- } else if (prebuilt->select_lock_type == LOCK_NONE) {
- /* This is a consistent read */
- /* Assign a read view for the query */
-
- trx_assign_read_view(trx);
- prebuilt->sql_stat_start = FALSE;
- } else {
- ulint lock_mode;
- if (prebuilt->select_lock_type == LOCK_S) {
- lock_mode = LOCK_IS;
- } else {
- lock_mode = LOCK_IX;
- }
- err = lock_table(0, index->table, lock_mode, thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- prebuilt->sql_stat_start = FALSE;
- }
-
-rec_loop:
- /*-------------------------------------------------------------*/
- /* PHASE 4: Look for matching records in a loop */
-
- rec = btr_pcur_get_rec(pcur);
- ut_ad(!!page_rec_is_comp(rec) == comp);
-#ifdef UNIV_SEARCH_DEBUG
- /*
- fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
- buf_frame_get_page_no(buf_frame_align(rec)));
- rec_print(rec);
- */
-#endif /* UNIV_SEARCH_DEBUG */
-
- if (page_rec_is_infimum(rec)) {
-
- /* The infimum record on a page cannot be in the result set,
- and neither can a record lock be placed on it: we skip such
- a record. */
-
- goto next_rec;
- }
-
- if (page_rec_is_supremum(rec)) {
-
- if (set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a lock on the index record */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using a READ COMMITTED isolation
- level we do not lock gaps. Supremum record is really
- a gap and therefore we do not set locks there. */
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(rec, index, offsets,
- prebuilt->select_lock_type,
- LOCK_ORDINARY, thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
- /* A page supremum record cannot be in the result set: skip
- it now that we have placed a possible lock on it */
-
- goto next_rec;
- }
-
- /*-------------------------------------------------------------*/
- /* Do sanity checks in case our cursor has bumped into page
- corruption */
-
- if (comp) {
- next_offs = rec_get_next_offs(rec, TRUE);
- if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) {
-
- goto wrong_offs;
- }
- } else {
- next_offs = rec_get_next_offs(rec, FALSE);
- if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) {
-
- goto wrong_offs;
- }
- }
-
- if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
-
-wrong_offs:
- if (srv_force_recovery == 0 || moves_up == FALSE) {
- ut_print_timestamp(stderr);
- buf_page_print(buf_frame_align(rec));
- fprintf(stderr,
- "\nInnoDB: rec address %p, first"
- " buffer frame %p\n"
- "InnoDB: buffer pool high end %p,"
- " buf block fix count %lu\n",
- (void*) rec, (void*) buf_pool->frame_zero,
- (void*) buf_pool->high_end,
- (ulong)buf_block_align(rec)->buf_fix_count);
- fprintf(stderr,
- "InnoDB: Index corruption: rec offs %lu"
- " next offs %lu, page no %lu,\n"
- "InnoDB: ",
- (ulong) page_offset(rec),
- (ulong) next_offs,
- (ulong) buf_frame_get_page_no(rec));
- dict_index_name_print(stderr, trx, index);
- fputs(". Run CHECK TABLE. You may need to\n"
- "InnoDB: restore from a backup, or"
- " dump + drop + reimport the table.\n",
- stderr);
-
- err = DB_CORRUPTION;
-
- goto lock_wait_or_error;
- } else {
- /* The user may be dumping a corrupt table. Jump
- over the corruption to recover as much as possible. */
-
- fprintf(stderr,
- "InnoDB: Index corruption: rec offs %lu"
- " next offs %lu, page no %lu,\n"
- "InnoDB: ",
- (ulong) page_offset(rec),
- (ulong) next_offs,
- (ulong) buf_frame_get_page_no(rec));
- dict_index_name_print(stderr, trx, index);
- fputs(". We try to skip the rest of the page.\n",
- stderr);
-
- btr_pcur_move_to_last_on_page(pcur, &mtr);
-
- goto next_rec;
- }
- }
- /*-------------------------------------------------------------*/
-
- /* Calculate the 'offsets' associated with 'rec' */
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
- if (!rec_validate(rec, offsets)
- || !btr_index_rec_validate(rec, index, FALSE)) {
- fprintf(stderr,
- "InnoDB: Index corruption: rec offs %lu"
- " next offs %lu, page no %lu,\n"
- "InnoDB: ",
- (ulong) page_offset(rec),
- (ulong) next_offs,
- (ulong) buf_frame_get_page_no(rec));
- dict_index_name_print(stderr, trx, index);
- fputs(". We try to skip the record.\n",
- stderr);
-
- goto next_rec;
- }
- }
-
- /* Note that we cannot trust the up_match value in the cursor at this
- place because we can arrive here after moving the cursor! Thus
- we have to recompare rec and search_tuple to determine if they
- match enough. */
-
- if (match_mode == ROW_SEL_EXACT) {
- /* Test if the index record matches completely to search_tuple
- in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
-
- /* fputs("Comparing rec and search tuple\n", stderr); */
-
- if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) {
-
- if (set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level
- == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a gap lock on the index
- record only if innodb_locks_unsafe_for_binlog
- option is not set or this session is not
- using a READ COMMITTED isolation level. */
-
- err = sel_set_rec_lock(
- rec, index, offsets,
- prebuilt->select_lock_type, LOCK_GAP,
- thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
-
- btr_pcur_store_position(pcur, &mtr);
-
- err = DB_RECORD_NOT_FOUND;
- /* ut_print_name(stderr, index->name);
- fputs(" record not found 3\n", stderr); */
-
- goto normal_return;
- }
-
- } else if (match_mode == ROW_SEL_EXACT_PREFIX) {
-
- if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) {
-
- if (set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level
- == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a gap lock on the index
- record only if innodb_locks_unsafe_for_binlog
- option is not set or this session is not
- using a READ COMMITTED isolation level. */
-
- err = sel_set_rec_lock(
- rec, index, offsets,
- prebuilt->select_lock_type, LOCK_GAP,
- thr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
- }
-
- btr_pcur_store_position(pcur, &mtr);
-
- err = DB_RECORD_NOT_FOUND;
- /* ut_print_name(stderr, index->name);
- fputs(" record not found 4\n", stderr); */
-
- goto normal_return;
- }
- }
-
- /* We are ready to look at a possible new index entry in the result
- set: the cursor is now placed on a user record */
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
- /* Try to place a lock on the index record; note that delete
- marked records are a special case in a unique search. If there
- is a non-delete marked record, then it is enough to lock its
- existence with LOCK_REC_NOT_GAP. */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using a READ COMMITED isolation
- level we lock only the record, i.e., next-key locking is
- not used. */
-
- ulint lock_type;
-
- if (!set_also_gap_locks
- || srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED
- || (unique_search
- && !UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp)))) {
-
- goto no_gap_lock;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- /* If we are doing a 'greater or equal than a primary key
- value' search from a clustered index, and we find a record
- that has that exact primary key value, then there is no need
- to lock the gap before the record, because no insert in the
- gap can be in our search range. That is, no phantom row can
- appear that way.
-
- An example: if col1 is the primary key, the search is WHERE
- col1 >= 100, and we find a record where col1 = 100, then no
- need to lock the gap before that record. */
-
- if (index == clust_index
- && mode == PAGE_CUR_GE
- && direction == 0
- && dtuple_get_n_fields_cmp(search_tuple)
- == dict_index_get_n_unique(index)
- && 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) {
-no_gap_lock:
- lock_type = LOCK_REC_NOT_GAP;
- }
-
- err = sel_set_rec_lock(rec, index, offsets,
- prebuilt->select_lock_type,
- lock_type, thr);
-
- switch (err) {
- rec_t* old_vers;
- case DB_SUCCESS:
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED) {
- /* Note that a record of
- prebuilt->index was locked. */
- prebuilt->new_rec_locks = 1;
- }
- break;
- case DB_LOCK_WAIT:
- if (UNIV_LIKELY(prebuilt->row_read_type
- != ROW_READ_TRY_SEMI_CONSISTENT)
- || index != clust_index) {
-
- goto lock_wait_or_error;
- }
-
- /* The following call returns 'offsets'
- associated with 'old_vers' */
- err = row_sel_build_committed_vers_for_mysql(
- clust_index, prebuilt, rec,
- &offsets, &heap, &old_vers, &mtr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- mutex_enter(&kernel_mutex);
- if (trx->was_chosen_as_deadlock_victim) {
- mutex_exit(&kernel_mutex);
- err = DB_DEADLOCK;
-
- goto lock_wait_or_error;
- }
- if (UNIV_LIKELY(trx->wait_lock != NULL)) {
- lock_cancel_waiting_and_release(
- trx->wait_lock);
- prebuilt->new_rec_locks = 0;
- } else {
- mutex_exit(&kernel_mutex);
-
- /* The lock was granted while we were
- searching for the last committed version.
- Do a normal locking read. */
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED,
- &heap);
- err = DB_SUCCESS;
- /* Note that a record of
- prebuilt->index was locked. */
- prebuilt->new_rec_locks = 1;
- break;
- }
- mutex_exit(&kernel_mutex);
-
- if (old_vers == NULL) {
- /* The row was not yet committed */
-
- goto next_rec;
- }
-
- did_semi_consistent_read = TRUE;
- rec = old_vers;
- break;
- default:
-
- goto lock_wait_or_error;
- }
- } else {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
-
- /* Do nothing: we let a non-locking SELECT read the
- latest version of the record */
-
- } else if (index == clust_index) {
-
- /* Fetch a previous version of the row if the current
- one is not visible in the snapshot; if we have a very
- high force recovery level set, we try to avoid crashes
- by skipping this lookup */
-
- if (UNIV_LIKELY(srv_force_recovery < 5)
- && !lock_clust_rec_cons_read_sees(
- rec, index, offsets, trx->read_view)) {
-
- rec_t* old_vers;
- /* The following call returns 'offsets'
- associated with 'old_vers' */
- err = row_sel_build_prev_vers_for_mysql(
- trx->read_view, clust_index,
- prebuilt, rec, &offsets, &heap,
- &old_vers, &mtr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- if (old_vers == NULL) {
- /* The row did not exist yet in
- the read view */
-
- goto next_rec;
- }
-
- rec = old_vers;
- }
- } else if (!lock_sec_rec_cons_read_sees(rec, index,
- trx->read_view)) {
- /* We are looking into a non-clustered index,
- and to get the right version of the record we
- have to look also into the clustered index: this
- is necessary, because we can only get the undo
- information via the clustered index record. */
-
- ut_ad(index != clust_index);
-
- goto requires_clust_rec;
- }
- }
-
- /* NOTE that at this point rec can be an old version of a clustered
- index record built for a consistent read. We cannot assume after this
- point that rec is on a buffer pool page. Functions like
- page_rec_is_comp() cannot be used! */
-
- if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp))) {
-
- /* The record is delete-marked: we can skip it */
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE
- && !did_semi_consistent_read) {
-
- /* No need to keep a lock on a delete-marked record
- if we do not want to use next-key locking. */
-
- row_unlock_for_mysql(prebuilt, TRUE);
- }
-
- /* This is an optimization to skip setting the next key lock
- on the record that follows this delete-marked record. This
- optimization works because of the unique search criteria
- which precludes the presence of a range lock between this
- delete marked record and the record following it.
-
- For now this is applicable only to clustered indexes while
- doing a unique search. There is scope for further optimization
- applicable to unique secondary indexes. Current behaviour is
- to widen the scope of a lock on an already delete marked record
- if the same record is deleted twice by the same transaction */
- if (index == clust_index && unique_search) {
- err = DB_RECORD_NOT_FOUND;
-
- goto normal_return;
- }
-
- goto next_rec;
- }
-
- /* Get the clustered index record if needed, if we did not do the
- search using the clustered index. */
-
- if (index != clust_index && prebuilt->need_to_access_clustered) {
-
-requires_clust_rec:
- /* We use a 'goto' to the preceding label if a consistent
- read of a secondary index record requires us to look up old
- versions of the associated clustered index record. */
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- /* It was a non-clustered index and we must fetch also the
- clustered index record */
-
- mtr_has_extra_clust_latch = TRUE;
-
- /* The following call returns 'offsets' associated with
- 'clust_rec'. Note that 'clust_rec' can be an old version
- built for a consistent read. */
-
- err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
- thr, &clust_rec,
- &offsets, &heap, &mtr);
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- if (clust_rec == NULL) {
- /* The record did not exist in the read view */
- ut_ad(prebuilt->select_lock_type == LOCK_NONE);
-
- goto next_rec;
- }
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
- /* Note that both the secondary index record
- and the clustered index record were locked. */
- ut_ad(prebuilt->new_rec_locks == 1);
- prebuilt->new_rec_locks = 2;
- }
-
- if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) {
-
- /* The record is delete marked: we can skip it */
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* No need to keep a lock on a delete-marked
- record if we do not want to use next-key
- locking. */
-
- row_unlock_for_mysql(prebuilt, TRUE);
- }
-
- goto next_rec;
- }
-
- if (prebuilt->need_to_access_clustered) {
-
- result_rec = clust_rec;
-
- ut_ad(rec_offs_validate(result_rec, clust_index,
- offsets));
- } else {
- /* We used 'offsets' for the clust rec, recalculate
- them for 'rec' */
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- result_rec = rec;
- }
- } else {
- result_rec = rec;
- }
-
- /* We found a qualifying record 'result_rec'. At this point,
- 'offsets' are associated with 'result_rec'. */
-
- ut_ad(rec_offs_validate(result_rec,
- result_rec != rec ? clust_index : index,
- offsets));
-
- if ((match_mode == ROW_SEL_EXACT
- || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
- && prebuilt->select_lock_type == LOCK_NONE
- && !prebuilt->templ_contains_blob
- && !prebuilt->clust_index_was_generated
- && !prebuilt->used_in_HANDLER
- && prebuilt->template_type
- != ROW_MYSQL_DUMMY_TEMPLATE) {
-
- /* Inside an update, for example, we do not cache rows,
- since we may use the cursor position to do the actual
- update, that is why we require ...lock_type == LOCK_NONE.
- Since we keep space in prebuilt only for the BLOBs of
- a single row, we cannot cache rows in the case there
- are BLOBs in the fields to be fetched. In HANDLER we do
- not cache rows because there the cursor is a scrollable
- cursor. */
-
- row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
- offsets);
- if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
-
- goto got_row;
- }
-
- goto next_rec;
- } else {
- if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) {
- memcpy(buf + 4, result_rec
- - rec_offs_extra_size(offsets),
- rec_offs_size(offsets));
- mach_write_to_4(buf,
- rec_offs_extra_size(offsets) + 4);
- } else {
- if (!row_sel_store_mysql_rec(buf, prebuilt,
- result_rec, offsets)) {
- err = DB_TOO_BIG_RECORD;
-
- goto lock_wait_or_error;
- }
- }
-
- if (prebuilt->clust_index_was_generated) {
- if (result_rec != rec) {
- offsets = rec_get_offsets(
- rec, index, offsets, ULINT_UNDEFINED,
- &heap);
- }
- row_sel_store_row_id_to_prebuilt(prebuilt, rec,
- index, offsets);
- }
- }
-
- /* From this point on, 'offsets' are invalid. */
-
-got_row:
- /* We have an optimization to save CPU time: if this is a consistent
- read on a unique condition on the clustered index, then we do not
- store the pcur position, because any fetch next or prev will anyway
- return 'end of file'. Exceptions are locking reads and the MySQL
- HANDLER command where the user can move the cursor with PREV or NEXT
- even after a unique search. */
-
- if (!unique_search_from_clust_index
- || prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->used_in_HANDLER) {
-
- /* Inside an update always store the cursor position */
-
- btr_pcur_store_position(pcur, &mtr);
- }
-
- err = DB_SUCCESS;
-
- goto normal_return;
-
-next_rec:
- /* Reset the old and new "did semi-consistent read" flags. */
- if (UNIV_UNLIKELY(prebuilt->row_read_type
- == ROW_READ_DID_SEMI_CONSISTENT)) {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- did_semi_consistent_read = FALSE;
- prebuilt->new_rec_locks = 0;
-
- /*-------------------------------------------------------------*/
- /* PHASE 5: Move the cursor to the next index record */
-
- if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
- /* We must commit mtr if we are moving to the next
- non-clustered index record, because we could break the
- latching order if we would access a different clustered
- index page right away without releasing the previous. */
-
- btr_pcur_store_position(pcur, &mtr);
-
- mtr_commit(&mtr);
- mtr_has_extra_clust_latch = FALSE;
-
- mtr_start(&mtr);
- if (sel_restore_position_for_mysql(&same_user_rec,
- BTR_SEARCH_LEAF,
- pcur, moves_up, &mtr)) {
-#ifdef UNIV_SEARCH_DEBUG
- cnt++;
-#endif /* UNIV_SEARCH_DEBUG */
-
- goto rec_loop;
- }
- }
-
- if (moves_up) {
- if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) {
-not_moved:
- btr_pcur_store_position(pcur, &mtr);
-
- if (match_mode != 0) {
- err = DB_RECORD_NOT_FOUND;
- } else {
- err = DB_END_OF_INDEX;
- }
-
- goto normal_return;
- }
- } else {
- if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) {
- goto not_moved;
- }
- }
-
-#ifdef UNIV_SEARCH_DEBUG
- cnt++;
-#endif /* UNIV_SEARCH_DEBUG */
-
- goto rec_loop;
-
-lock_wait_or_error:
- /* Reset the old and new "did semi-consistent read" flags. */
- if (UNIV_UNLIKELY(prebuilt->row_read_type
- == ROW_READ_DID_SEMI_CONSISTENT)) {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- did_semi_consistent_read = FALSE;
-
- /*-------------------------------------------------------------*/
-
- btr_pcur_store_position(pcur, &mtr);
-
- mtr_commit(&mtr);
- mtr_has_extra_clust_latch = FALSE;
-
- trx->error_state = err;
-
- /* The following is a patch for MySQL */
-
- que_thr_stop_for_mysql(thr);
-
- thr->lock_state = QUE_THR_LOCK_ROW;
-
- if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
- /* It was a lock wait, and it ended */
-
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
- mtr_start(&mtr);
-
- sel_restore_position_for_mysql(&same_user_rec,
- BTR_SEARCH_LEAF, pcur,
- moves_up, &mtr);
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level == TRX_ISO_READ_COMMITTED)
- && !same_user_rec) {
-
- /* Since we were not able to restore the cursor
- on the same user record, we cannot use
- row_unlock_for_mysql() to unlock any records, and
- we must thus reset the new rec lock info. Since
- in lock0lock.c we have blocked the inheriting of gap
- X-locks, we actually do not have any new record locks
- set in this case.
-
- Note that if we were able to restore on the 'same'
- user record, it is still possible that we were actually
- waiting on a delete-marked record, and meanwhile
- it was removed by purge and inserted again by some
- other user. But that is no problem, because in
- rec_loop we will again try to set a lock, and
- new_rec_lock_info in trx will be right at the end. */
-
- prebuilt->new_rec_locks = 0;
- }
-
- mode = pcur->search_mode;
-
- goto rec_loop;
- }
-
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
-
-#ifdef UNIV_SEARCH_DEBUG
- /* fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
-#endif /* UNIV_SEARCH_DEBUG */
- goto func_exit;
-
-normal_return:
- /*-------------------------------------------------------------*/
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- mtr_commit(&mtr);
-
- if (prebuilt->n_fetch_cached > 0) {
- row_sel_pop_cached_row_for_mysql(buf, prebuilt);
-
- err = DB_SUCCESS;
- }
-
-#ifdef UNIV_SEARCH_DEBUG
- /* fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
-#endif /* UNIV_SEARCH_DEBUG */
- if (err == DB_SUCCESS) {
- srv_n_rows_read++;
- }
-
-func_exit:
- trx->op_info = "";
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- /* Set or reset the "did semi-consistent read" flag on return.
- The flag did_semi_consistent_read is set if and only if
- the record being returned was fetched with a semi-consistent read. */
- ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS
- || !did_semi_consistent_read);
-
- if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) {
- if (UNIV_UNLIKELY(did_semi_consistent_read)) {
- prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT;
- } else {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- }
- return(err);
-}
-
-/***********************************************************************
-Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache. */
-
-ibool
-row_search_check_if_query_cache_permitted(
-/*======================================*/
- /* out: TRUE if storing or retrieving
- from the query cache is permitted */
- trx_t* trx, /* in: transaction object */
- const char* norm_name) /* in: concatenation of database name,
- '/' char, table name */
-{
- dict_table_t* table;
- ibool ret = FALSE;
-
- table = dict_table_get(norm_name, FALSE);
-
- if (table == NULL) {
-
- return(FALSE);
- }
-
- mutex_enter(&kernel_mutex);
-
- /* Start the transaction if it is not started yet */
-
- trx_start_if_not_started_low(trx);
-
- /* If there are locks on the table or some trx has invalidated the
- cache up to our trx id, then ret = FALSE.
- We do not check what type locks there are on the table, though only
- IX type locks actually would require ret = FALSE. */
-
- if (UT_LIST_GET_LEN(table->locks) == 0
- && ut_dulint_cmp(trx->id,
- table->query_cache_inv_trx_id) >= 0) {
-
- ret = TRUE;
-
- /* If the isolation level is high, assign a read view for the
- transaction if it does not yet have one */
-
- if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
- && !trx->read_view) {
-
- trx->read_view = read_view_open_now(
- trx->id, trx->global_read_view_heap);
- trx->global_read_view = trx->read_view;
- }
- }
-
- mutex_exit(&kernel_mutex);
-
- return(ret);
-}
-
-/***********************************************************************
-Read the AUTOINC column from the current row. If the value is less than
-0 and the type is not unsigned then we reset the value to 0. */
-static
-ib_ulonglong
-row_search_autoinc_read_column(
-/*===========================*/
- /* out: value read from the column */
- dict_index_t* index, /* in: index to read from */
- const rec_t* rec, /* in: current rec */
- ulint col_no, /* in: column number */
- ibool unsigned_type) /* in: signed or unsigned flag */
-{
- ulint len;
- const byte* data;
- ib_ulonglong value;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- *offsets_ = sizeof offsets_ / sizeof *offsets_;
-
- /* TODO: We have to cast away the const of rec for now. This needs
- to be fixed later.*/
- offsets = rec_get_offsets(
- (rec_t*) rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- /* TODO: We have to cast away the const of rec for now. This needs
- to be fixed later.*/
- data = rec_get_nth_field((rec_t*)rec, offsets, col_no, &len);
-
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len <= sizeof value);
-
- value = mach_read_int_type(data, len, unsigned_type);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- /* We assume that the autoinc counter can't be negative. */
- if (!unsigned_type && (ib_longlong) value < 0) {
- value = 0;
- }
-
- return(value);
-}
-
-/***********************************************************************
-Get the last row. */
-static
-const rec_t*
-row_search_autoinc_get_rec(
-/*=======================*/
- /* out: current rec or NULL */
- btr_pcur_t* pcur, /* in: the current cursor */
- mtr_t* mtr) /* in: mini transaction */
-{
- do {
- const rec_t* rec = btr_pcur_get_rec(pcur);
-
- if (page_rec_is_user_rec(rec)) {
- return(rec);
- }
- } while (btr_pcur_move_to_prev(pcur, mtr));
-
- return(NULL);
-}
-
-/***********************************************************************
-Read the max AUTOINC value from an index. */
-
-ulint
-row_search_max_autoinc(
-/*===================*/
- /* out: DB_SUCCESS if all OK else
- error code, DB_RECORD_NOT_FOUND if
- column name can't be found in index */
- dict_index_t* index, /* in: index to search */
- const char* col_name, /* in: name of autoinc column */
- ib_ulonglong* value) /* out: AUTOINC value read */
-{
- ulint i;
- ulint n_cols;
- dict_field_t* dfield = NULL;
- ulint error = DB_SUCCESS;
-
- n_cols = dict_index_get_n_ordering_defined_by_user(index);
-
- /* Search the index for the AUTOINC column name */
- for (i = 0; i < n_cols; ++i) {
- dfield = dict_index_get_nth_field(index, i);
-
- if (strcmp(col_name, dfield->name) == 0) {
- break;
- }
- }
-
- *value = 0;
-
- /* Must find the AUTOINC column name */
- if (i < n_cols && dfield) {
- mtr_t mtr;
- btr_pcur_t pcur;
-
- mtr_start(&mtr);
-
- /* Open at the high/right end (FALSE), and INIT
- cursor (TRUE) */
- btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
-
- if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
- const rec_t* rec;
-
- rec = row_search_autoinc_get_rec(&pcur, &mtr);
-
- if (rec != NULL) {
- ibool unsigned_type = (
- dfield->col->prtype & DATA_UNSIGNED);
-
- *value = row_search_autoinc_read_column(
- index, rec, i, unsigned_type);
- }
- }
-
- btr_pcur_close(&pcur);
-
- mtr_commit(&mtr);
- } else {
- error = DB_RECORD_NOT_FOUND;
- }
-
- return(error);
-}
diff --git a/storage/innobase/row/row0uins.c b/storage/innobase/row/row0uins.c
deleted file mode 100644
index ce9ab792204..00000000000
--- a/storage/innobase/row/row0uins.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/******************************************************
-Fresh insert undo
-
-(c) 1996 Innobase Oy
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0uins.h"
-
-#ifdef UNIV_NONINL
-#include "row0uins.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "dict0crea.h"
-#include "trx0undo.h"
-#include "trx0roll.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "row0undo.h"
-#include "row0vers.h"
-#include "trx0trx.h"
-#include "trx0rec.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "que0que.h"
-#include "ibuf0ibuf.h"
-#include "log0log.h"
-
-/*******************************************************************
-Removes a clustered index record. The pcur in node was positioned on the
-record, now it is detached. */
-static
-ulint
-row_undo_ins_remove_clust_rec(
-/*==========================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node) /* in: undo node */
-{
- btr_cur_t* btr_cur;
- ibool success;
- ulint err;
- ulint n_tries = 0;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur),
- &mtr);
- ut_a(success);
-
- if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
-
- /* Drop the index tree associated with the row in
- SYS_INDEXES table: */
-
- dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF,
- &(node->pcur), &mtr);
- ut_a(success);
- }
-
- btr_cur = btr_pcur_get_btr_cur(&(node->pcur));
-
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- if (success) {
- trx_undo_rec_release(node->trx, node->undo_no);
-
- return(DB_SUCCESS);
- }
-retry:
- /* If did not succeed, try pessimistic descent to tree */
- mtr_start(&mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_TREE,
- &(node->pcur), &mtr);
- ut_a(success);
-
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (err == DB_OUT_OF_FILE_SPACE
- && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- trx_undo_rec_release(node->trx, node->undo_no);
-
- return(err);
-}
-
-/*******************************************************************
-Removes a secondary index entry if found. */
-static
-ulint
-row_undo_ins_remove_sec_low(
-/*========================*/
- /* out: DB_SUCCESS, DB_FAIL, or
- DB_OUT_OF_FILE_SPACE */
- ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry to remove */
-{
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- ibool found;
- ibool success;
- ulint err;
- mtr_t mtr;
-
- log_free_check();
- mtr_start(&mtr);
-
- found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- if (!found) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(DB_SUCCESS);
- }
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
-
- if (success) {
- err = DB_SUCCESS;
- } else {
- err = DB_FAIL;
- }
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/*******************************************************************
-Removes a secondary index entry from the index if found. Tries first
-optimistic, then pessimistic descent down the tree. */
-static
-ulint
-row_undo_ins_remove_sec(
-/*====================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry to insert */
-{
- ulint err;
- ulint n_tries = 0;
-
- /* Try first optimistic descent to the B-tree */
-
- err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry);
-
- if (err == DB_SUCCESS) {
-
- return(err);
- }
-
- /* Try then pessimistic descent to the B-tree */
-retry:
- err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
-
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- return(err);
-}
-
-/***************************************************************
-Parses the row reference and other info in a fresh insert undo record. */
-static
-void
-row_undo_ins_parse_undo_rec(
-/*========================*/
- undo_node_t* node) /* in: row undo node */
-{
- dict_index_t* clust_index;
- byte* ptr;
- dulint undo_no;
- dulint table_id;
- ulint type;
- ulint dummy;
- ibool dummy_extern;
-
- ut_ad(node);
-
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
- &dummy_extern, &undo_no, &table_id);
- ut_ad(type == TRX_UNDO_INSERT_REC);
- node->rec_type = type;
-
- node->table = dict_table_get_on_id(table_id, node->trx);
-
- if (node->table == NULL) {
-
- return;
- }
-
- if (node->table->ibd_file_missing) {
- /* We skip undo operations to missing .ibd files */
- node->table = NULL;
-
- return;
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
- node->heap);
-}
-
-/***************************************************************
-Undoes a fresh insert of a row to a table. A fresh insert means that
-the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. */
-
-ulint
-row_undo_ins(
-/*=========*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node) /* in: row undo node */
-{
- dtuple_t* entry;
- ibool found;
- ulint err;
-
- ut_ad(node);
- ut_ad(node->state == UNDO_NODE_INSERT);
-
- row_undo_ins_parse_undo_rec(node);
-
- if (node->table == NULL) {
- found = FALSE;
- } else {
- found = row_undo_search_clust_to_pcur(node);
- }
-
- if (!found) {
- trx_undo_rec_release(node->trx, node->undo_no);
-
- return(DB_SUCCESS);
- }
-
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
-
- while (node->index != NULL) {
- entry = row_build_index_entry(node->row, node->index,
- node->heap);
- err = row_undo_ins_remove_sec(node->index, entry);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- err = row_undo_ins_remove_clust_rec(node);
-
- return(err);
-}
diff --git a/storage/innobase/row/row0umod.c b/storage/innobase/row/row0umod.c
deleted file mode 100644
index 68139da116e..00000000000
--- a/storage/innobase/row/row0umod.c
+++ /dev/null
@@ -1,762 +0,0 @@
-/******************************************************
-Undo modify of a row
-
-(c) 1997 Innobase Oy
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0umod.h"
-
-#ifdef UNIV_NONINL
-#include "row0umod.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "trx0undo.h"
-#include "trx0roll.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "row0undo.h"
-#include "row0vers.h"
-#include "trx0trx.h"
-#include "trx0rec.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "que0que.h"
-#include "log0log.h"
-
-/* Considerations on undoing a modify operation.
-(1) Undoing a delete marking: all index records should be found. Some of
-them may have delete mark already FALSE, if the delete mark operation was
-stopped underway, or if the undo operation ended prematurely because of a
-system crash.
-(2) Undoing an update of a delete unmarked record: the newer version of
-an updated secondary index entry should be removed if no prior version
-of the clustered index record requires its existence. Otherwise, it should
-be delete marked.
-(3) Undoing an update of a delete marked record. In this kind of update a
-delete marked clustered index record was delete unmarked and possibly also
-some of its fields were changed. Now, it is possible that the delete marked
-version has become obsolete at the time the undo is started. */
-
-/***************************************************************
-Checks if also the previous version of the clustered index record was
-modified or inserted by the same transaction, and its undo number is such
-that it should be undone in the same rollback.
-
-node->new_trx_id is compared with the id of node->trx. Only when they are
-equal is the undo record at node->new_roll_ptr fetched (using node->heap)
-and its undo number compared with trx->roll_limit. */
-UNIV_INLINE
-ibool
-row_undo_mod_undo_also_prev_vers(
-/*=============================*/
- /* out: TRUE if also previous modify or
- insert of this row should be undone */
- undo_node_t* node, /* in: row undo node */
- dulint* undo_no)/* out: the undo number; ut_dulint_zero
- if the trx ids differ */
-{
- trx_undo_rec_t* undo_rec;
- trx_t* trx;
-
- trx = node->trx;
-
- if (0 != ut_dulint_cmp(node->new_trx_id, trx->id)) {
-
- /* The previous version was written by a different trx id */
- *undo_no = ut_dulint_zero;
- return(FALSE);
- }
-
- undo_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr, node->heap);
-
- *undo_no = trx_undo_rec_get_undo_no(undo_rec);
-
- /* TRUE when the comparison of roll_limit with *undo_no is <= 0 */
- return(ut_dulint_cmp(trx->roll_limit, *undo_no) <= 0);
-}
-
-/***************************************************************
-Undoes a modify in a clustered index record.
-
-Restores the position of node->pcur in the given latch mode and applies
-node->update to the record: with btr_cur_optimistic_update() when mode is
-BTR_MODIFY_LEAF, otherwise with btr_cur_pessimistic_update(). Both calls
-pass BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG. The
-mtr is neither started nor committed here; that is left to the caller. */
-static
-ulint
-row_undo_mod_clust_low(
-/*===================*/
- /* out: DB_SUCCESS, DB_FAIL, or error code:
- we may run out of file space */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr, /* in: mtr */
- ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
- big_rec_t* dummy_big_rec;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
- ibool success;
-
- pcur = &(node->pcur);
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- success = btr_pcur_restore_position(mode, pcur, mtr);
-
- /* The restore result is only asserted, never branched on */
- ut_ad(success);
-
- if (mode == BTR_MODIFY_LEAF) {
-
- err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG
- | BTR_KEEP_SYS_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- /* The value stored to dummy_big_rec is not read in this
- function */
- err = btr_cur_pessimistic_update(
- BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG
- | BTR_KEEP_SYS_FLAG,
- btr_cur, &dummy_big_rec, node->update,
- node->cmpl_info, thr, mtr);
- }
-
- return(err);
-}
-
-/***************************************************************
-Removes a clustered index record after undo if possible.
-
-Returns DB_SUCCESS without deleting anything when the position of
-node->pcur cannot be restored, when node->rec_type is not
-TRX_UNDO_UPD_DEL_REC, or when row_vers_must_preserve_del_marked() reports
-that the record must be kept. Otherwise the record is deleted
-optimistically (BTR_MODIFY_LEAF) or pessimistically (BTR_MODIFY_TREE). */
-static
-ulint
-row_undo_mod_remove_clust_low(
-/*==========================*/
- /* out: DB_SUCCESS, DB_FAIL, or error code:
- we may run out of file space */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr __attribute__((unused)), /* in: query thread */
- mtr_t* mtr, /* in: mtr */
- ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
- ibool success;
-
- pcur = &(node->pcur);
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- success = btr_pcur_restore_position(mode, pcur, mtr);
-
- if (!success) {
-
- /* Position could not be restored: treated as nothing to do */
- return(DB_SUCCESS);
- }
-
- /* Find out if we can remove the whole clustered index record */
-
- if (node->rec_type == TRX_UNDO_UPD_DEL_REC
- && !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
-
- /* Ok, we can remove */
- } else {
- return(DB_SUCCESS);
- }
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, mtr);
-
- /* A failed optimistic delete is reported as DB_FAIL */
- if (success) {
- err = DB_SUCCESS;
- } else {
- err = DB_FAIL;
- }
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- /* Note that since this operation is analogous to purge,
- we can free also inherited externally stored fields:
- hence the last FALSE in the call below */
-
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, mtr);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
- }
-
- return(err);
-}
-
-/***************************************************************
-Undoes a modify in a clustered index record. Sets also the node state for the
-next round of undo.
-
-The update is first tried with BTR_MODIFY_LEAF and, on any result other
-than DB_SUCCESS, retried with BTR_MODIFY_TREE in a new mini-transaction.
-For a TRX_UNDO_UPD_DEL_REC record that was undone successfully, removal of
-the clustered index record is then attempted with the same two-step scheme.
-Finally the undo record reservation is released and node->state is set to
-UNDO_NODE_FETCH_NEXT, or to UNDO_NODE_PREV_VERS if the previous version
-should be undone too and its undo record could be reserved. */
-static
-ulint
-row_undo_mod_clust(
-/*===============*/
- /* out: DB_SUCCESS or error code: we may run
- out of file space */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
-{
- btr_pcur_t* pcur;
- mtr_t mtr;
- ulint err;
- ibool success;
- ibool more_vers;
- dulint new_undo_no;
-
- ut_ad(node && thr);
-
- /* Check if also the previous version of the clustered index record
- should be undone in this same rollback operation */
-
- more_vers = row_undo_mod_undo_also_prev_vers(node, &new_undo_no);
-
- pcur = &(node->pcur);
-
- mtr_start(&mtr);
-
- /* Try optimistic processing of the record, keeping changes within
- the index page */
-
- err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_LEAF);
-
- if (err != DB_SUCCESS) {
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- /* We may have to modify tree structure: do a pessimistic
- descent down the index tree */
-
- mtr_start(&mtr);
-
- err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
- }
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
-
- /* Try to remove the record, again optimistically first */
- mtr_start(&mtr);
-
- err = row_undo_mod_remove_clust_low(node, thr, &mtr,
- BTR_MODIFY_LEAF);
- if (err != DB_SUCCESS) {
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- /* We may have to modify tree structure: do a
- pessimistic descent down the index tree */
-
- mtr_start(&mtr);
-
- err = row_undo_mod_remove_clust_low(node, thr, &mtr,
- BTR_MODIFY_TREE);
- }
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
- }
-
- /* Default next state; overridden below if a previous version is
- to be undone as well */
- node->state = UNDO_NODE_FETCH_NEXT;
-
- trx_undo_rec_release(node->trx, node->undo_no);
-
- if (more_vers && err == DB_SUCCESS) {
-
- /* Reserve the undo log record to the prior version after
- committing &mtr: this is necessary to comply with the latching
- order, as &mtr may contain the fsp latch which is lower in
- the latch hierarchy than trx->undo_mutex. */
-
- success = trx_undo_rec_reserve(node->trx, new_undo_no);
-
- /* If the reservation fails the state stays FETCH_NEXT */
- if (success) {
- node->state = UNDO_NODE_PREV_VERS;
- }
- }
-
- return(err);
-}
-
-/***************************************************************
-Delete marks or removes a secondary index entry if found.
-
-Two mini-transactions are used: mtr covers the search and modification of
-the secondary index entry, while mtr_vers covers the restored position of
-node->pcur, whose record is passed to row_vers_old_has_index_entry() to
-decide between delete marking and removal. If the entry is not found,
-DB_SUCCESS is returned and nothing is changed. */
-static
-ulint
-row_undo_mod_del_mark_or_remove_sec_low(
-/*====================================*/
- /* out: DB_SUCCESS, DB_FAIL, or
- DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: index */
- dtuple_t* entry, /* in: index entry */
- ulint mode) /* in: latch mode BTR_MODIFY_LEAF or
- BTR_MODIFY_TREE */
-{
- ibool found;
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- ibool success;
- ibool old_has;
- ulint err;
- mtr_t mtr;
- mtr_t mtr_vers;
-
- log_free_check();
- mtr_start(&mtr);
-
- found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- if (!found) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(DB_SUCCESS);
- }
-
- /* We should remove the index record if no prior version of the row,
- which cannot be purged yet, requires its existence. If some requires,
- we should delete mark the record. */
-
- mtr_start(&mtr_vers);
-
- /* Unlike in row_undo_mod_clust_low(), a failed restore is checked
- with ut_a() here */
- success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
- &mtr_vers);
- ut_a(success);
-
- old_has = row_vers_old_has_index_entry(FALSE,
- btr_pcur_get_rec(&(node->pcur)),
- &mtr_vers, index, entry);
- if (old_has) {
- /* Set the delete mark (the TRUE argument) on the entry */
- err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, TRUE, thr, &mtr);
- ut_ad(err == DB_SUCCESS);
- } else {
- /* Remove the index record */
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
- if (success) {
- err = DB_SUCCESS;
- } else {
- err = DB_FAIL;
- }
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- TRUE, &mtr);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
- }
- }
-
- /* mtr_vers is committed first, then the local cursor is closed
- and mtr is committed */
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/***************************************************************
-Delete marks or removes a secondary index entry if found.
-NOTE that if we updated the fields of a delete-marked secondary index record
-so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
-return to the original values because we do not know them. But this should
-not cause problems because in row0sel.c, in queries we always retrieve the
-clustered index record or an earlier version of it, if the secondary index
-record through which we do the search is delete-marked.
-
-This is a wrapper around row_undo_mod_del_mark_or_remove_sec_low(): it is
-called with BTR_MODIFY_LEAF first, and with BTR_MODIFY_TREE if the first
-call returns anything other than DB_SUCCESS. */
-static
-ulint
-row_undo_mod_del_mark_or_remove_sec(
-/*================================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry */
-{
- ulint err;
-
- /* Optimistic attempt */
- err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
- entry, BTR_MODIFY_LEAF);
- if (err == DB_SUCCESS) {
-
- return(err);
- }
-
- /* Pessimistic retry; its result is returned as is */
- err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
- entry, BTR_MODIFY_TREE);
- return(err);
-}
-
-/***************************************************************
-Delete unmarks a secondary index entry which must be found. It might not be
-delete-marked at the moment, but it does not harm to unmark it anyway. We also
-need to update the fields of the secondary index record if we updated its
-fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
-
-If the entry is not found, diagnostics are written to stderr and the
-initial value DB_SUCCESS is returned. DB_OVERFLOW and DB_UNDERFLOW from
-the optimistic update are mapped to DB_FAIL. */
-static
-ulint
-row_undo_mod_del_unmark_sec_and_undo_update(
-/*========================================*/
- /* out: DB_FAIL or DB_SUCCESS or
- DB_OUT_OF_FILE_SPACE */
- ulint mode, /* in: search mode: BTR_MODIFY_LEAF or
- BTR_MODIFY_TREE */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: index */
- dtuple_t* entry) /* in: index entry */
-{
- mem_heap_t* heap;
- btr_pcur_t pcur;
- upd_t* update;
- ulint err = DB_SUCCESS;
- ibool found;
- big_rec_t* dummy_big_rec;
- mtr_t mtr;
- trx_t* trx = thr_get_trx(thr);
-
- log_free_check();
- mtr_start(&mtr);
-
- found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
-
- if (!found) {
- /* Report the missing entry: index name, the searched tuple,
- the record the cursor is positioned on, and the transaction.
- err is left at DB_SUCCESS. */
- fputs("InnoDB: error in sec index entry del undo in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_pcur_get_rec(&pcur), index);
- putc('\n', stderr);
- trx_print(stderr, trx, 0);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- } else {
- btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- /* Clear the delete mark (the FALSE argument) */
- err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, FALSE, thr, &mtr);
- ut_a(err == DB_SUCCESS);
- heap = mem_heap_create(100);
-
- /* Build the difference between entry and the record found;
- an empty difference means there is nothing to update */
- update = row_upd_build_sec_rec_difference_binary(
- index, entry, btr_cur_get_rec(btr_cur), trx, heap);
- if (upd_get_n_fields(update) == 0) {
-
- /* Do nothing */
-
- } else if (mode == BTR_MODIFY_LEAF) {
- /* Try an optimistic updating of the record, keeping
- changes within the page */
-
- err = btr_cur_optimistic_update(
- BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
- btr_cur, update, 0, thr, &mtr);
- /* Callers in this file retry with BTR_MODIFY_TREE
- when they see DB_FAIL */
- if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
- err = DB_FAIL;
- }
- } else {
- ut_a(mode == BTR_MODIFY_TREE);
- err = btr_cur_pessimistic_update(
- BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
- btr_cur, &dummy_big_rec,
- update, 0, thr, &mtr);
- }
-
- mem_heap_free(heap);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/***************************************************************
-Undoes a modify in secondary indexes when undo record type is UPD_DEL.
-
-Starting from the current node->index, an index entry is built from
-node->row for each index and passed to
-row_undo_mod_del_mark_or_remove_sec(). The loop stops at the first error,
-leaving node->index at the index that failed. */
-static
-ulint
-row_undo_mod_upd_del_sec(
-/*=====================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- ulint err;
-
- /* Local heap for the built index entries; freed on every
- return path */
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- entry = row_build_index_entry(node->row, index, heap);
-
- err = row_undo_mod_del_mark_or_remove_sec(node, thr, index,
- entry);
- if (err != DB_SUCCESS) {
-
- mem_heap_free(heap);
-
- return(err);
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************
-Undoes a modify in secondary indexes when undo record type is DEL_MARK.
-
-Starting from the current node->index, an index entry is built from
-node->row for each index and passed to
-row_undo_mod_del_unmark_sec_and_undo_update(), first with BTR_MODIFY_LEAF
-and, if that returns DB_FAIL, with BTR_MODIFY_TREE. The loop stops at the
-first error. */
-static
-ulint
-row_undo_mod_del_mark_sec(
-/*======================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- ulint err;
-
- /* Local heap for the built index entries; freed on every
- return path */
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- entry = row_build_index_entry(node->row, index, heap);
-
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_LEAF, thr, index, entry);
- /* Only DB_FAIL triggers the pessimistic retry */
- if (err == DB_FAIL) {
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_TREE, thr, index, entry);
- }
-
- if (err != DB_SUCCESS) {
-
- mem_heap_free(heap);
-
- return(err);
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************
-Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
-
-Returns at once if node->cmpl_info has UPD_NODE_NO_ORD_CHANGE set.
-Otherwise, for each index from node->index onwards for which
-row_upd_changes_ord_field_binary() is true, the entry built from node->row
-is delete marked or removed, and then the entry with the column values of
-node->update substituted is delete unmarked and, if needed, updated. */
-static
-ulint
-row_undo_mod_upd_exist_sec(
-/*=======================*/
- /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
-{
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- ulint err;
-
- if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
- /* No change in secondary indexes */
-
- return(DB_SUCCESS);
- }
-
- /* Local heap for the built index entries; freed on every
- return path below */
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- index = node->index;
-
- if (row_upd_changes_ord_field_binary(node->row, node->index,
- node->update)) {
-
- /* Build the newest version of the index entry */
- entry = row_build_index_entry(node->row, index, heap);
-
- /* NOTE that if we updated the fields of a
- delete-marked secondary index record so that
- alphabetically they stayed the same, e.g.,
- 'abc' -> 'aBc', we cannot return to the original
- values because we do not know them. But this should
- not cause problems because in row0sel.c, in queries
- we always retrieve the clustered index record or an
- earlier version of it, if the secondary index record
- through which we do the search is delete-marked. */
-
- err = row_undo_mod_del_mark_or_remove_sec(node, thr,
- index,
- entry);
- if (err != DB_SUCCESS) {
- mem_heap_free(heap);
-
- return(err);
- }
-
- /* We may have to update the delete mark in the
- secondary index record of the previous version of
- the row. We also need to update the fields of
- the secondary index record if we updated its fields
- but alphabetically they stayed the same, e.g.,
- 'abc' -> 'aBc'. */
-
- /* The same entry object is modified in place with
- the column values from node->update */
- row_upd_index_replace_new_col_vals(entry, index,
- node->update, NULL);
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_LEAF, thr, index, entry);
- /* Only DB_FAIL triggers the pessimistic retry */
- if (err == DB_FAIL) {
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_TREE, thr, index, entry);
- }
-
- if (err != DB_SUCCESS) {
- mem_heap_free(heap);
-
- return(err);
- }
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************
-Parses the row reference and other info in a modify undo log record.
-
-On return node->rec_type is always set. node->table is NULL if the table
-was not found by id or if its .ibd file is missing; in that case the other
-fields are not filled in. Otherwise node->ref, node->update,
-node->new_roll_ptr, node->new_trx_id and node->cmpl_info are set from the
-undo record, with memory taken from node->heap. */
-static
-void
-row_undo_mod_parse_undo_rec(
-/*========================*/
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
-{
- dict_index_t* clust_index;
- byte* ptr;
- dulint undo_no;
- dulint table_id;
- dulint trx_id;
- dulint roll_ptr;
- ulint info_bits;
- ulint type;
- ulint cmpl_info;
- ibool dummy_extern;
- trx_t* trx;
-
- ut_ad(node && thr);
- trx = thr_get_trx(thr);
- /* undo_no and dummy_extern are filled in but not used further in
- this function */
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
- &dummy_extern, &undo_no, &table_id);
- node->rec_type = type;
-
- node->table = dict_table_get_on_id(table_id, trx);
-
- /* TODO: other fixes associated with DROP TABLE + rollback in the
- same table by another user */
-
- if (node->table == NULL) {
- /* Table was dropped */
- return;
- }
-
- if (node->table->ibd_file_missing) {
- /* We skip undo operations to missing .ibd files */
- node->table = NULL;
-
- return;
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- /* The parse position ptr is advanced by each of the calls below */
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits);
-
- ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
- node->heap);
-
- trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, trx,
- node->heap, &(node->update));
- node->new_roll_ptr = roll_ptr;
- node->new_trx_id = trx_id;
- node->cmpl_info = cmpl_info;
-}
-
-/***************************************************************
-Undoes a modify operation on a row of a table.
-
-The undo record is parsed and the clustered index record looked up. If the
-table is unavailable or the record is not found, the undo record
-reservation is released, node->state is set to UNDO_NODE_FETCH_NEXT and
-DB_SUCCESS is returned. Otherwise the secondary indexes are handled
-according to node->rec_type and, if that succeeds, the clustered index
-record is handled by row_undo_mod_clust(). */
-
-ulint
-row_undo_mod(
-/*=========*/
- /* out: DB_SUCCESS or error code */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
-{
- ibool found;
- ulint err;
-
- ut_ad(node && thr);
- ut_ad(node->state == UNDO_NODE_MODIFY);
-
- /* Leaves node->table NULL if the table is not found or if its
- .ibd file is missing */
- row_undo_mod_parse_undo_rec(node, thr);
-
- if (node->table == NULL) {
- found = FALSE;
- } else {
- found = row_undo_search_clust_to_pcur(node);
- }
-
- if (!found) {
- /* It is already undone, or will be undone by another query
- thread, or table was dropped */
-
- trx_undo_rec_release(node->trx, node->undo_no);
- node->state = UNDO_NODE_FETCH_NEXT;
-
- return(DB_SUCCESS);
- }
-
- /* Start from the index following the first (clustered) index */
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
-
- if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
-
- err = row_undo_mod_upd_exist_sec(node, thr);
-
- } else if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
-
- err = row_undo_mod_del_mark_sec(node, thr);
- } else {
- ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
- err = row_undo_mod_upd_del_sec(node, thr);
- }
-
- if (err != DB_SUCCESS) {
-
- /* node->state is left unchanged on this path */
- return(err);
- }
-
- /* Also sets node->state for the next round of undo */
- err = row_undo_mod_clust(node, thr);
-
- return(err);
-}
diff --git a/storage/innobase/row/row0undo.c b/storage/innobase/row/row0undo.c
deleted file mode 100644
index f03f84ed1b0..00000000000
--- a/storage/innobase/row/row0undo.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/******************************************************
-Row undo
-
-(c) 1997 Innobase Oy
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0undo.h"
-
-#ifdef UNIV_NONINL
-#include "row0undo.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0uins.h"
-#include "row0umod.h"
-#include "row0mysql.h"
-#include "srv0srv.h"
-
-/* How to undo row operations?
-(1) For an insert, we have stored a prefix of the clustered index record
-in the undo log. Using it, we look for the clustered record, and using
-that we look for the records in the secondary indexes. The insert operation
-may have been left incomplete, if the database crashed, for example.
-We may have to look at the trx id and roll ptr to make sure the record in the
-clustered index is really the one for which the undo log record was
-written. We can use the framework we get from the original insert op.
-(2) Delete marking: We can use the framework we get from the original
-delete mark op. We only have to check the trx id.
-(3) Update: This may be the most complicated. We have to use the framework
-we get from the original update op.
-
-What if the same trx repeatedly deletes and inserts an identical row?
-Then the row id changes and also roll ptr. What if the row id was not
-part of the ordering fields in the clustered index? Maybe we have to write
-it to undo log. Well, maybe not, because if we order the row id and trx id
-in descending order, then the only undeleted copy is the first in the
-index. Our searches in row operations always position the cursor before
-the first record in the result set. But, if there is no key defined for
-a table, then it would be desirable that row id is in ascending order.
-So, let's store row id in descending order only if it is not an ordering
-field in the clustered index.
-
-NOTE: Deletes and inserts may lead to situation where there are identical
-records in a secondary index. Is that a problem in the B-tree? Yes.
-Also updates can lead to this, unless trx id and roll ptr are included in
-ord fields.
-(1) Fix in clustered indexes: include row id, trx id, and roll ptr
-in node pointers of B-tree.
-(2) Fix in secondary indexes: include all fields in node pointers, and
-if an entry is inserted, check if it is equal to the right neighbor,
-in which case update the right neighbor: the neighbor must be delete
-marked, set it unmarked and write the trx id of the current transaction.
-
-What if the same trx repeatedly updates the same row, updating a secondary
-index field or not? Updating a clustered index ordering field?
-
-(1) If it does not update the secondary index and not the clustered index
-ord field. Then the secondary index record stays unchanged, but the
-trx id in the secondary index record may be smaller than in the clustered
-index record. This is no problem?
-(2) If it updates secondary index ord field but not clustered: then in
-secondary index there are delete marked records, which differ in an
-ord field. No problem.
-(3) Updates clustered ord field but not secondary, and secondary index
-is unique. Then the record in secondary index is just updated at the
-clustered ord field.
-(4)
-
-Problem with duplicate records:
-Fix 1: Add a trx op no field to all indexes. A problem: if a trx with a
-bigger trx id has inserted and delete marked a similar row, our trx inserts
-again a similar row, and a trx with an even bigger id delete marks it. Then
-the position of the row should change in the index if the trx id affects
-the alphabetical ordering.
-
-Fix 2: If an insert encounters a similar row marked deleted, we turn the
-insert into an 'update' of the row marked deleted. Then we must write undo
-info on the update. A problem: what if a purge operation tries to remove
-the delete marked row?
-
-We can think of the database row versions as a linked list which starts
-from the record in the clustered index, and is linked by roll ptrs
-through undo logs. The secondary index records are references which tell
-what kinds of records can be found in this linked list for a record
-in the clustered index.
-
-How to do the purge? A record can be removed from the clustered index
-if its linked list becomes empty, i.e., the row has been marked deleted
-and its roll ptr points to the record in the undo log we are going through,
-doing the purge. Similarly, during a rollback, a record can be removed
-if the stored roll ptr in the undo log points to a trx already (being) purged,
-or if the roll ptr is NULL, i.e., it was a fresh insert. */
-
-/************************************************************************
-Creates a row undo node to a query graph.
-
-The node itself is allocated from the heap passed in. It starts in state
-UNDO_NODE_FETCH_NEXT with an initialized persistent cursor and a separate
-memory heap of its own (undo->heap), created here with an initial size
-argument of 256. Other fields of the node are not initialized here. */
-
-undo_node_t*
-row_undo_node_create(
-/*=================*/
- /* out, own: undo node */
- trx_t* trx, /* in: transaction */
- que_thr_t* parent, /* in: parent node, i.e., a thr node */
- mem_heap_t* heap) /* in: memory heap where created */
-{
- undo_node_t* undo;
-
- ut_ad(trx && parent && heap);
-
- undo = mem_heap_alloc(heap, sizeof(undo_node_t));
-
- undo->common.type = QUE_NODE_UNDO;
- undo->common.parent = parent;
-
- undo->state = UNDO_NODE_FETCH_NEXT;
- undo->trx = trx;
-
- btr_pcur_init(&(undo->pcur));
-
- /* Separate from the heap argument: row_undo() empties this heap
- after each undo round */
- undo->heap = mem_heap_create(256);
-
- return(undo);
-}
-
-/***************************************************************
-Looks for the clustered index record when node has the row reference.
-The pcur in node is used in the search. If found, stores the row to node,
-and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case.
-
-A record counts as found only if the search succeeds and the roll ptr
-stored in the record equals node->roll_ptr. The row copy in node->row is
-built from node->heap. */
-
-ibool
-row_undo_search_clust_to_pcur(
-/*==========================*/
- /* out: TRUE if found; NOTE the node->pcur
- must be closed by the caller, regardless of
- the return value */
- undo_node_t* node) /* in: row undo node */
-{
- dict_index_t* clust_index;
- ibool found;
- mtr_t mtr;
- ibool ret;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- /* Store the element count of the stack array in its first element;
- presumably rec_get_offsets() reads it as the capacity -- confirm */
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- mtr_start(&mtr);
-
- clust_index = dict_table_get_first_index(node->table);
-
- found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
- node->table, node->ref, &mtr);
-
- rec = btr_pcur_get_rec(&(node->pcur));
-
- /* NOTE(review): the offsets are computed before found is checked;
- heap is set only if rec_get_offsets() allocates one (it is freed
- below when non-NULL) */
- offsets = rec_get_offsets(rec, clust_index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (!found || 0 != ut_dulint_cmp(node->roll_ptr,
- row_get_rec_roll_ptr(rec, clust_index,
- offsets))) {
-
- /* We must remove the reservation on the undo log record
- BEFORE releasing the latch on the clustered index page: this
- is to make sure that some thread will eventually undo the
- modification corresponding to node->roll_ptr. */
-
- /* NOTE(review): no reservation is released in this function;
- row_undo_ins() and row_undo_mod() call trx_undo_rec_release()
- after this function has returned, i.e. after the mtr commit
- below -- confirm that this matches the requirement above */
-
- /* fputs("--------------------undoing a previous version\n",
- stderr); */
-
- ret = FALSE;
- } else {
- node->row = row_build(ROW_COPY_DATA, clust_index, rec,
- offsets, node->heap);
- btr_pcur_store_position(&(node->pcur), &mtr);
-
- ret = TRUE;
- }
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(ret);
-}
-
-/***************************************************************
-Fetches an undo log record and does the undo for the recorded operation.
-If none left, or a partial rollback completed, returns control to the
-parent node, which is always a query thread node.
-
-In state UNDO_NODE_FETCH_NEXT the next undo record is popped with
-trx_roll_pop_top_rec_of_trx(); in state UNDO_NODE_PREV_VERS the record at
-node->new_roll_ptr is fetched instead. The record is then dispatched to
-row_undo_ins() or row_undo_mod() depending on whether its roll ptr is an
-insert roll ptr. After each round node->pcur is closed and node->heap is
-emptied. */
-static
-ulint
-row_undo(
-/*=====*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code */
- undo_node_t* node, /* in: row undo node */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err;
- trx_t* trx;
- dulint roll_ptr;
- ibool locked_data_dict;
-
- ut_ad(node && thr);
-
- trx = node->trx;
-
- if (node->state == UNDO_NODE_FETCH_NEXT) {
-
- node->undo_rec = trx_roll_pop_top_rec_of_trx(trx,
- trx->roll_limit,
- &roll_ptr,
- node->heap);
- if (!node->undo_rec) {
- /* Rollback completed for this query thread */
-
- thr->run_node = que_node_get_parent(node);
-
- return(DB_SUCCESS);
- }
-
- node->roll_ptr = roll_ptr;
- node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
-
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
- node->state = UNDO_NODE_INSERT;
- } else {
- node->state = UNDO_NODE_MODIFY;
- }
-
- } else if (node->state == UNDO_NODE_PREV_VERS) {
-
- /* Undo should be done to the same clustered index record
- again in this same rollback, restoring the previous version */
-
- roll_ptr = node->new_roll_ptr;
-
- node->undo_rec = trx_undo_get_undo_rec_low(roll_ptr,
- node->heap);
- node->roll_ptr = roll_ptr;
- node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
-
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
- node->state = UNDO_NODE_INSERT;
- } else {
- node->state = UNDO_NODE_MODIFY;
- }
- }
-
- /* Prevent DROP TABLE etc. while we are rolling back this row.
- If we are doing a TABLE CREATE or some other dictionary operation,
- then we already have dict_operation_lock locked in x-mode. Do not
- try to lock again, because that would cause a hang. */
-
- /* TRUE means that this function takes the lock below and must
- also release it */
- locked_data_dict = (trx->dict_operation_lock_mode == 0);
-
- if (locked_data_dict) {
-
- row_mysql_lock_data_dictionary(trx);
- }
-
- if (node->state == UNDO_NODE_INSERT) {
-
- err = row_undo_ins(node);
-
- /* row_undo_mod() sets the next state itself in the
- other branch */
- node->state = UNDO_NODE_FETCH_NEXT;
- } else {
- ut_ad(node->state == UNDO_NODE_MODIFY);
- err = row_undo_mod(node, thr);
- }
-
- if (locked_data_dict) {
-
- row_mysql_unlock_data_dictionary(trx);
- }
-
- /* Do some cleanup */
- btr_pcur_close(&(node->pcur));
-
- mem_heap_empty(node->heap);
-
- /* Keep this node as the one to run next */
- thr->run_node = node;
-
- return(err);
-}
-
-/***************************************************************
-Undoes a row operation in a table. This is a high-level function used
-in SQL execution graphs.
-
-Increments srv_activity_count, runs row_undo() on the run node of thr and
-stores the result in trx->error_state. Any result other than DB_SUCCESS
-is reported on stderr as a fatal rollback error: for DB_OUT_OF_FILE_SPACE
-the process calls exit(1), otherwise ut_error is invoked. */
-
-que_thr_t*
-row_undo_step(
-/*==========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err;
- undo_node_t* node;
- trx_t* trx;
-
- ut_ad(thr);
-
- srv_activity_count++;
-
- trx = thr_get_trx(thr);
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_UNDO);
-
- err = row_undo(node, thr);
-
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- /* SQL error detected */
-
- fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n",
- (ulong) err);
-
- if (err == DB_OUT_OF_FILE_SPACE) {
- fprintf(stderr,
- "InnoDB: Error 13 means out of tablespace.\n"
- "InnoDB: Consider increasing"
- " your tablespace.\n");
-
- exit(1);
- }
-
- ut_error;
-
- /* Presumably not reached, since ut_error precedes it --
- confirm against the definition of ut_error */
- return(NULL);
- }
-
- return(thr);
-}
diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c
deleted file mode 100644
index c91cc449b96..00000000000
--- a/storage/innobase/row/row0upd.c
+++ /dev/null
@@ -1,2081 +0,0 @@
-/******************************************************
-Update of a row
-
-(c) 1996 Innobase Oy
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "row0upd.h"
-
-#ifdef UNIV_NONINL
-#include "row0upd.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "dict0crea.h"
-#include "mach0data.h"
-#include "trx0undo.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "que0que.h"
-#include "row0ins.h"
-#include "row0sel.h"
-#include "row0row.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "log0log.h"
-#include "pars0sym.h"
-#include "eval0eval.h"
-#include "buf0lru.h"
-
-
-/* What kind of latch and lock can we assume when the control comes to
- -------------------------------------------------------------------
-an update node?
---------------
-Efficiency of massive updates would require keeping an x-latch on a
-clustered index page through many updates, and not setting an explicit
-x-lock on clustered index records, as they anyway will get an implicit
-x-lock when they are updated. A problem is that the read nodes in the
-graph should know that they must keep the latch when passing the control
-up to the update node, and not set any record lock on the record which
-will be updated. Another problem occurs if the execution is stopped,
-as the kernel switches to another query thread, or the transaction must
-wait for a lock. Then we should be able to release the latch and, maybe,
-acquire an explicit x-lock on the record.
- Because this seems too complicated, we conclude that the less
-efficient solution of releasing all the latches when the control is
-transferred to another node, and acquiring explicit x-locks, is better. */
-
-/* How is a delete performed? If there is a delete without an
-explicit cursor, i.e., a searched delete, there are at least
-two different situations:
-the implicit select cursor may run on (1) the clustered index or
-on (2) a secondary index. The delete is performed by setting
-the delete bit in the record and substituting the id of the
-deleting transaction for the original trx id, and substituting a
-new roll ptr for previous roll ptr. The old trx id and roll ptr
-are saved in the undo log record. Thus, no physical changes occur
-in the index tree structure at the time of the delete. Only
-when the undo log is purged, the index records will be physically
-deleted from the index trees.
-
-The query graph executing a searched delete would consist of
-a delete node which has as a subtree a select subgraph.
-The select subgraph should return a (persistent) cursor
-in the clustered index, placed on a page which is x-latched.
-The delete node should look for all secondary index records for
-this clustered index entry and mark them as deleted. When is
-the x-latch freed? The most efficient way for performing a
-searched delete is obviously to keep the x-latch for several
-steps of query graph execution. */
-
-/***************************************************************
-Checks if an update vector changes some of the first ordering fields of an
-index record. This is only used in foreign key checks and we can assume
-that index does not contain column prefixes. This is only a forward
-declaration; the function itself is defined further down in this file. */
-static
-ibool
-row_upd_changes_first_fields_binary(
-/*================================*/
- /* out: TRUE if the update changes at least
- one of the first n fields */
- dtuple_t* entry, /* in: old value of index entry */
- dict_index_t* index, /* in: index of entry */
- upd_t* update, /* in: update vector for the row */
- ulint n); /* in: how many first fields to check */
-
-
-/*************************************************************************
-Tells whether some foreign key constraint in the referenced_list of the
-index's table currently names this index as its referenced index. The data
-dictionary is frozen for the duration of the scan unless
-trx->dict_operation_lock_mode is already nonzero. */
-static
-ibool
-row_upd_index_is_referenced(
-/*========================*/
-				/* out: TRUE if referenced; NOTE that since
-				we do not hold dict_operation_lock
-				when leaving the function, it may be that
-				the referencing table has been dropped when
-				we leave this function: this function is only
-				for heuristic use! */
-	dict_index_t*	index,	/* in: index */
-	trx_t*		trx)	/* in: transaction */
-{
-	dict_table_t*	table = index->table;
-	dict_foreign_t*	fk;
-	ibool		must_unfreeze = FALSE;
-	ibool		is_referenced = FALSE;
-
-	/* Cheap exit: nothing references this table at all */
-	if (!UT_LIST_GET_FIRST(table->referenced_list)) {
-
-		return(FALSE);
-	}
-
-	if (trx->dict_operation_lock_mode == 0) {
-		row_mysql_freeze_data_dictionary(trx);
-		must_unfreeze = TRUE;
-	}
-
-	for (fk = UT_LIST_GET_FIRST(table->referenced_list);
-	     fk != NULL;
-	     fk = UT_LIST_GET_NEXT(referenced_list, fk)) {
-
-		if (fk->referenced_index == index) {
-			is_referenced = TRUE;
-
-			break;
-		}
-	}
-
-	/* Undo the freeze only if it was taken here */
-	if (must_unfreeze) {
-		row_mysql_unfreeze_data_dictionary(trx);
-	}
-
-	return(is_referenced);
-}
-
-/*************************************************************************
-Checks if possible foreign key constraints hold after a delete of the record
-under pcur. NOTE that this function will temporarily commit mtr and lose the
-pcur position! Every constraint in table->referenced_list whose referenced
-index is 'index' is checked when the node is a delete, or when the update
-vector changes one of the first foreign->n_fields fields of the entry. The
-scan stops at the first check that does not return DB_SUCCESS and that error
-code is returned to the caller. */
-static
-ulint
-row_upd_check_references_constraints(
-/*=================================*/
- /* out: DB_SUCCESS or an error code */
- upd_node_t* node, /* in: row update node */
- btr_pcur_t* pcur, /* in: cursor positioned on a record; NOTE: the
- cursor position is lost in this function! */
- dict_table_t* table, /* in: table in question */
- dict_index_t* index, /* in: index of the cursor */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- dict_foreign_t* foreign;
- mem_heap_t* heap;
- dtuple_t* entry;
- trx_t* trx;
- rec_t* rec;
- ulint err;
- ibool got_s_lock = FALSE;
-
- if (UT_LIST_GET_FIRST(table->referenced_list) == NULL) {
-
- return(DB_SUCCESS);
- }
-
- trx = thr_get_trx(thr);
-
- rec = btr_pcur_get_rec(pcur);
-
- heap = mem_heap_create(500);
-
- entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
-
- mtr_commit(mtr); /* the mtr is committed and restarted here; this
- is the point where the pcur position is lost */
-
- mtr_start(mtr);
-
- if (trx->dict_operation_lock_mode == 0) { /* NOTE(review): presumably
- 0 means this trx holds no dictionary
- lock of its own yet -- confirm */
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
- while (foreign) {
- /* Note that we may have an update which updates the index
- record, but does NOT update the first fields which are
- referenced in a foreign key constraint. Then the update does
- NOT break the constraint. */
-
- if (foreign->referenced_index == index
- && (node->is_delete
- || row_upd_changes_first_fields_binary(
- entry, index, node->update,
- foreign->n_fields))) {
-
- if (foreign->foreign_table == NULL) {
- dict_table_get(foreign->foreign_table_name,
- FALSE); /* the return value is not used;
- NOTE(review): presumably the call
- fills in foreign->foreign_table as
- a side effect -- confirm */
- }
-
- if (foreign->foreign_table) {
- mutex_enter(&(dict_sys->mutex));
-
- (foreign->foreign_table
- ->n_foreign_key_checks_running)++;
-
- mutex_exit(&(dict_sys->mutex));
- }
-
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_operation_lock temporarily!
- But the counter on the table protects 'foreign' from
- being dropped while the check is running. */
-
- err = row_ins_check_foreign_constraint(
- FALSE, foreign, table, entry, thr);
-
- if (foreign->foreign_table) {
- mutex_enter(&(dict_sys->mutex));
-
- ut_a(foreign->foreign_table
- ->n_foreign_key_checks_running > 0);
-
- (foreign->foreign_table
- ->n_foreign_key_checks_running)--;
-
- mutex_exit(&(dict_sys->mutex));
- }
-
- if (err != DB_SUCCESS) { /* error exit: release what was
- acquired above (dictionary freeze, heap) */
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(
- trx);
- }
-
- mem_heap_free(heap);
-
- return(err);
- }
- }
-
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
- }
-
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- mem_heap_free(heap);
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************************
-Allocates an update node for a query graph from the given heap and sets
-its fields to their initial values. The node also receives a private heap
-of its own in node->heap. */
-
-upd_node_t*
-upd_node_create(
-/*============*/
-				/* out, own: update node */
-	mem_heap_t*	heap)	/* in: mem heap where created */
-{
-	upd_node_t*	upd_node;
-
-	upd_node = mem_heap_alloc(heap, sizeof(*upd_node));
-
-	/* Identification and execution state */
-	upd_node->common.type = QUE_NODE_UPDATE;
-	upd_node->magic_n = UPD_NODE_MAGIC_N;
-	upd_node->state = UPD_NODE_UPDATE_CLUSTERED;
-	upd_node->cmpl_info = 0;
-
-	/* Flags */
-	upd_node->in_mysql_interface = FALSE;
-	upd_node->select_will_do_update = FALSE;
-
-	/* Nothing is attached to a fresh node */
-	upd_node->select = NULL;
-	upd_node->update = NULL;
-	upd_node->index = NULL;
-	upd_node->row = NULL;
-	upd_node->ext_vec = NULL;
-	upd_node->foreign = NULL;
-	upd_node->cascade_node = NULL;
-	upd_node->cascade_heap = NULL;
-
-	upd_node->heap = mem_heap_create(128);
-
-	return(upd_node);
-}
-
-/*************************************************************************
-Used in database recovery: overwrites the trx id field at position pos and
-the roll ptr field at position pos + 1 of a clustered index record. */
-
-void
-row_upd_rec_sys_fields_in_recovery(
-/*===============================*/
-	rec_t*		rec,	/* in: record */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	ulint		pos,	/* in: TRX_ID position in rec */
-	dulint		trx_id,	/* in: transaction id */
-	dulint		roll_ptr)/* in: roll ptr of the undo log record */
-{
-	ulint	len;
-	byte*	sys_field;
-
-	/* The trx id comes first ... */
-	sys_field = rec_get_nth_field(rec, offsets, pos, &len);
-	ut_ad(len == DATA_TRX_ID_LEN);
-	trx_write_trx_id(sys_field, trx_id);
-
-	/* ... and the roll ptr is the field right after it */
-	sys_field = rec_get_nth_field(rec, offsets, 1 + pos, &len);
-	ut_ad(len == DATA_ROLL_PTR_LEN);
-	trx_write_roll_ptr(sys_field, roll_ptr);
-}
-
-/*************************************************************************
-Writes a new trx id or roll ptr value into the buffer of the corresponding
-system field of a clustered index entry. */
-
-void
-row_upd_index_entry_sys_field(
-/*==========================*/
-	dtuple_t*	entry,	/* in: index entry, where the memory buffers
-				for sys fields are already allocated:
-				the function just copies the new values to
-				them */
-	dict_index_t*	index,	/* in: clustered index */
-	ulint		type,	/* in: DATA_TRX_ID or DATA_ROLL_PTR */
-	dulint		val)	/* in: value to write */
-{
-	ulint	sys_pos;
-	byte*	buf;
-
-	ut_ad(index->type & DICT_CLUSTERED);
-
-	sys_pos = dict_index_get_sys_col_pos(index, type);
-	buf = dfield_get_data(dtuple_get_nth_field(entry, sys_pos));
-
-	if (type != DATA_TRX_ID) {
-		/* Anything that is not a trx id is written as a roll ptr */
-		ut_ad(type == DATA_ROLL_PTR);
-		trx_write_roll_ptr(buf, val);
-
-		return;
-	}
-
-	trx_write_trx_id(buf, val);
-}
-
-/***************************************************************
-Decides whether an update can keep every field size unchanged. Returns TRUE
-as soon as one updated field has a new size different from the old one, is
-stored externally in the record, or is marked for external storage in the
-update vector. */
-
-ibool
-row_upd_changes_field_size_or_external(
-/*===================================*/
-				/* out: TRUE if the update changes the size of
-				some field in index or the field is external
-				in rec or update */
-	dict_index_t*	index,	/* in: index */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	upd_t*		update)	/* in: update vector */
-{
-	ulint	n_upd;
-	ulint	k;
-
-	ut_ad(rec_offs_validate(NULL, index, offsets));
-	n_upd = upd_get_n_fields(update);
-
-	for (k = 0; k < n_upd; k++) {
-		upd_field_t*	uf = upd_get_nth_field(update, k);
-		ulint		new_size = uf->new_val.len;
-		ulint		old_size;
-
-		if (new_size == UNIV_SQL_NULL && !rec_offs_comp(offsets)) {
-			/* Old-style record: an SQL NULL occupies the SQL
-			NULL size of the column being updated. (A bug fixed
-			on Dec 31st, 2004: the size used to be taken from
-			the wrong field.) */
-
-			new_size = dict_col_get_sql_null_size(
-				dict_index_get_nth_col(index,
-						       uf->field_no));
-		}
-
-		old_size = rec_offs_nth_size(offsets, uf->field_no);
-
-		if (rec_offs_comp(offsets)
-		    && rec_offs_nth_sql_null(offsets, uf->field_no)) {
-			/* Compact format: for a variable length field an
-			SQL NULL uses zero bytes in the offset array at the
-			start of the physical record, while an empty string
-			uses one byte. Updating an SQL NULL varchar to an
-			empty string therefore cannot be done in place. */
-
-			old_size = UNIV_SQL_NULL;
-		}
-
-		if (old_size != new_size
-		    || rec_offs_nth_extern(offsets, uf->field_no)
-		    || uf->extern_storage) {
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/***************************************************************
-Copies the new column values held in the update vector into the given
-record, after setting the record's info bits from update->info_bits. No
-field size changes are allowed. This function is used only for a clustered
-index */
-
-void
-row_upd_rec_in_place(
-/*=================*/
-	rec_t*		rec,	/* in/out: record where replaced */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	upd_t*		update)	/* in: update vector */
-{
-	ulint	n_upd;
-	ulint	k;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-
-	rec_set_info_bits(rec, rec_offs_comp(offsets), update->info_bits);
-
-	n_upd = upd_get_n_fields(update);
-
-	for (k = 0; k < n_upd; k++) {
-		upd_field_t*	uf = upd_get_nth_field(update, k);
-		dfield_t*	val = &(uf->new_val);
-
-		rec_set_nth_field(rec, offsets, uf->field_no,
-				  dfield_get_data(val),
-				  dfield_get_len(val));
-	}
-}
-
-/*************************************************************************
-Appends to an open redo log buffer, in this order: the position of the
-TRX_ID system column in the clustered index (compressed), the roll ptr
-(DATA_ROLL_PTR_LEN bytes) and the id of the transaction (compressed
-dulint). */
-
-byte*
-row_upd_write_sys_vals_to_log(
-/*==========================*/
-				/* out: new pointer to mlog */
-	dict_index_t*	index,	/* in: clustered index */
-	trx_t*		trx,	/* in: transaction */
-	dulint		roll_ptr,/* in: roll ptr of the undo log record */
-	byte*		log_ptr,/* pointer to a buffer of size > 20 opened
-				in mlog */
-	mtr_t*		mtr __attribute__((unused))) /* in: mtr */
-{
-	ulint	trx_id_pos;
-
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(mtr);
-
-	trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
-	log_ptr += mach_write_compressed(log_ptr, trx_id_pos);
-
-	trx_write_roll_ptr(log_ptr, roll_ptr);
-	log_ptr += DATA_ROLL_PTR_LEN;
-
-	return(log_ptr + mach_dulint_write_compressed(log_ptr, trx->id));
-}
-
-/*************************************************************************
-Parses the log data of system field values: the TRX_ID position in
-compressed form, then DATA_ROLL_PTR_LEN bytes of roll ptr, then the trx id
-as a compressed dulint. NULL is returned when one of the parsing steps
-fails or when fewer than DATA_ROLL_PTR_LEN bytes are left for the roll
-ptr. */
-
-byte*
-row_upd_parse_sys_vals(
-/*===================*/
-			/* out: log data end or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	ulint*	pos,	/* out: TRX_ID position in record */
-	dulint*	trx_id,	/* out: trx id */
-	dulint*	roll_ptr)/* out: roll ptr */
-{
-	ptr = mach_parse_compressed(ptr, end_ptr, pos);
-
-	if (ptr == NULL) {
-
-		return(NULL);
-	}
-
-	/* Compare the number of bytes left in the buffer with the length
-	needed, instead of forming ptr + DATA_ROLL_PTR_LEN: that pointer
-	could lie beyond the end of the buffer, which is not a valid
-	pointer computation. */
-
-	if (ptr > end_ptr
-	    || (ulint) (end_ptr - ptr) < DATA_ROLL_PTR_LEN) {
-
-		return(NULL);
-	}
-
-	*roll_ptr = trx_read_roll_ptr(ptr);
-	ptr += DATA_ROLL_PTR_LEN;
-
-	ptr = mach_dulint_parse_compressed(ptr, end_ptr, trx_id);
-
-	return(ptr);
-}
-
-/***************************************************************
-Writes to the redo log the new values of the fields occurring in the index.
-The data written is: one byte of info bits, the number of fields in
-compressed form, and then for each field its field number and its length in
-compressed form followed by the data bytes. No data bytes are written for a
-field whose length is UNIV_SQL_NULL. */
-
-void
-row_upd_index_write_log(
-/*====================*/
- upd_t* update, /* in: update vector */
- byte* log_ptr,/* in: pointer to mlog buffer: must contain at least
- MLOG_BUF_MARGIN bytes of free space; the buffer is
- closed within this function */
- mtr_t* mtr) /* in: mtr into whose log to write */
-{
- upd_field_t* upd_field;
- dfield_t* new_val;
- ulint len;
- ulint n_fields;
- byte* buf_end;
- ulint i;
-
- n_fields = upd_get_n_fields(update);
-
- buf_end = log_ptr + MLOG_BUF_MARGIN;
-
- mach_write_to_1(log_ptr, update->info_bits);
- log_ptr++;
- log_ptr += mach_write_compressed(log_ptr, n_fields);
-
- for (i = 0; i < n_fields; i++) {
-
-#if MLOG_BUF_MARGIN <= 30
-# error "MLOG_BUF_MARGIN <= 30"
-#endif
-
- if (log_ptr + 30 > buf_end) { /* fewer than 30 bytes are left:
- close the buffer and open a fresh one before
- the two compressed integers are written */
- mlog_close(mtr, log_ptr);
-
- log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
- buf_end = log_ptr + MLOG_BUF_MARGIN;
- }
-
- upd_field = upd_get_nth_field(update, i);
-
- new_val = &(upd_field->new_val);
-
- len = new_val->len;
-
- log_ptr += mach_write_compressed(log_ptr, upd_field->field_no);
- log_ptr += mach_write_compressed(log_ptr, len);
-
- if (len != UNIV_SQL_NULL) {
- if (log_ptr + len < buf_end) { /* the value fits in
- the open buffer: copy it in place */
- ut_memcpy(log_ptr, new_val->data, len);
-
- log_ptr += len;
- } else { /* the value does not fit: close the buffer,
- append the value with mlog_catenate_string()
- and open a new buffer for what follows */
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, new_val->data, len);
-
- log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
- buf_end = log_ptr + MLOG_BUF_MARGIN;
- }
- }
- }
-
- mlog_close(mtr, log_ptr);
-}
-
-/*************************************************************************
-Parses the log data written by row_upd_index_write_log: one byte of info
-bits, the number of fields in compressed form, and for each field its field
-number and length in compressed form followed by the data bytes (absent
-when the length is UNIV_SQL_NULL). The update vector and copies of the
-field values are allocated from heap. NULL is returned, and *update_out is
-left untouched, when the buffer ends before the data is complete. */
-
-byte*
-row_upd_index_parse(
-/*================*/
-				/* out: log data end or NULL */
-	byte*		ptr,	/* in: buffer */
-	byte*		end_ptr,/* in: buffer end */
-	mem_heap_t*	heap,	/* in: memory heap where update vector is
-				built */
-	upd_t**		update_out)/* out: update vector */
-{
-	upd_t*		update;
-	upd_field_t*	upd_field;
-	dfield_t*	new_val;
-	ulint		len;
-	ulint		n_fields;
-	byte*		buf;
-	ulint		info_bits;
-	ulint		i;
-
-	/* At least the info bits byte must be present */
-	if (ptr >= end_ptr) {
-
-		return(NULL);
-	}
-
-	info_bits = mach_read_from_1(ptr);
-	ptr++;
-	ptr = mach_parse_compressed(ptr, end_ptr, &n_fields);
-
-	if (ptr == NULL) {
-
-		return(NULL);
-	}
-
-	update = upd_create(n_fields, heap);
-	update->info_bits = info_bits;
-
-	for (i = 0; i < n_fields; i++) {
-		upd_field = upd_get_nth_field(update, i);
-		new_val = &(upd_field->new_val);
-
-		ptr = mach_parse_compressed(ptr, end_ptr,
-					    &(upd_field->field_no));
-		if (ptr == NULL) {
-
-			return(NULL);
-		}
-
-		ptr = mach_parse_compressed(ptr, end_ptr, &len);
-
-		if (ptr == NULL) {
-
-			return(NULL);
-		}
-
-		new_val->len = len;
-
-		if (len == UNIV_SQL_NULL) {
-			/* An SQL NULL carries no data bytes */
-
-			continue;
-		}
-
-		/* len was read from the log. Compare it with the number of
-		bytes left instead of forming ptr + len: with a large len
-		that pointer computation can overflow, so that the check
-		passes and the copy below reads past the end of the
-		buffer. */
-
-		if (ptr > end_ptr || (ulint) (end_ptr - ptr) < len) {
-
-			return(NULL);
-		}
-
-		buf = mem_heap_alloc(heap, len);
-		ut_memcpy(buf, ptr, len);
-
-		ptr += len;
-
-		new_val->data = buf;
-	}
-
-	*update_out = update;
-
-	return(ptr);
-}
-
-/*******************************************************************
-Linear search for the number i among the first n_ext_vec elements of
-ext_vec. A NULL ext_vec contains nothing. */
-static
-ibool
-upd_ext_vec_contains(
-/*=================*/
-				/* out: TRUE if i is in ext_vec */
-	ulint*	ext_vec,	/* in: array of indexes or NULL */
-	ulint	n_ext_vec,	/* in: number of numbers in ext_vec */
-	ulint	i)		/* in: a number */
-{
-	ulint	k;
-
-	if (ext_vec != NULL) {
-		for (k = 0; k < n_ext_vec; k++) {
-			if (i == ext_vec[k]) {
-
-				return(TRUE);
-			}
-		}
-	}
-
-	return(FALSE);
-}
-
-/*******************************************************************
-Compares, field by field and as binary strings (no collation), a secondary
-index entry with a record that has the equal ordering fields, and collects
-the entry's value for every field that differs into a new update vector. */
-
-upd_t*
-row_upd_build_sec_rec_difference_binary(
-/*====================================*/
-				/* out, own: update vector of differing
-				fields */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: entry to insert */
-	rec_t*		rec,	/* in: secondary index record */
-	trx_t*		trx,	/* in: transaction */
-	mem_heap_t*	heap)	/* in: memory heap from which allocated */
-{
-	upd_t*		diff;
-	ulint		n_changed = 0;
-	ulint		pos;
-	ulint		offsets_[REC_OFFS_SMALL_SIZE];
-	const ulint*	offsets;
-
-	/* The first element of the offsets array holds its capacity */
-	offsets_[0] = sizeof(offsets_) / sizeof(offsets_[0]);
-
-	/* This function is used only for a secondary index */
-	ut_a(0 == (index->type & DICT_CLUSTERED));
-
-	/* Room for the worst case: every field differs */
-	diff = upd_create(dtuple_get_n_fields(entry), heap);
-
-	offsets = rec_get_offsets(rec, index, offsets_,
-				  ULINT_UNDEFINED, &heap);
-
-	for (pos = 0; pos < dtuple_get_n_fields(entry); pos++) {
-		ulint		rec_len;
-		byte*		rec_data;
-		dfield_t*	new_field;
-		upd_field_t*	uf;
-
-		rec_data = rec_get_nth_field(rec, offsets, pos, &rec_len);
-		new_field = dtuple_get_nth_field(entry, pos);
-
-		/* NOTE that the two lengths may differ even for values that
-		are equal in an alphabetical comparison: in a character set
-		and collation where strings of different length can compare
-		equal, and for a column prefix index whose last characters
-		are spaces. The latter case probably caused the assertion
-		failures reported at row0upd.c line 713 in versions
-		4.0.14 - 4.0.16. Here the fields are compared as binary
-		strings, so such a pair counts as different. */
-
-		if (dfield_data_is_binary_equal(new_field, rec_len,
-						rec_data)) {
-
-			continue;
-		}
-
-		uf = upd_get_nth_field(diff, n_changed);
-		n_changed++;
-
-		dfield_copy(&(uf->new_val), new_field);
-
-		upd_field_set_field_no(uf, pos, index, trx);
-
-		uf->extern_storage = FALSE;
-	}
-
-	diff->n_fields = n_changed;
-
-	return(diff);
-}
-
-/*******************************************************************
-Builds an update vector from those fields, excluding the roll ptr and
-trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings!
-A field also counts as differing when its externally-stored status in the
-entry (membership in ext_vec) does not match that of the record. The vector
-is created with room for every field of the entry and its n_fields is then
-set to the number of differing fields found. */
-
-upd_t*
-row_upd_build_difference_binary(
-/*============================*/
- /* out, own: update vector of differing
- fields, excluding roll ptr and trx id */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* entry, /* in: entry to insert */
- ulint* ext_vec,/* in: array containing field numbers of
- externally stored fields in entry, or NULL */
- ulint n_ext_vec,/* in: number of fields in ext_vec */
- rec_t* rec, /* in: clustered index record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap) /* in: memory heap from which allocated */
-{
- upd_field_t* upd_field;
- dfield_t* dfield;
- byte* data;
- ulint len;
- upd_t* update;
- ulint n_diff;
- ulint roll_ptr_pos;
- ulint trx_id_pos;
- ibool extern_bit;
- ulint i;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* the first element
- of the offsets array is set to the array's capacity */
-
- /* This function is used only for a clustered index */
- ut_a(index->type & DICT_CLUSTERED);
-
- update = upd_create(dtuple_get_n_fields(entry), heap);
-
- n_diff = 0;
-
- roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR);
- trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
-
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- for (i = 0; i < dtuple_get_n_fields(entry); i++) {
-
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- dfield = dtuple_get_nth_field(entry, i);
-
- /* NOTE: we compare the fields as binary strings!
- (No collation) */
-
- if (i == trx_id_pos || i == roll_ptr_pos) {
-
- goto skip_compare;
- }
-
- extern_bit = upd_ext_vec_contains(ext_vec, n_ext_vec, i);
-
- if (UNIV_UNLIKELY(extern_bit
- == (ibool)!rec_offs_nth_extern(offsets, i))
- || !dfield_data_is_binary_equal(dfield, len, data)) {
- /* Reached when the extern status of the entry field differs
- from that of the record field (assuming both values are 0 or
- 1), or when the data bytes differ. */
-
- upd_field = upd_get_nth_field(update, n_diff);
-
- dfield_copy(&(upd_field->new_val), dfield);
-
- upd_field_set_field_no(upd_field, i, index, trx);
-
- upd_field->extern_storage = extern_bit;
-
- n_diff++;
- }
-skip_compare:
- ;
- }
-
- update->n_fields = n_diff;
-
- return(update);
-}
-
-/***************************************************************
-Replaces the new column values stored in the update vector to the index entry
-given. The info bits of the entry are set from update->info_bits. For each
-index field position j that is examined, every update field whose field_no
-equals j is applied to field j of the entry. */
-
-void
-row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
- dtuple_t* entry, /* in/out: index entry where replaced */
- dict_index_t* index, /* in: index; NOTE that this may also be a
- non-clustered index */
- upd_t* update, /* in: an update vector built for the index so
- that the field number in an upd_field is the
- index position */
- ibool order_only,
- /* in: if TRUE, limit the replacement to
- ordering fields of index; note that this
- does not work for non-clustered indexes. */
- mem_heap_t* heap) /* in: memory heap to which we allocate and
- copy the new values, set this as NULL if you
- do not want allocation */
-{
- dict_field_t* field;
- upd_field_t* upd_field;
- dfield_t* dfield;
- dfield_t* new_val;
- ulint j;
- ulint i;
- ulint n_fields;
-
- ut_ad(index);
-
- dtuple_set_info_bits(entry, update->info_bits);
-
- if (order_only) {
- n_fields = dict_index_get_n_unique(index);
- } else {
- n_fields = dict_index_get_n_fields(index);
- }
-
- for (j = 0; j < n_fields; j++) {
-
- field = dict_index_get_nth_field(index, j);
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- upd_field = upd_get_nth_field(update, i);
-
- if (upd_field->field_no == j) {
-
- dfield = dtuple_get_nth_field(entry, j);
-
- new_val = &(upd_field->new_val);
-
- dfield_set_data(dfield, new_val->data,
- new_val->len);
- if (heap && new_val->len != UNIV_SQL_NULL) { /* a heap
- was given: make dfield point at a copy of
- the value allocated from that heap */
- dfield->data = mem_heap_alloc(
- heap, new_val->len);
- ut_memcpy(dfield->data, new_val->data,
- new_val->len);
- }
-
- if (field->prefix_len > 0
- && new_val->len != UNIV_SQL_NULL) { /* column
- prefix field: dfield->len is replaced
- by the result of
- dtype_get_at_most_n_mbchars();
- NOTE(review): presumably this limits
- the value to prefix_len -- confirm */
-
- const dict_col_t* col
- = dict_field_get_col(field);
-
- dfield->len
- = dtype_get_at_most_n_mbchars(
- col->prtype,
- col->mbminlen,
- col->mbmaxlen,
- field->prefix_len,
- new_val->len,
- new_val->data);
- }
- }
- }
- }
-}
-
-/***************************************************************
-Replaces the new column values stored in the update vector to the index entry
-given. The info bits of the entry are set from update->info_bits. For each
-field j of the index, the position of its column in the clustered index is
-looked up, and every update field whose field_no equals that position is
-applied to field j of the entry. */
-
-void
-row_upd_index_replace_new_col_vals(
-/*===============================*/
- dtuple_t* entry, /* in/out: index entry where replaced */
- dict_index_t* index, /* in: index; NOTE that this may also be a
- non-clustered index */
- upd_t* update, /* in: an update vector built for the
- CLUSTERED index so that the field number in
- an upd_field is the clustered index position */
- mem_heap_t* heap) /* in: memory heap to which we allocate and
- copy the new values, set this as NULL if you
- do not want allocation */
-{
- upd_field_t* upd_field;
- dfield_t* dfield;
- dfield_t* new_val;
- ulint j;
- ulint i;
- dict_index_t* clust_index;
-
- ut_ad(index);
-
- clust_index = dict_table_get_first_index(index->table);
-
- dtuple_set_info_bits(entry, update->info_bits);
-
- for (j = 0; j < dict_index_get_n_fields(index); j++) {
-
- ulint clust_pos;
- dict_field_t* field = dict_index_get_nth_field(index, j);
-
- clust_pos = dict_col_get_clust_pos(field->col, clust_index);
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- upd_field = upd_get_nth_field(update, i);
-
- if (upd_field->field_no == clust_pos) {
-
- dfield = dtuple_get_nth_field(entry, j);
-
- new_val = &(upd_field->new_val);
-
- dfield_set_data(dfield, new_val->data,
- new_val->len);
- if (heap && new_val->len != UNIV_SQL_NULL) { /* a heap
- was given: make dfield point at a copy of
- the value allocated from that heap */
- dfield->data = mem_heap_alloc(
- heap, new_val->len);
- ut_memcpy(dfield->data, new_val->data,
- new_val->len);
- }
-
- if (field->prefix_len > 0
- && new_val->len != UNIV_SQL_NULL) { /* column
- prefix field: dfield->len is replaced
- by the result of
- dtype_get_at_most_n_mbchars();
- NOTE(review): presumably this limits
- the value to prefix_len -- confirm */
-
- const dict_col_t* col
- = dict_field_get_col(field);
-
- dfield->len
- = dtype_get_at_most_n_mbchars(
- col->prtype,
- col->mbminlen,
- col->mbmaxlen,
- field->prefix_len,
- new_val->len,
- new_val->data);
- }
- }
- }
- }
-}
-
-/***************************************************************
-Tells whether an update vector changes one of the first n_unique (ordering)
-fields of an index record. Every ordering field is matched against every
-update field, so the cost is the product of the two counts: fast when either
-is small, quadratic otherwise. NOTE: values are compared as binary
-strings! */
-
-ibool
-row_upd_changes_ord_field_binary(
-/*=============================*/
-				/* out: TRUE if update vector changes
-				an ordering field in the index record;
-				NOTE: the fields are compared as binary
-				strings */
-	dtuple_t*	row,	/* in: old value of row, or NULL if the
-				row and the data values in update are not
-				known when this function is called, e.g., at
-				compile time */
-	dict_index_t*	index,	/* in: index of the record */
-	upd_t*		update)	/* in: update vector for the row; NOTE: the
-				field numbers in this MUST be clustered index
-				positions! */
-{
-	dict_index_t*	clust_index;
-	ulint		n_ord;
-	ulint		n_upd;
-	ulint		ord_i;
-	ulint		upd_i;
-
-	ut_ad(update && index);
-
-	n_ord = dict_index_get_n_unique(index);
-	n_upd = upd_get_n_fields(update);
-
-	clust_index = dict_table_get_first_index(index->table);
-
-	for (ord_i = 0; ord_i < n_ord; ord_i++) {
-
-		const dict_field_t*	ind_field
-			= dict_index_get_nth_field(index, ord_i);
-		const dict_col_t*	col = dict_field_get_col(ind_field);
-		ulint			clust_pos
-			= dict_col_get_clust_pos(col, clust_index);
-		ulint			col_no = dict_col_get_no(col);
-
-		for (upd_i = 0; upd_i < n_upd; upd_i++) {
-
-			upd_field_t*	uf = upd_get_nth_field(update, upd_i);
-
-			if (clust_pos != uf->field_no) {
-				/* This update field is for another column */
-
-				continue;
-			}
-
-			/* Without a row nothing can be compared. The same
-			holds for a column prefix field, because row may
-			lack an externally stored part of the column
-			value. Both cases are reported as a change. */
-
-			if (row == NULL || ind_field->prefix_len > 0) {
-
-				return(TRUE);
-			}
-
-			if (!dfield_datas_are_binary_equal(
-				    dtuple_get_nth_field(row, col_no),
-				    &(uf->new_val))) {
-
-				return(TRUE);
-			}
-		}
-	}
-
-	return(FALSE);
-}
-
-/***************************************************************
-Tells whether an update vector may change an ordering field of some index
-of the table. Each update field is looked up in the first index of the
-table by its field number, and TRUE is returned as soon as the column found
-there has its ord_part flag set. No data values are examined. */
-
-ibool
-row_upd_changes_some_index_ord_field_binary(
-/*========================================*/
-				/* out: TRUE if update vector may change
-				an ordering field in an index record */
-	dict_table_t*	table,	/* in: table */
-	upd_t*		update)	/* in: update vector for the row */
-{
-	dict_index_t*	first_index = dict_table_get_first_index(table);
-	ulint		k;
-
-	for (k = 0; k < upd_get_n_fields(update); k++) {
-
-		upd_field_t*	uf = upd_get_nth_field(update, k);
-		dict_field_t*	ifield
-			= dict_index_get_nth_field(first_index,
-						   uf->field_no);
-
-		if (dict_field_get_col(ifield)->ord_part) {
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/***************************************************************
-Tells whether an update vector changes, in a binary comparison, any of the
-first n fields of an index entry. Only used in foreign key checks; none of
-the examined index fields may be a column prefix (this is asserted). */
-static
-ibool
-row_upd_changes_first_fields_binary(
-/*================================*/
-				/* out: TRUE if changes */
-	dtuple_t*	entry,	/* in: index entry */
-	dict_index_t*	index,	/* in: index of entry */
-	upd_t*		update,	/* in: update vector for the row */
-	ulint		n)	/* in: how many first fields to check */
-{
-	dict_index_t*	clust_index;
-	ulint		n_upd;
-	ulint		fld_i;
-	ulint		upd_i;
-
-	ut_ad(update && index);
-	ut_ad(n <= dict_index_get_n_fields(index));
-
-	n_upd = upd_get_n_fields(update);
-	clust_index = dict_table_get_first_index(index->table);
-
-	for (fld_i = 0; fld_i < n; fld_i++) {
-
-		const dict_field_t*	ind_field
-			= dict_index_get_nth_field(index, fld_i);
-		ulint			clust_pos
-			= dict_col_get_clust_pos(
-				dict_field_get_col(ind_field), clust_index);
-
-		ut_a(ind_field->prefix_len == 0);
-
-		for (upd_i = 0; upd_i < n_upd; upd_i++) {
-
-			upd_field_t*	uf = upd_get_nth_field(update, upd_i);
-
-			if (clust_pos != uf->field_no) {
-				/* This update field is for another column */
-
-				continue;
-			}
-
-			if (!dfield_datas_are_binary_equal(
-				    dtuple_get_nth_field(entry, fld_i),
-				    &(uf->new_val))) {
-
-				return(TRUE);
-			}
-		}
-	}
-
-	return(FALSE);
-}
-
-/*************************************************************************
-Walks a column list and, for every column in it, copies the value of the
-corresponding clustered index field of the record into the column node. */
-UNIV_INLINE
-void
-row_upd_copy_columns(
-/*=================*/
-	rec_t*		rec,	/* in: record in a clustered index */
-	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
-	sym_node_t*	column)	/* in: first column in a column list, or
-				NULL */
-{
-	sym_node_t*	col_node;
-
-	for (col_node = column;
-	     col_node != NULL;
-	     col_node = UT_LIST_GET_NEXT(col_var_list, col_node)) {
-
-		ulint	len;
-		byte*	data;
-
-		data = rec_get_nth_field(
-			rec, offsets,
-			col_node->field_nos[SYM_CLUST_FIELD_NO], &len);
-
-		eval_node_copy_and_alloc_val(col_node, data, len);
-	}
-}
-
-/*************************************************************************
-Evaluates the expression of every field in the update vector and copies the
-result into the field's new value. Note that row_upd_copy_columns must have
-been called first. */
-UNIV_INLINE
-void
-row_upd_eval_new_vals(
-/*==================*/
-	upd_t*	update)	/* in: update vector */
-{
-	ulint	n_upd = upd_get_n_fields(update);
-	ulint	k;
-
-	for (k = 0; k < n_upd; k++) {
-		upd_field_t*	uf = upd_get_nth_field(update, k);
-		que_node_t*	expr = uf->exp;
-
-		eval_exp(expr);
-
-		dfield_copy_data(&(uf->new_val), que_node_get_val(expr));
-	}
-}
-
-/***************************************************************
-Stores to the heap the row on which the node->pcur is positioned. */
-static
-void
-row_upd_store_row(
-/*==============*/
- upd_node_t* node) /* in: row update node */
-{
- dict_index_t* clust_index;
- upd_t* update;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES);
-
- if (node->row != NULL) {
- mem_heap_empty(node->heap);
- node->row = NULL;
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- rec = btr_pcur_get_rec(node->pcur);
-
- offsets = rec_get_offsets(rec, clust_index, offsets_,
- ULINT_UNDEFINED, &heap);
- node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
- node->heap);
- node->ext_vec = mem_heap_alloc(node->heap, sizeof(ulint)
- * rec_offs_n_fields(offsets));
- if (node->is_delete) {
- update = NULL;
- } else {
- update = node->update;
- }
-
- node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec,
- offsets, update);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***************************************************************
-Updates a secondary index entry of a row. */
-static
-ulint
-row_upd_sec_index_entry(
-/*====================*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- que_thr_t* thr) /* in: query thread */
-{
- ibool check_ref;
- ibool found;
- dict_index_t* index;
- dtuple_t* entry;
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- mem_heap_t* heap;
- rec_t* rec;
- ulint err = DB_SUCCESS;
- mtr_t mtr;
- trx_t* trx = thr_get_trx(thr);
-
- index = node->index;
-
- check_ref = row_upd_index_is_referenced(index, trx);
-
- heap = mem_heap_create(1024);
-
- /* Build old index entry */
- entry = row_build_index_entry(node->row, index, heap);
-
- log_free_check();
- mtr_start(&mtr);
-
- found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
- &mtr);
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- rec = btr_cur_get_rec(btr_cur);
-
- if (UNIV_UNLIKELY(!found)) {
- fputs("InnoDB: error in sec index entry update in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, rec, index);
- putc('\n', stderr);
-
- trx_print(stderr, trx, 0);
-
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- } else {
- /* Delete mark the old index record; it can already be
- delete marked if we return after a lock wait in
- row_ins_index_entry below */
-
- if (!rec_get_deleted_flag(rec,
- dict_table_is_comp(index->table))) {
- err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE,
- thr, &mtr);
- if (err == DB_SUCCESS && check_ref) {
-
- /* NOTE that the following call loses
- the position of pcur ! */
- err = row_upd_check_references_constraints(
- node, &pcur, index->table,
- index, thr, &mtr);
- if (err != DB_SUCCESS) {
-
- goto close_cur;
- }
- }
-
- }
- }
-close_cur:
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- if (node->is_delete || err != DB_SUCCESS) {
-
- mem_heap_free(heap);
-
- return(err);
- }
-
- /* Build a new index entry */
- row_upd_index_replace_new_col_vals(entry, index, node->update, NULL);
-
- /* Insert new index entry */
- err = row_ins_index_entry(index, entry, NULL, 0, thr);
-
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***************************************************************
-Updates the secondary index record if it is changed in the row update or
-deletes it if this is a delete. */
-UNIV_INLINE
-ulint
-row_upd_sec_step(
-/*=============*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err;
-
- ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC)
- || (node->state == UPD_NODE_UPDATE_SOME_SEC));
- ut_ad(!(node->index->type & DICT_CLUSTERED));
-
- if (node->state == UPD_NODE_UPDATE_ALL_SEC
- || row_upd_changes_ord_field_binary(node->row, node->index,
- node->update)) {
- err = row_upd_sec_index_entry(node, thr);
-
- return(err);
- }
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************
-Marks the clustered index record deleted and inserts the updated version
-of the record to the index. This function should be used when the ordering
-fields of the clustered index record change. This should be quite rare in
-database applications. */
-static
-ulint
-row_upd_clust_rec_by_insert(
-/*========================*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- dict_index_t* index, /* in: clustered index of the record */
- que_thr_t* thr, /* in: query thread */
- ibool check_ref,/* in: TRUE if index may be referenced in
- a foreign key constraint */
- mtr_t* mtr) /* in: mtr; gets committed here */
-{
- mem_heap_t* heap = NULL;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- trx_t* trx;
- dict_table_t* table;
- dtuple_t* entry;
- ulint err;
-
- ut_ad(node);
- ut_ad(index->type & DICT_CLUSTERED);
-
- trx = thr_get_trx(thr);
- table = node->table;
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- if (node->state != UPD_NODE_INSERT_CLUSTERED) {
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, TRUE, thr, mtr);
- if (err != DB_SUCCESS) {
- mtr_commit(mtr);
- return(err);
- }
-
- /* Mark as not-owned the externally stored fields which the new
- row inherits from the delete marked record: purge should not
- free those externally stored fields even if the delete marked
- record is removed from the index tree, or updated. */
-
- btr_cur_mark_extern_inherited_fields(
- btr_cur_get_rec(btr_cur),
- rec_get_offsets(btr_cur_get_rec(btr_cur),
- dict_table_get_first_index(table),
- offsets_, ULINT_UNDEFINED, &heap),
- node->update, mtr);
- if (check_ref) {
- /* NOTE that the following call loses
- the position of pcur ! */
- err = row_upd_check_references_constraints(
- node, pcur, table, index, thr, mtr);
- if (err != DB_SUCCESS) {
- mtr_commit(mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
- }
- }
-
- }
-
- mtr_commit(mtr);
-
- if (!heap) {
- heap = mem_heap_create(500);
- }
- node->state = UPD_NODE_INSERT_CLUSTERED;
-
- entry = row_build_index_entry(node->row, index, heap);
-
- row_upd_index_replace_new_col_vals(entry, index, node->update, NULL);
-
- row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
-
- /* If we return from a lock wait, for example, we may have
- extern fields marked as not-owned in entry (marked in the
- if-branch above). We must unmark them. */
-
- btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec,
- node->n_ext_vec);
- /* We must mark non-updated extern fields in entry as inherited,
- so that a possible rollback will not free them */
-
- btr_cur_mark_dtuple_inherited_extern(entry, node->ext_vec,
- node->n_ext_vec,
- node->update);
-
- err = row_ins_index_entry(index, entry, node->ext_vec,
- node->n_ext_vec, thr);
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***************************************************************
-Updates a clustered index record of a row when the ordering fields do
-not change. */
-static
-ulint
-row_upd_clust_rec(
-/*==============*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- dict_index_t* index, /* in: clustered index */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr; gets committed here */
-{
- big_rec_t* big_rec = NULL;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
-
- ut_ad(node);
- ut_ad(index->type & DICT_CLUSTERED);
-
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
- dict_table_is_comp(index->table)));
-
- /* Try optimistic updating of the record, keeping changes within
- the page; we do not check locks because we assume the x-lock on the
- record to update */
-
- if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
- err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
- } else {
- err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
- }
-
- mtr_commit(mtr);
-
- if (err == DB_SUCCESS) {
-
- return(err);
- }
-
- if (buf_LRU_buf_pool_running_out()) {
-
- return(DB_LOCK_TABLE_FULL);
- }
- /* We may have to modify the tree structure: do a pessimistic descent
- down the index tree */
-
- mtr_start(mtr);
-
- /* NOTE: this transaction has an s-lock or x-lock on the record and
- therefore other transactions cannot modify the record when we have no
- latch on the page. In addition, we assume that other query threads of
- the same transaction do not modify the record in the meantime.
- Therefore we can assert that the restoration of the cursor succeeds. */
-
- ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
-
- ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
- dict_table_is_comp(index->table)));
-
- err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
- &big_rec, node->update,
- node->cmpl_info, thr, mtr);
- mtr_commit(mtr);
-
- if (err == DB_SUCCESS && big_rec) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_t* rec;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- mtr_start(mtr);
-
- ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
- rec = btr_cur_get_rec(btr_cur);
- err = btr_store_big_rec_extern_fields(
- index, rec,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
- big_rec, mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- mtr_commit(mtr);
- }
-
- if (big_rec) {
- dtuple_big_rec_free(big_rec);
- }
-
- return(err);
-}
-
-/***************************************************************
-Delete marks a clustered index record. */
-static
-ulint
-row_upd_del_mark_clust_rec(
-/*=======================*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code */
- upd_node_t* node, /* in: row update node */
- dict_index_t* index, /* in: clustered index */
- que_thr_t* thr, /* in: query thread */
- ibool check_ref,/* in: TRUE if index may be referenced in
- a foreign key constraint */
- mtr_t* mtr) /* in: mtr; gets committed here */
-{
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
-
- ut_ad(node);
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(node->is_delete);
-
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- /* Store row because we have to build also the secondary index
- entries */
-
- row_upd_store_row(node);
-
- /* Mark the clustered index record deleted; we do not have to check
- locks, because we assume that we have an x-lock on the record */
-
- err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, TRUE, thr, mtr);
- if (err == DB_SUCCESS && check_ref) {
- /* NOTE that the following call loses the position of pcur ! */
-
- err = row_upd_check_references_constraints(node,
- pcur, index->table,
- index, thr, mtr);
- if (err != DB_SUCCESS) {
- mtr_commit(mtr);
-
- return(err);
- }
- }
-
- mtr_commit(mtr);
-
- return(err);
-}
-
-/***************************************************************
-Updates the clustered index record. */
-static
-ulint
-row_upd_clust_step(
-/*===============*/
- /* out: DB_SUCCESS if operation successfully
- completed, DB_LOCK_WAIT in case of a lock wait,
- else error code */
- upd_node_t* node, /* in: row update node */
- que_thr_t* thr) /* in: query thread */
-{
- dict_index_t* index;
- btr_pcur_t* pcur;
- ibool success;
- ibool check_ref;
- ulint err;
- mtr_t* mtr;
- mtr_t mtr_buf;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- index = dict_table_get_first_index(node->table);
-
- check_ref = row_upd_index_is_referenced(index, thr_get_trx(thr));
-
- pcur = node->pcur;
-
- /* We have to restore the cursor to its position */
- mtr = &mtr_buf;
-
- mtr_start(mtr);
-
- /* If the restoration does not succeed, then the same
- transaction has deleted the record on which the cursor was,
- and that is an SQL error. If the restoration succeeds, it may
- still be that the same transaction has successively deleted
- and inserted a record with the same ordering fields, but in
- that case we know that the transaction has at least an
- implicit x-lock on the record. */
-
- ut_a(pcur->rel_pos == BTR_PCUR_ON);
-
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
-
- if (!success) {
- err = DB_RECORD_NOT_FOUND;
-
- mtr_commit(mtr);
-
- return(err);
- }
-
- /* If this is a row in SYS_INDEXES table of the data dictionary,
- then we have to free the file segments of the index tree associated
- with the index */
-
- if (node->is_delete
- && ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
-
- dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr);
-
- mtr_commit(mtr);
-
- mtr_start(mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
- mtr);
- if (!success) {
- err = DB_ERROR;
-
- mtr_commit(mtr);
-
- return(err);
- }
- }
-
- rec = btr_pcur_get_rec(pcur);
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- if (!node->has_clust_rec_x_lock) {
- err = lock_clust_rec_modify_check_and_lock(
- 0, rec, index, offsets, thr);
- if (err != DB_SUCCESS) {
- mtr_commit(mtr);
- goto exit_func;
- }
- }
-
- /* NOTE: the following function calls will also commit mtr */
-
- if (node->is_delete) {
- err = row_upd_del_mark_clust_rec(node, index, thr, check_ref,
- mtr);
- if (err == DB_SUCCESS) {
- node->state = UPD_NODE_UPDATE_ALL_SEC;
- node->index = dict_table_get_next_index(index);
- }
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
- }
-
- /* If the update is made for MySQL, we already have the update vector
- ready, else we have to do some evaluation: */
-
- if (!node->in_mysql_interface) {
- /* Copy the necessary columns from clust_rec and calculate the
- new values to set */
- row_upd_copy_columns(rec, offsets,
- UT_LIST_GET_FIRST(node->columns));
- row_upd_eval_new_vals(node->update);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
-
- err = row_upd_clust_rec(node, index, thr, mtr);
- return(err);
- }
-
- row_upd_store_row(node);
-
- if (row_upd_changes_ord_field_binary(node->row, index, node->update)) {
-
- /* Update causes an ordering field (ordering fields within
- the B-tree) of the clustered index record to change: perform
- the update by delete marking and inserting.
-
- TODO! What to do to the 'Halloween problem', where an update
- moves the record forward in index so that it is again
- updated when the cursor arrives there? Solution: the
- read operation must check the undo record undo number when
- choosing records to update. MySQL solves now the problem
- externally! */
-
- err = row_upd_clust_rec_by_insert(node, index, thr, check_ref,
- mtr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- node->state = UPD_NODE_UPDATE_ALL_SEC;
- } else {
- err = row_upd_clust_rec(node, index, thr, mtr);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- node->state = UPD_NODE_UPDATE_SOME_SEC;
- }
-
- node->index = dict_table_get_next_index(index);
-
- return(err);
-}
-
-/***************************************************************
-Updates the affected index records of a row. When the control is transferred
-to this node, we assume that we have a persistent cursor which was on a
-record, and the position of the cursor is stored in the cursor. */
-static
-ulint
-row_upd(
-/*====*/
- /* out: DB_SUCCESS if operation successfully
- completed, else error code or DB_LOCK_WAIT */
- upd_node_t* node, /* in: row update node */
- que_thr_t* thr) /* in: query thread */
-{
- ulint err = DB_SUCCESS;
-
- ut_ad(node && thr);
-
- if (UNIV_LIKELY(node->in_mysql_interface)) {
-
- /* We do not get the cmpl_info value from the MySQL
- interpreter: we must calculate it on the fly: */
-
- if (node->is_delete
- || row_upd_changes_some_index_ord_field_binary(
- node->table, node->update)) {
- node->cmpl_info = 0;
- } else {
- node->cmpl_info = UPD_NODE_NO_ORD_CHANGE;
- }
- }
-
- if (node->state == UPD_NODE_UPDATE_CLUSTERED
- || node->state == UPD_NODE_INSERT_CLUSTERED) {
-
- err = row_upd_clust_step(node, thr);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
- }
-
- if (!node->is_delete && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
-
- goto function_exit;
- }
-
- while (node->index != NULL) {
- err = row_upd_sec_step(node, thr);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
-function_exit:
- if (err == DB_SUCCESS) {
- /* Do some cleanup */
-
- if (node->row != NULL) {
- node->row = NULL;
- node->n_ext_vec = 0;
- mem_heap_empty(node->heap);
- }
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
- }
-
- return(err);
-}
-
-/***************************************************************
-Updates a row in a table. This is a high-level function used in SQL execution
-graphs. */
-
-que_thr_t*
-row_upd_step(
-/*=========*/
- /* out: query thread to run next or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- upd_node_t* node;
- sel_node_t* sel_node;
- que_node_t* parent;
- ulint err = DB_SUCCESS;
- trx_t* trx;
-
- ut_ad(thr);
-
- trx = thr_get_trx(thr);
-
- trx_start_if_not_started(trx);
-
- node = thr->run_node;
-
- sel_node = node->select;
-
- parent = que_node_get_parent(node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
-
- if (thr->prev_node == parent) {
- node->state = UPD_NODE_SET_IX_LOCK;
- }
-
- if (node->state == UPD_NODE_SET_IX_LOCK) {
-
- if (!node->has_clust_rec_x_lock) {
- /* It may be that the current session has not yet
- started its transaction, or it has been committed: */
-
- err = lock_table(0, node->table, LOCK_IX, thr);
-
- if (err != DB_SUCCESS) {
-
- goto error_handling;
- }
- }
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- if (node->searched_update) {
- /* Reset the cursor */
- sel_node->state = SEL_NODE_OPEN;
-
- /* Fetch a row to update */
-
- thr->run_node = sel_node;
-
- return(thr);
- }
- }
-
- /* sel_node is NULL if we are in the MySQL interface */
-
- if (sel_node && (sel_node->state != SEL_NODE_FETCH)) {
-
- if (!node->searched_update) {
- /* An explicit cursor should be positioned on a row
- to update */
-
- ut_error;
-
- err = DB_ERROR;
-
- goto error_handling;
- }
-
- ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
-
- /* No more rows to update, or the select node performed the
- updates directly in-place */
-
- thr->run_node = parent;
-
- return(thr);
- }
-
- /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-
- err = row_upd(node, thr);
-
-error_handling:
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- return(NULL);
- }
-
- /* DO THE TRIGGER ACTIONS HERE */
-
- if (node->searched_update) {
- /* Fetch next row to update */
-
- thr->run_node = sel_node;
- } else {
- /* It was an explicit cursor update */
-
- thr->run_node = parent;
- }
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- return(thr);
-}
-
-/*************************************************************************
-Performs an in-place update for the current clustered index record in
-select. */
-
-void
-row_upd_in_place_in_select(
-/*=======================*/
- sel_node_t* sel_node, /* in: select node */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr) /* in: mtr */
-{
- upd_node_t* node;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ulint err;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_ad(sel_node->select_will_do_update);
- ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF);
- ut_ad(sel_node->asc);
-
- node = que_node_get_parent(sel_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
-
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- /* Copy the necessary columns from clust_rec and calculate the new
- values to set */
-
- row_upd_copy_columns(btr_pcur_get_rec(pcur),
- rec_get_offsets(btr_pcur_get_rec(pcur),
- btr_cur->index, offsets_,
- ULINT_UNDEFINED, &heap),
- UT_LIST_GET_FIRST(node->columns));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- row_upd_eval_new_vals(node->update);
-
- ut_ad(!rec_get_deleted_flag(
- btr_pcur_get_rec(pcur),
- dict_table_is_comp(btr_cur->index->table)));
-
- ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE);
- ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
- ut_ad(node->select_will_do_update);
-
- err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur,
- node->update, node->cmpl_info,
- thr, mtr);
- ut_ad(err == DB_SUCCESS);
-}
diff --git a/storage/innobase/row/row0vers.c b/storage/innobase/row/row0vers.c
deleted file mode 100644
index 03d9a2f1203..00000000000
--- a/storage/innobase/row/row0vers.c
+++ /dev/null
@@ -1,665 +0,0 @@
-/******************************************************
-Row versions
-
-(c) 1997 Innobase Oy
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0vers.h"
-
-#ifdef UNIV_NONINL
-#include "row0vers.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "rem0cmp.h"
-#include "read0read.h"
-#include "lock0lock.h"
-
-/*********************************************************************
-Finds out if an active transaction has inserted or modified a secondary
-index record. NOTE: the kernel mutex is temporarily released in this
-function! */
-
-trx_t*
-row_vers_impl_x_locked_off_kernel(
-/*==============================*/
- /* out: NULL if committed, else the active
- transaction; NOTE that the kernel mutex is
- temporarily released! */
- rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index, /* in: the secondary index */
- const ulint* offsets)/* in: rec_get_offsets(rec, index) */
-{
- dict_index_t* clust_index;
- rec_t* clust_rec;
- ulint* clust_offsets;
- rec_t* version;
- rec_t* prev_version;
- dulint trx_id;
- dulint prev_trx_id;
- mem_heap_t* heap;
- mem_heap_t* heap2;
- dtuple_t* row;
- dtuple_t* entry = NULL; /* assignment to eliminate compiler
- warning */
- trx_t* trx;
- ulint vers_del;
- ulint rec_del;
- ulint err;
- mtr_t mtr;
- ulint comp;
-
- ut_ad(mutex_own(&kernel_mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- mutex_exit(&kernel_mutex);
-
- mtr_start(&mtr);
-
- /* Search for the clustered index record: this is a time-consuming
- operation: therefore we release the kernel mutex; also, the release
- is required by the latching order convention. The latch on the
- clustered index locks the top of the stack of versions. We also
- reserve purge_latch to lock the bottom of the version stack. */
-
- clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
- &clust_index, &mtr);
- if (!clust_rec) {
- /* In a rare case it is possible that no clust rec is found
- for a secondary index record: if in row0umod.c
- row_undo_mod_remove_clust_low() we have already removed the
- clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case there cannot be
- any implicit lock on the secondary index record, because
- an active transaction which has modified the secondary index
- record has also modified the clustered index record. And in
- a rollback we always undo the modifications to secondary index
- records before the clustered index record. */
-
- mutex_enter(&kernel_mutex);
- mtr_commit(&mtr);
-
- return(NULL);
- }
-
- heap = mem_heap_create(1024);
- clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
- ULINT_UNDEFINED, &heap);
- trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
-
- mtr_s_lock(&(purge_sys->latch), &mtr);
-
- mutex_enter(&kernel_mutex);
-
- trx = NULL;
- if (!trx_is_active(trx_id)) {
- /* The transaction that modified or inserted clust_rec is no
- longer active: no implicit lock on rec */
- goto exit_func;
- }
-
- if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
- clust_offsets, TRUE)) {
- /* Corruption noticed: try to avoid a crash by returning */
- goto exit_func;
- }
-
- comp = page_rec_is_comp(rec);
- ut_ad(index->table == clust_index->table);
- ut_ad(!!comp == dict_table_is_comp(index->table));
- ut_ad(!comp == !page_rec_is_comp(clust_rec));
-
- /* We look up if some earlier version, which was modified by the trx_id
- transaction, of the clustered index record would require rec to be in
- a different state (delete marked or unmarked, or have different field
- values, or not existing). If there is such a version, then rec was
- modified by the trx_id transaction, and it has an implicit x-lock on
- rec. Note that if clust_rec itself would require rec to be in a
- different state, then the trx_id transaction has not yet had time to
- modify rec, and does not necessarily have an implicit x-lock on rec. */
-
- rec_del = rec_get_deleted_flag(rec, comp);
- trx = NULL;
-
- version = clust_rec;
-
- for (;;) {
- mutex_exit(&kernel_mutex);
-
- /* While we retrieve an earlier version of clust_rec, we
- release the kernel mutex, because it may take time to access
- the disk. After the release, we have to check if the trx_id
- transaction is still active. We keep the semaphore in mtr on
- the clust_rec page, so that no other transaction can update
- it and get an implicit x-lock on rec. */
-
- heap2 = heap;
- heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(clust_rec, &mtr, version,
- clust_index, clust_offsets,
- heap, &prev_version);
- mem_heap_free(heap2); /* free version and clust_offsets */
-
- if (prev_version) {
- clust_offsets = rec_get_offsets(
- prev_version, clust_index, NULL,
- ULINT_UNDEFINED, &heap);
- row = row_build(ROW_COPY_POINTERS, clust_index,
- prev_version, clust_offsets, heap);
- entry = row_build_index_entry(row, index, heap);
- }
-
- mutex_enter(&kernel_mutex);
-
- if (!trx_is_active(trx_id)) {
- /* Transaction no longer active: no implicit x-lock */
-
- break;
- }
-
- /* If the transaction is still active, the previous version
- of clust_rec must be accessible if not a fresh insert; we
- may assert the following: */
-
- ut_ad(err == DB_SUCCESS);
-
- if (prev_version == NULL) {
- /* It was a freshly inserted version: there is an
- implicit x-lock on rec */
-
- trx = trx_get_on_id(trx_id);
-
- break;
- }
-
- /* If we get here, we know that the trx_id transaction is
- still active and it has modified prev_version. Let us check
- if prev_version would require rec to be in a different
- state. */
-
- vers_del = rec_get_deleted_flag(prev_version, comp);
-
- /* We check if entry and rec are identified in the alphabetical
- ordering */
- if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
- /* The delete marks of rec and prev_version should be
- equal for rec to be in the state required by
- prev_version */
-
- if (rec_del != vers_del) {
- trx = trx_get_on_id(trx_id);
-
- break;
- }
-
- /* It is possible that the row was updated so that the
- secondary index record remained the same in
- alphabetical ordering, but the field values changed
- still. For example, 'abc' -> 'ABC'. Check also that. */
-
- dtuple_set_types_binary(entry,
- dtuple_get_n_fields(entry));
- if (0 != cmp_dtuple_rec(entry, rec, offsets)) {
-
- trx = trx_get_on_id(trx_id);
-
- break;
- }
- } else if (!rec_del) {
- /* The delete mark should be set in rec for it to be
- in the state required by prev_version */
-
- trx = trx_get_on_id(trx_id);
-
- break;
- }
-
- prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
- clust_offsets);
-
- if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
- /* The versions modified by the trx_id transaction end
- to prev_version: no implicit x-lock */
-
- break;
- }
-
- version = prev_version;
- }/* for (;;) */
-
-exit_func:
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(trx);
-}
-
-/*********************************************************************
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view. */
-
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
- /* out: TRUE if earlier version should be preserved */
- dulint trx_id, /* in: transaction id in the version */
- mtr_t* mtr) /* in: mtr holding the latch on the clustered index
- record; it will also hold the latch on purge_view */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- mtr_s_lock(&(purge_sys->latch), mtr);
-
- if (trx_purge_update_undo_must_exist(trx_id)) {
-
- /* A purge operation is not yet allowed to remove this
- delete marked record */
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************
-Finds out if a version of the record, where the version >= the current
-purge view, should have ientry as its secondary index entry. We check
-if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry and ientry are identified in
-the alphabetical ordering; exactly in this case we return TRUE. */
-
-ibool
-row_vers_old_has_index_entry(
-/*=========================*/
- /* out: TRUE if earlier version should have */
- ibool also_curr,/* in: TRUE if also rec is included in the
- versions to search; otherwise only versions
- prior to it are searched */
- rec_t* rec, /* in: record in the clustered index; the
- caller must have a latch on the page */
- mtr_t* mtr, /* in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /* in: the secondary index */
- dtuple_t* ientry) /* in: the secondary index entry */
-{
- rec_t* version;
- rec_t* prev_version;
- dict_index_t* clust_index;
- ulint* clust_offsets;
- mem_heap_t* heap;
- mem_heap_t* heap2;
- dtuple_t* row;
- dtuple_t* entry;
- ulint err;
- ulint comp;
-
- ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains(mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
- mtr_s_lock(&(purge_sys->latch), mtr);
-
- clust_index = dict_table_get_first_index(index->table);
-
- comp = page_rec_is_comp(rec);
- ut_ad(!dict_table_is_comp(index->table) == !comp);
- heap = mem_heap_create(1024);
- clust_offsets = rec_get_offsets(rec, clust_index, NULL,
- ULINT_UNDEFINED, &heap);
-
- if (also_curr && !rec_get_deleted_flag(rec, comp)) {
- row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, clust_offsets, heap);
- entry = row_build_index_entry(row, index, heap);
-
- /* NOTE that we cannot do the comparison as binary
- fields because the row is maybe being modified so that
- the clustered index record has already been updated
- to a different binary value in a char field, but the
- collation identifies the old and new value anyway! */
-
- if (dtuple_datas_are_ordering_equal(ientry, entry)) {
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
- }
-
- version = rec;
-
- for (;;) {
- heap2 = heap;
- heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(rec, mtr, version,
- clust_index, clust_offsets,
- heap, &prev_version);
- mem_heap_free(heap2); /* free version and clust_offsets */
-
- if (err != DB_SUCCESS || !prev_version) {
- /* Versions end here */
-
- mem_heap_free(heap);
-
- return(FALSE);
- }
-
- clust_offsets = rec_get_offsets(prev_version, clust_index,
- NULL, ULINT_UNDEFINED, &heap);
-
- if (!rec_get_deleted_flag(prev_version, comp)) {
- row = row_build(ROW_COPY_POINTERS, clust_index,
- prev_version, clust_offsets, heap);
- entry = row_build_index_entry(row, index, heap);
-
- /* NOTE that we cannot do the comparison as binary
- fields because maybe the secondary index record has
- already been updated to a different binary value in
- a char field, but the collation identifies the old
- and new value anyway! */
-
- if (dtuple_datas_are_ordering_equal(ientry, entry)) {
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
- }
-
- version = prev_version;
- }
-}
-
-/*********************************************************************
-Constructs the version of a clustered index record which a consistent
-read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version. */
-
-ulint
-row_vers_build_for_consistent_read(
-/*===============================*/
- /* out: DB_SUCCESS or DB_MISSING_HISTORY */
- rec_t* rec, /* in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /* in: mtr holding the latch on rec */
- dict_index_t* index, /* in: the clustered index */
- ulint** offsets,/* in/out: offsets returned by
- rec_get_offsets(rec, index) */
- read_view_t* view, /* in: the consistent read view */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/* in: memory heap from which the memory for
- old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- rec_t** old_vers)/* out, own: old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
-{
- rec_t* version;
- rec_t* prev_version;
- dulint trx_id;
- mem_heap_t* heap = NULL;
- byte* buf;
- ulint err;
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains(mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(rec_offs_validate(rec, index, *offsets));
-
- trx_id = row_get_rec_trx_id(rec, index, *offsets);
-
- ut_ad(!read_view_sees_trx_id(view, trx_id));
-
- rw_lock_s_lock(&(purge_sys->latch));
- version = rec;
-
- for (;;) {
- mem_heap_t* heap2 = heap;
- trx_undo_rec_t* undo_rec;
- dulint roll_ptr;
- dulint undo_no;
- heap = mem_heap_create(1024);
-
- /* If we have high-granularity consistent read view and
- creating transaction of the view is the same as trx_id in
- the record we see this record only in the case when
- undo_no of the record is < undo_no in the view. */
-
- if (view->type == VIEW_HIGH_GRANULARITY
- && ut_dulint_cmp(view->creator_trx_id, trx_id) == 0) {
-
- roll_ptr = row_get_rec_roll_ptr(version, index,
- *offsets);
- undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
- undo_no = trx_undo_rec_get_undo_no(undo_rec);
- mem_heap_empty(heap);
-
- if (ut_dulint_cmp(view->undo_no, undo_no) > 0) {
- /* The view already sees this version: we can
- copy it to in_heap and return */
-
- buf = mem_heap_alloc(in_heap,
- rec_offs_size(*offsets));
- *old_vers = rec_copy(buf, version, *offsets);
- rec_offs_make_valid(*old_vers, index,
- *offsets);
- err = DB_SUCCESS;
-
- break;
- }
- }
-
- err = trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version);
- if (heap2) {
- mem_heap_free(heap2); /* free version */
- }
-
- if (err != DB_SUCCESS) {
- break;
- }
-
- if (prev_version == NULL) {
- /* It was a freshly inserted version */
- *old_vers = NULL;
- err = DB_SUCCESS;
-
- break;
- }
-
- *offsets = rec_get_offsets(prev_version, index, *offsets,
- ULINT_UNDEFINED, offset_heap);
-
- trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
-
- if (read_view_sees_trx_id(view, trx_id)) {
-
- /* The view already sees this version: we can copy
- it to in_heap and return */
-
- buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
- *old_vers = rec_copy(buf, prev_version, *offsets);
- rec_offs_make_valid(*old_vers, index, *offsets);
- err = DB_SUCCESS;
-
- break;
- }
-
- version = prev_version;
- }/* for (;;) */
-
- mem_heap_free(heap);
- rw_lock_s_unlock(&(purge_sys->latch));
-
- return(err);
-}
-
-/*********************************************************************
-Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read. */
-
-ulint
-row_vers_build_for_semi_consistent_read(
-/*====================================*/
- /* out: DB_SUCCESS or DB_MISSING_HISTORY */
- rec_t* rec, /* in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /* in: mtr holding the latch on rec */
- dict_index_t* index, /* in: the clustered index */
- ulint** offsets,/* in/out: offsets returned by
- rec_get_offsets(rec, index) */
- mem_heap_t** offset_heap,/* in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/* in: memory heap from which the memory for
- old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- rec_t** old_vers)/* out, own: rec, old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
-{
- rec_t* version;
- mem_heap_t* heap = NULL;
- byte* buf;
- ulint err;
- dulint rec_trx_id = ut_dulint_create(0, 0);
-
- ut_ad(index->type & DICT_CLUSTERED);
- ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains(mtr, buf_block_align(rec),
- MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(rec_offs_validate(rec, index, *offsets));
-
- rw_lock_s_lock(&(purge_sys->latch));
- /* The S-latch on purge_sys prevents the purge view from
- changing. Thus, if we have an uncommitted transaction at
- this point, then purge cannot remove its undo log even if
- the transaction could commit now. */
-
- version = rec;
-
- for (;;) {
- trx_t* version_trx;
- mem_heap_t* heap2;
- rec_t* prev_version;
- dulint version_trx_id;
-
- version_trx_id = row_get_rec_trx_id(version, index, *offsets);
- if (rec == version) {
- rec_trx_id = version_trx_id;
- }
-
- mutex_enter(&kernel_mutex);
- version_trx = trx_get_on_id(version_trx_id);
- mutex_exit(&kernel_mutex);
-
- if (!version_trx
- || version_trx->conc_state == TRX_NOT_STARTED
- || version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
-
- /* We found a version that belongs to a
- committed transaction: return it. */
-
- if (rec == version) {
- *old_vers = rec;
- err = DB_SUCCESS;
- break;
- }
-
- /* We assume that a rolled-back transaction stays in
- TRX_ACTIVE state until all the changes have been
- rolled back and the transaction is removed from
- the global list of transactions. */
-
- if (!ut_dulint_cmp(rec_trx_id, version_trx_id)) {
- /* The transaction was committed while
- we searched for earlier versions.
- Return the current version as a
- semi-consistent read. */
-
- version = rec;
- *offsets = rec_get_offsets(version,
- index, *offsets,
- ULINT_UNDEFINED,
- offset_heap);
- }
-
- buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
- *old_vers = rec_copy(buf, version, *offsets);
- rec_offs_make_valid(*old_vers, index, *offsets);
- err = DB_SUCCESS;
-
- break;
- }
-
- heap2 = heap;
- heap = mem_heap_create(1024);
-
- err = trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version);
- if (heap2) {
- mem_heap_free(heap2); /* free version */
- }
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- break;
- }
-
- if (prev_version == NULL) {
- /* It was a freshly inserted version */
- *old_vers = NULL;
- err = DB_SUCCESS;
-
- break;
- }
-
- version = prev_version;
- *offsets = rec_get_offsets(version, index, *offsets,
- ULINT_UNDEFINED, offset_heap);
- }/* for (;;) */
-
- if (heap) {
- mem_heap_free(heap);
- }
- rw_lock_s_unlock(&(purge_sys->latch));
-
- return(err);
-}
diff --git a/storage/innobase/srv/srv0que.c b/storage/innobase/srv/srv0que.c
deleted file mode 100644
index e2b4e217980..00000000000
--- a/storage/innobase/srv/srv0que.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/******************************************************
-Server query execution
-
-(c) 1996 Innobase Oy
-
-Created 6/5/1996 Heikki Tuuri
-*******************************************************/
-
-#include "srv0que.h"
-
-#include "srv0srv.h"
-#include "sync0sync.h"
-#include "os0thread.h"
-#include "usr0sess.h"
-#include "que0que.h"
-
-/**************************************************************************
-Checks if there is work to do in the server task queue. If there is, the
-thread starts processing a task. Before leaving, it again checks the task
-queue and picks a new task if any exists. This is called by a SRV_WORKER
-thread. */
-
-void
-srv_que_task_queue_check(void)
-/*==========================*/
-{
- que_thr_t* thr;
-
- for (;;) {
- mutex_enter(&kernel_mutex);
-
- thr = UT_LIST_GET_FIRST(srv_sys->tasks);
-
- if (thr == NULL) {
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- UT_LIST_REMOVE(queue, srv_sys->tasks, thr);
-
- mutex_exit(&kernel_mutex);
-
- que_run_threads(thr);
- }
-}
-
-/**************************************************************************
-Performs round-robin on the server tasks. This is called by a SRV_WORKER
-thread every second or so. */
-
-que_thr_t*
-srv_que_round_robin(
-/*================*/
- /* out: the new (may be == thr) query thread
- to run */
- que_thr_t* thr) /* in: query thread */
-{
- que_thr_t* new_thr;
-
- ut_ad(thr);
- ut_ad(thr->state == QUE_THR_RUNNING);
-
- mutex_enter(&kernel_mutex);
-
- UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
-
- new_thr = UT_LIST_GET_FIRST(srv_sys->tasks);
-
- mutex_exit(&kernel_mutex);
-
- return(new_thr);
-}
-
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
-
-void
-srv_que_task_enqueue_low(
-/*=====================*/
- que_thr_t* thr) /* in: query thread */
-{
- ut_ad(thr);
- ut_ad(mutex_own(&kernel_mutex));
-
- UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
-
- srv_release_threads(SRV_WORKER, 1);
-}
-
-/**************************************************************************
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
-
-void
-srv_que_task_enqueue(
-/*=================*/
- que_thr_t* thr) /* in: query thread */
-{
- ut_ad(thr);
-
- ut_a(0); /* Under MySQL this is never called */
-
- mutex_enter(&kernel_mutex);
-
- srv_que_task_enqueue_low(thr);
-
- mutex_exit(&kernel_mutex);
-}
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
deleted file mode 100644
index 36c3d450aae..00000000000
--- a/storage/innobase/srv/srv0srv.c
+++ /dev/null
@@ -1,2885 +0,0 @@
-/******************************************************
-The database server main program
-
-NOTE: SQL Server 7 uses something which the documentation
-calls user mode scheduled threads (UMS threads). One such
-thread is usually allocated per processor. Win32
-documentation does not know any UMS threads, which suggests
-that the concept is internal to SQL Server 7. It may mean that
-SQL Server 7 does all the scheduling of threads itself, even
-in i/o waits. We should maybe modify InnoDB to use the same
-technique, because thread switches within NT may be too slow.
-
-SQL Server 7 also mentions fibers, which are cooperatively
-scheduled threads. They can boost performance by 5 %,
-according to the Delaney and Soukup's book.
-
-Windows 2000 will have something called thread pooling
-(see msdn website), which we could possibly use.
-
-Another possibility could be to use some very fast user space
-thread library. This might confuse NT though.
-
-(c) 1995 Innobase Oy
-
-Created 10/8/1995 Heikki Tuuri
-*******************************************************/
-/* Dummy comment */
-#include "srv0srv.h"
-
-#include "ut0mem.h"
-#include "os0proc.h"
-#include "mem0mem.h"
-#include "mem0pool.h"
-#include "sync0sync.h"
-#include "thr0loc.h"
-#include "que0que.h"
-#include "srv0que.h"
-#include "log0recv.h"
-#include "pars0pars.h"
-#include "usr0sess.h"
-#include "lock0lock.h"
-#include "trx0purge.h"
-#include "ibuf0ibuf.h"
-#include "buf0flu.h"
-#include "btr0sea.h"
-#include "dict0load.h"
-#include "dict0boot.h"
-#include "srv0start.h"
-#include "row0mysql.h"
-#include "ha_prototypes.h"
-
-/* This is set to TRUE if the MySQL user has set it in MySQL; currently
-affects only FOREIGN KEY definition parsing */
-ibool srv_lower_case_table_names = FALSE;
-
-/* The following counter is incremented whenever there is some user activity
-in the server */
-ulint srv_activity_count = 0;
-
-/* The following is the maximum allowed duration of a lock wait. */
-ulint srv_fatal_semaphore_wait_threshold = 600;
-
-/* How much data manipulation language (DML) statements need to be delayed,
-in microseconds, in order to reduce the lagging of the purge thread. */
-ulint srv_dml_needed_delay = 0;
-
-ibool srv_lock_timeout_and_monitor_active = FALSE;
-ibool srv_error_monitor_active = FALSE;
-
-const char* srv_main_thread_op_info = "";
-
-/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
-const char srv_mysql50_table_name_prefix[9] = "#mysql50#";
-
-/* Server parameters which are read from the initfile */
-
-/* The following three are dir paths which are catenated before file
-names, where the file name itself may also contain a path */
-
-char* srv_data_home = NULL;
-#ifdef UNIV_LOG_ARCHIVE
-char* srv_arch_dir = NULL;
-#endif /* UNIV_LOG_ARCHIVE */
-
-ibool srv_file_per_table = FALSE; /* store to its own file each table
- created by an user; data dictionary
- tables are in the system tablespace
- 0 */
-ibool srv_locks_unsafe_for_binlog = FALSE; /* Place locks to
- records only i.e. do
- not use next-key
- locking except on
- duplicate key checking
- and foreign key
- checking */
-ulint srv_n_data_files = 0;
-char** srv_data_file_names = NULL;
-ulint* srv_data_file_sizes = NULL; /* size in database pages */
-
-ibool srv_auto_extend_last_data_file = FALSE; /* if TRUE, then we
- auto-extend the last data
- file */
-ulint srv_last_file_size_max = 0; /* if != 0, this tells
- the max size auto-extending
- may increase the last data
- file size */
-ulong srv_auto_extend_increment = 8; /* If the last data file is
- auto-extended, we add this
- many pages to it at a time */
-ulint* srv_data_file_is_raw_partition = NULL;
-
-/* If the following is TRUE we do not allow inserts etc. This protects
-the user from forgetting the 'newraw' keyword to my.cnf */
-
-ibool srv_created_new_raw = FALSE;
-
-char** srv_log_group_home_dirs = NULL;
-
-ulint srv_n_log_groups = ULINT_MAX;
-ulint srv_n_log_files = ULINT_MAX;
-ulint srv_log_file_size = ULINT_MAX; /* size in database pages */
-ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */
-ulong srv_flush_log_at_trx_commit = 1;
-
-byte srv_latin1_ordering[256] /* The sort order table of the latin1
- character set. The following table is
- the MySQL order as of Feb 10th, 2002 */
-= {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
-, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
-, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
-, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
-, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
-, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
-, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
-, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
-, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
-, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
-, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
-, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
-, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
-, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
-, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
-, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
-, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
-, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
-, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
-, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
-, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
-, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
-, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
-, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
-, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
-, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
-, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
-, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
-, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
-, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
-, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
-};
-
-ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits
- this to size in kilobytes but
- we normalize this to pages in
- srv_boot() */
-ulint srv_awe_window_size = 0; /* size in pages; MySQL inits
- this to bytes, but we
- normalize it to pages in
- srv_boot() */
-ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
-ulint srv_lock_table_size = ULINT_MAX;
-
-
-ulint srv_io_capacity = ULINT_MAX; /* Number of IO operations per
- second the server can do */
-
-ibool srv_extra_dirty_writes = TRUE; /* Write dirty pages to disk when pct
- dirty < max dirty pct */
-
-ulint srv_n_read_io_threads = ULINT_MAX;
-ulint srv_n_write_io_threads = ULINT_MAX;
-ulint srv_max_merged_io = 64;
-
-#ifdef UNIV_LOG_ARCHIVE
-ibool srv_log_archive_on = FALSE;
-ibool srv_archive_recovery = 0;
-dulint srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
-
-/* This parameter is used to throttle the number of insert buffers that are
-merged in a batch. By increasing this parameter on a faster disk you can
-possibly reduce the number of I/O operations performed to complete the
-merge operation. The value of this parameter is used as is by the
-background loop when the system is idle (low load), on a busy system
-the parameter is scaled down by a factor of 4, this is to avoid putting
-a heavier load on the I/O sub system. */
-
-ulong srv_insert_buffer_batch_size = 20;
-
-char* srv_file_flush_method_str = NULL;
-ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-
-ulint srv_max_n_open_files = 300;
-
-/* The InnoDB main thread tries to keep the ratio of modified pages
-in the buffer pool to all database pages in the buffer pool smaller than
-the following number. But it is not guaranteed that the value stays below
-that during a time of heavy update/insert activity. */
-
-ulong srv_max_buf_pool_modified_pct = 90;
-
-/* variable counts amount of data read in total (in bytes) */
-ulint srv_data_read = 0;
-
-/* here we count the amount of data written in total (in bytes) */
-ulint srv_data_written = 0;
-
-/* the number of the log write requests done */
-ulint srv_log_write_requests = 0;
-
-/* the number of physical writes to the log performed */
-ulint srv_log_writes = 0;
-
-/* amount of data written to the log files in bytes */
-ulint srv_os_log_written = 0;
-
-/* amount of writes being done to the log files */
-ulint srv_os_log_pending_writes = 0;
-
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-ulint srv_log_waits = 0;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-ulint srv_dblwr_writes = 0;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-ulint srv_dblwr_pages_written = 0;
-
-/* in this variable we store the number of write requests issued */
-ulint srv_buf_pool_write_requests = 0;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-ulint srv_buf_pool_wait_free = 0;
-
-/* variable to count the number of pages that were written from buffer
-pool to the disk */
-ulint srv_buf_pool_flushed = 0;
-
-/* variable to count the number of buffer pool reads that led to the
-reading of a disk page */
-ulint srv_buf_pool_reads = 0;
-
-/* variable to count the number of sequential read-aheads */
-ulint srv_read_ahead_seq = 0;
-
-/* variable to count the number of random read-aheads */
-ulint srv_read_ahead_rnd = 0;
-
-/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does
-NOT update cardinality for indexes of InnoDB table". By default we are
-running with the fix disabled because MySQL 5.1 is frozen for such
-behavioral changes. */
-char srv_use_legacy_cardinality_algorithm = TRUE;
-
-/* structure to pass status variables to MySQL */
-export_struc export_vars;
-
-/* If the following is != 0 we do not allow inserts etc. This protects
-the user from forgetting the innodb_force_recovery keyword to my.cnf */
-
-ulint srv_force_recovery = 0;
-/*-----------------------*/
-/* We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
-ulint srv_max_n_threads = 0;
-
-/* The following controls how many threads we let inside InnoDB concurrently:
-threads waiting for locks are not counted into the number because otherwise
-we could get a deadlock. MySQL creates a thread for each user session, and
-semaphore contention and convoy problems can occur withput this restriction.
-Value 10 should be good if there are less than 4 processors + 4 disks in the
-computer. Bigger computers need bigger values. Value 0 will disable the
-concurrency check. */
-
-ibool srv_thread_concurrency_timer_based = TRUE;
-ulong srv_thread_concurrency = 0;
-ulong srv_commit_concurrency = 0;
-
-os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data
- structures */
-lint srv_conc_n_threads = 0; /* number of transactions that
- have declared_to_be_inside_innodb
- set. It used to be a non-error
- for this value to drop below
- zero temporarily. This is no
- longer true. We'll, however,
- keep the lint datatype to add
- assertions to catch any corner
- cases that we may have
- missed. */
-ulint srv_conc_n_waiting_threads = 0; /* number of OS threads waiting in the
- FIFO for a permission to enter InnoDB
- */
-
-typedef struct srv_conc_slot_struct srv_conc_slot_t;
-struct srv_conc_slot_struct{
- os_event_t event; /* event to wait */
- ibool reserved; /* TRUE if slot
- reserved */
- ibool wait_ended; /* TRUE when another
- thread has already set
- the event and the
- thread in this slot is
- free to proceed; but
- reserved may still be
- TRUE at that point */
- UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue node */
-};
-
-UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue of threads
- waiting to get in */
-srv_conc_slot_t* srv_conc_slots; /* array of wait
- slots */
-
-/* Number of times a thread is allowed to enter InnoDB within the same
-SQL query after it has once got the ticket at srv_conc_enter_innodb */
-#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
-#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
-/*-----------------------*/
-/* If the following is set to 1 then we do not run purge and insert buffer
-merge to completion before shutdown. If it is set to 2, do not even flush the
-buffer pool to data files at the shutdown: we effectively 'crash'
-InnoDB (but lose no committed transactions). */
-ulint srv_fast_shutdown = 0;
-
-/* Generate a innodb_status.<pid> file */
-ibool srv_innodb_status = FALSE;
-
-ibool srv_use_doublewrite_buf = TRUE;
-ibool srv_use_checksums = TRUE;
-
-ibool srv_set_thread_priorities = TRUE;
-int srv_query_thread_priority = 0;
-
-/* TRUE if the Address Windowing Extensions of Windows are used; then we must
-disable adaptive hash indexes */
-ibool srv_use_awe = FALSE;
-ibool srv_use_adaptive_hash_indexes = TRUE;
-
-/*-------------------------------------------*/
-ulong srv_n_spin_wait_rounds = 30;
-ulong srv_n_free_tickets_to_enter = 500;
-ulong srv_thread_sleep_delay = 10000;
-ulint srv_spin_wait_delay = 6;
-ibool srv_priority_boost = TRUE;
-
-ibool srv_print_thread_releases = FALSE;
-ibool srv_print_lock_waits = FALSE;
-ibool srv_print_buf_io = FALSE;
-ibool srv_print_log_io = FALSE;
-ibool srv_print_latch_waits = FALSE;
-
-ulint srv_n_rows_inserted = 0;
-ulint srv_n_rows_updated = 0;
-ulint srv_n_rows_deleted = 0;
-ulint srv_n_rows_read = 0;
-#ifndef UNIV_HOTBACKUP
-static ulint srv_n_rows_inserted_old = 0;
-static ulint srv_n_rows_updated_old = 0;
-static ulint srv_n_rows_deleted_old = 0;
-static ulint srv_n_rows_read_old = 0;
-#endif /* !UNIV_HOTBACKUP */
-
-ulint srv_n_lock_wait_count = 0;
-ulint srv_n_lock_wait_current_count = 0;
-ib_longlong srv_n_lock_wait_time = 0;
-ulint srv_n_lock_max_wait_time = 0;
-
-
-/*
- Set the following to 0 if you want InnoDB to write messages on
- stderr on startup/shutdown
-*/
-ibool srv_print_verbose_log = TRUE;
-ibool srv_print_innodb_monitor = FALSE;
-ibool srv_print_innodb_lock_monitor = FALSE;
-ibool srv_print_innodb_tablespace_monitor = FALSE;
-ibool srv_print_innodb_table_monitor = FALSE;
-
-/* Array of English strings describing the current state of an
-i/o handler thread */
-
-const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
-const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
-
-time_t srv_last_monitor_time;
-
-mutex_t srv_innodb_monitor_mutex;
-
-/* Mutex for locking srv_monitor_file */
-mutex_t srv_monitor_file_mutex;
-/* Temporary file for innodb monitor output */
-FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellanous diagnostic output */
-FILE* srv_misc_tmpfile;
-
-ulint srv_main_thread_process_no = 0;
-ulint srv_main_thread_id = 0;
-
-/* The following count work done by srv_master_thread. */
-
-/* Iterations by the 'once per second' loop */
-ulint srv_main_1_second_loops = 0;
-/* Calls to sleep by the 'once per second' loop */
-ulint srv_main_sleeps = 0;
-/* Iterations by the 'once per 10 seconds' loop */
-ulint srv_main_10_second_loops = 0;
-/* Iterations of the loop bounded by the 'background_loop' label */
-ulint srv_main_background_loops = 0;
-/* Iterations of the loop bounded by the 'flush_loop' label */
-ulint srv_main_flush_loops = 0;
-/* Calls to log_buffer_flush_to_disk */
-ulint srv_sync_flush = 0;
-/* Calls to log_buffer_flush_maybe_sync */
-ulint srv_async_flush = 0;
-
-/* Number of microseconds threads wait because of
-innodb_thread_concurrency */
-static ib_longlong srv_thread_wait_mics = 0;
-
-/* Number of microseconds for spinlock delay */
-static ib_longlong srv_timed_spin_delay = 0;
-
-/*
- IMPLEMENTATION OF THE SERVER MAIN PROGRAM
- =========================================
-
-There is the following analogue between this database
-server and an operating system kernel:
-
-DB concept equivalent OS concept
----------- ---------------------
-transaction -- process;
-
-query thread -- thread;
-
-lock -- semaphore;
-
-transaction set to
-the rollback state -- kill signal delivered to a process;
-
-kernel -- kernel;
-
-query thread execution:
-(a) without kernel mutex
-reserved -- process executing in user mode;
-(b) with kernel mutex reserved
- -- process executing in kernel mode;
-
-The server is controlled by a master thread which runs at
-a priority higher than normal, that is, higher than user threads.
-It sleeps most of the time, and wakes up, say, every 300 milliseconds,
-to check whether there is anything happening in the server which
-requires intervention of the master thread. Such situations may be,
-for example, when flushing of dirty blocks is needed in the buffer
-pool or old version of database rows have to be cleaned away.
-
-The threads which we call user threads serve the queries of
-the clients and input from the console of the server.
-They run at normal priority. The server may have several
-communications endpoints. A dedicated set of user threads waits
-at each of these endpoints ready to receive a client request.
-Each request is taken by a single user thread, which then starts
-processing and, when the result is ready, sends it to the client
-and returns to wait at the same endpoint the thread started from.
-
-So, we do not have dedicated communication threads listening at
-the endpoints and dealing the jobs to dedicated worker threads.
-Our architecture saves one thread swithch per request, compared
-to the solution with dedicated communication threads
-which amounts to 15 microseconds on 100 MHz Pentium
-running NT. If the client
-is communicating over a network, this saving is negligible, but
-if the client resides in the same machine, maybe in an SMP machine
-on a different processor from the server thread, the saving
-can be important as the threads can communicate over shared
-memory with an overhead of a few microseconds.
-
-We may later implement a dedicated communication thread solution
-for those endpoints which communicate over a network.
-
-Our solution with user threads has two problems: for each endpoint
-there has to be a number of listening threads. If there are many
-communication endpoints, it may be difficult to set the right number
-of concurrent threads in the system, as many of the threads
-may always be waiting at less busy endpoints. Another problem
-is queuing of the messages, as the server internally does not
-offer any queue for jobs.
-
-Another group of user threads is intended for splitting the
-queries and processing them in parallel. Let us call these
-parallel communication threads. These threads are waiting for
-parallelized tasks, suspended on event semaphores.
-
-A single user thread waits for input from the console,
-like a command to shut the database.
-
-Utility threads are a different group of threads which takes
-care of the buffer pool flushing and other, mainly background
-operations, in the server.
-Some of these utility threads always run at a lower than normal
-priority, so that they are always in background. Some of them
-may dynamically boost their priority by the pri_adjust function,
-even to higher than normal priority, if their task becomes urgent.
-The running of utilities is controlled by high- and low-water marks
-of urgency. The urgency may be measured by the number of dirty blocks
-in the buffer pool, in the case of the flush thread, for example.
-When the high-water mark is exceeded, an utility starts running, until
-the urgency drops under the low-water mark. Then the utility thread
-suspend itself to wait for an event. The master thread is
-responsible of signaling this event when the utility thread is
-again needed.
-
-For each individual type of utility, some threads always remain
-at lower than normal priority. This is because pri_adjust is implemented
-so that the threads at normal or higher priority control their
-share of running time by calling sleep. Thus, if the load of the
-system suddenly drops, these threads cannot necessarily utilize
-the system fully. The background priority threads make up for this,
-starting to run when the load drops.
-
-When there is no activity in the system, also the master thread
-suspends itself to wait for an event making
-the server totally silent. The responsibility to signal this
-event is on the user thread which again receives a message
-from a client.
-
-There is still one complication in our server design. If a
-background utility thread obtains a resource (e.g., mutex) needed by a user
-thread, and there is also some other user activity in the system,
-the user thread may have to wait indefinitely long for the
-resource, as the OS does not schedule a background thread if
-there is some other runnable user thread. This problem is called
-priority inversion in real-time programming.
-
-One solution to the priority inversion problem would be to
-keep record of which thread owns which resource and
-in the above case boost the priority of the background thread
-so that it will be scheduled and it can release the resource.
-This solution is called priority inheritance in real-time programming.
-A drawback of this solution is that the overhead of acquiring a mutex
-increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
-the thread has to call os_thread_get_curr_id.
-This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
-pair. Note that the thread
-cannot store the information in the resource, say mutex, itself,
-because competing threads could wipe out the information if it is
-stored before acquiring the mutex, and if it is stored afterwards,
-the information is outdated for the time of one machine instruction,
-at least. (To be precise, the information could be stored to
-lock_word in mutex if the machine supports atomic swap.)
-
-The above solution with priority inheritance may become relevant in the
-future, but at the moment we plan to implement a more coarse solution,
-which could be called a global priority inheritance. If a thread
-has to wait for a long time, say 300 milliseconds, for a resource,
-we just guess that it may be waiting for a resource owned by a background
-thread, and boost the priority of all runnable background threads
-to the normal level. The background threads then themselves adjust
-their fixed priority back to background after releasing all resources
-they had (or, at some fixed points in their program code).
-
-What is the performance of the global priority inheritance solution?
-We may weigh the length of the wait time, 300 milliseconds, during
-which the system processes some other thread,
-against the cost of boosting the priority of each runnable background
-thread, rescheduling it, and lowering the priority again.
-On 100 MHz Pentium + NT this overhead may be of the order 100
-microseconds per thread. So, if the number of runnable background
-threads is not very big, say < 100, the cost is tolerable.
-Utility threads probably will access resources used by
-user threads not very often, so collisions of user threads
-with preempted utility threads should not happen very often.
-
-The thread table contains
-information of the current status of each thread existing in the system,
-and also the event semaphores used in suspending the master thread
-and utility and parallel communication threads when they have nothing to do.
-The thread table can be seen as an analogue to the process table
-in a traditional Unix implementation.
-
-The thread table is also used in the global priority inheritance
-scheme. This brings in one additional complication: threads accessing
-the thread table must have at least normal fixed priority,
-because the priority inheritance solution does not work if a background
-thread is preempted while possessing the mutex protecting the thread table.
-So, if a thread accesses the thread table, its priority has to be
-boosted at least to normal. This priority requirement can be seen similar to
-the privileged mode used when processing the kernel calls in traditional
-Unix.*/
-
-/* Thread slot in the thread table. srv_init() allocates two arrays of
-srv_slot_t (presumably the typedef of this struct; the typedef is not
-visible here): srv_sys->threads and srv_mysql_table. */
-struct srv_slot_struct{
- os_thread_id_t id; /* thread id */
- os_thread_t handle; /* thread handle */
- ulint type; /* thread type: user, utility etc. */
- ibool in_use; /* TRUE if this slot is in use */
- ibool suspended; /* TRUE if the thread is waiting
- for the event of this slot */
- ib_time_t suspend_time; /* time when the thread was
- suspended */
- os_event_t event; /* event used in suspending the
- thread when it has nothing to do */
- que_thr_t* thr; /* suspended query thread (only
- used for MySQL threads) */
-};
-
-/* Table for MySQL threads where they will be suspended to wait for locks.
-Allocated in srv_init() with OS_THREAD_MAX_N slots. */
-srv_slot_t* srv_mysql_table = NULL;
-
-os_event_t srv_lock_timeout_thread_event; /* created in srv_init(); set by
- srv_suspend_mysql_thread() to wake the lock
- timeout monitor thread */
-
-srv_sys_t* srv_sys = NULL; /* allocated in srv_init() */
-
-byte srv_pad1[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line */
-mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
- query threads, and lock table; the code below
- refers to it as kernel_mutex (presumably a
- macro for *kernel_mutex_temp - TODO confirm) */
-byte srv_pad2[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line */
-
-/* The following three values measure the urgency of the jobs of
-buffer, version, and insert threads. They may vary from 0 - 1000.
-The server mutex protects all these variables. The low-water values
-tell that the server can acquiesce the utility when the value
-drops below this low-water mark. All five arrays below are indexed by
-thread type and are given their initial values in srv_init(). */
-
-ulint srv_meter[SRV_MASTER + 1];
-ulint srv_meter_low_water[SRV_MASTER + 1];
-ulint srv_meter_high_water[SRV_MASTER + 1];
-ulint srv_meter_high_water2[SRV_MASTER + 1];
-ulint srv_meter_foreground[SRV_MASTER + 1];
-
-/* The following values give info about the activity going on in
-the database. They are protected by the server mutex. The arrays
-are indexed by the type of the thread. */
-
-ulint srv_n_threads_active[SRV_MASTER + 1];
-ulint srv_n_threads[SRV_MASTER + 1];
-
-static void time_spin_delay() /* Times one full spin-wait cycle: performs
-SYNC_SPIN_ROUNDS calls of ut_delay(ut_rnd_interval(0, srv_spin_wait_delay))
-and stores the elapsed time, as computed by ut_usecdiff(), in
-srv_timed_spin_delay. If either ut_usectime() call returns nonzero the
-function returns early and srv_timed_spin_delay stays 0. */
-{
- ulint start_sec, end_sec;
- ulint start_usec, end_usec;
- int i;
-
- srv_timed_spin_delay = 0; /* value reported if timing fails */
-
- if (ut_usectime(&start_sec, &start_usec))
- return;
-
- for (i = 0; i < (int)SYNC_SPIN_ROUNDS; ++i)
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
-
- if (ut_usectime(&end_sec, &end_usec))
- return;
-
- srv_timed_spin_delay =ut_usecdiff(end_sec, end_usec,
- start_sec, start_usec);
-}
-
-/*************************************************************************
-Prints counters for work done by srv_master_thread: the loop counters, the
-sync/async log flush counters, the accumulated thread wait time
-(srv_thread_wait_mics, printed both in microseconds and in seconds), and the
-spin delay measured by time_spin_delay() (srv_timed_spin_delay).
-NOTE(review): the %lu and %lld conversions assume the counters have matching
-integer types; their declarations are not visible here - confirm. */
-
-static
-void
-srv_print_extra(
-/*===================*/
- FILE *file) /* in: output stream */
-{
- fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
- "%lu 10_second, %lu background, %lu flush\n",
- srv_main_1_second_loops, srv_main_sleeps,
- srv_main_10_second_loops, srv_main_background_loops,
- srv_main_flush_loops);
- fprintf(file, "srv_master_thread log flush: %lu sync, %lu async\n",
- srv_sync_flush, srv_async_flush);
- fprintf(file, "srv_wait_thread_mics %lld microseconds, %.1f seconds\n",
- srv_thread_wait_mics,
- (double) srv_thread_wait_mics / 1000000.0);
- fprintf(file,
- "spinlock delay for %d delay %d rounds is %lld mics\n",
- (int)srv_spin_wait_delay,
- (int)SYNC_SPIN_ROUNDS,
- srv_timed_spin_delay);
-}
-
-/*************************************************************************
-Sets the info describing an i/o thread current state. The pointer str is
-stored as is in srv_io_thread_op_info[i]; the string is not copied. The
-index is checked with ut_a against SRV_MAX_N_IO_THREADS. */
-
-void
-srv_set_io_thread_op_info(
-/*======================*/
- ulint i, /* in: the 'segment' of the i/o thread */
- const char* str) /* in: constant char string describing the
- state */
-{
- ut_a(i < SRV_MAX_N_IO_THREADS);
-
- srv_io_thread_op_info[i] = str;
-}
-
-/*************************************************************************
-Accessor function to get pointer to n'th slot in the server thread
-table (srv_sys->threads). The index is checked with ut_a to be below
-OS_THREAD_MAX_N. */
-static
-srv_slot_t*
-srv_table_get_nth_slot(
-/*===================*/
- /* out: pointer to the slot */
- ulint index) /* in: index of the slot */
-{
- ut_a(index < OS_THREAD_MAX_N);
-
- return(srv_sys->threads + index);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Gets the number of threads in the system: the sum of srv_n_threads[] over
-the thread types SRV_COM ... SRV_MASTER, read while holding the kernel
-mutex. */
-
-ulint
-srv_get_n_threads(void)
-/*===================*/
- /* out: total number of threads */
-{
- ulint i;
- ulint n_threads = 0;
-
- mutex_enter(&kernel_mutex);
-
- for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
-
- n_threads += srv_n_threads[i];
- }
-
- mutex_exit(&kernel_mutex);
-
- return(n_threads);
-}
-
-/*************************************************************************
-Reserves a slot in the thread table for the current thread. Also creates the
-thread local storage struct for the current thread. NOTE! The server mutex
-has to be reserved by the caller!
-The first slot with in_use == FALSE is taken. The scan has no explicit upper
-bound of its own: if every slot is in use, the ut_a in
-srv_table_get_nth_slot() fails when the index reaches OS_THREAD_MAX_N. */
-static
-ulint
-srv_table_reserve_slot(
-/*===================*/
- /* out: reserved slot index */
- ulint type) /* in: type of the thread: one of SRV_COM, ... */
-{
- srv_slot_t* slot;
- ulint i;
-
- ut_a(type > 0);
- ut_a(type <= SRV_MASTER);
-
- i = 0;
- slot = srv_table_get_nth_slot(i);
-
- while (slot->in_use) { /* linear search for a free slot */
- i++;
- slot = srv_table_get_nth_slot(i);
- }
-
- ut_a(slot->in_use == FALSE);
-
- slot->in_use = TRUE;
- slot->suspended = FALSE;
- slot->id = os_thread_get_curr_id();
- slot->handle = os_thread_get_curr();
- slot->type = type;
-
- thr_local_create();
-
- thr_local_set_slot_no(os_thread_get_curr_id(), i); /* remember the slot
- index so that srv_suspend_thread() and
- srv_get_thread_type() can look it up */
-
- return(i);
-}
-
-/*************************************************************************
-Suspends the calling thread to wait for the event in its thread slot.
-NOTE! The server mutex has to be reserved by the caller!
-This function does not wait itself: it marks the slot of the calling thread
-as suspended, decrements srv_n_threads_active[] for the thread type, resets
-the slot event and returns that event to the caller. */
-static
-os_event_t
-srv_suspend_thread(void)
-/*====================*/
- /* out: event for the calling thread to wait */
-{
- srv_slot_t* slot;
- os_event_t event;
- ulint slot_no;
- ulint type;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
-
- if (srv_print_thread_releases) {
- fprintf(stderr,
- "Suspending thread %lu to slot %lu meter %lu\n",
- (ulong) os_thread_get_curr_id(), (ulong) slot_no,
- (ulong) srv_meter[SRV_RECOVERY]);
- }
-
- slot = srv_table_get_nth_slot(slot_no);
-
- type = slot->type;
-
- ut_ad(type >= SRV_WORKER);
- ut_ad(type <= SRV_MASTER);
-
- event = slot->event;
-
- slot->suspended = TRUE;
-
- ut_ad(srv_n_threads_active[type] > 0);
-
- srv_n_threads_active[type]--;
-
- os_event_reset(event); /* reset before returning, while the caller
- still holds the kernel mutex */
-
- return(event);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************************
-Releases threads of the type given from suspension in the thread table.
-NOTE! The server mutex has to be reserved by the caller!
-The table is scanned from slot 0 upwards; each matching suspended slot is
-marked not suspended, srv_n_threads_active[type] is incremented and the slot
-event is set. The scan stops as soon as n threads have been released. */
-
-ulint
-srv_release_threads(
-/*================*/
- /* out: number of threads released: this may be
- < n if not enough threads were suspended at the
- moment */
- ulint type, /* in: thread type */
- ulint n) /* in: number of threads to release */
-{
- srv_slot_t* slot;
- ulint i;
- ulint count = 0;
-
- ut_ad(type >= SRV_WORKER);
- ut_ad(type <= SRV_MASTER);
- ut_ad(n > 0);
- ut_ad(mutex_own(&kernel_mutex));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_table_get_nth_slot(i);
-
- if (slot->in_use && slot->type == type && slot->suspended) {
-
- slot->suspended = FALSE;
-
- srv_n_threads_active[type]++;
-
- os_event_set(slot->event);
-
- if (srv_print_thread_releases) {
- fprintf(stderr,
- "Releasing thread %lu type %lu"
- " from slot %lu meter %lu\n",
- (ulong) slot->id, (ulong) type,
- (ulong) i,
- (ulong) srv_meter[SRV_RECOVERY]);
- }
-
- count++;
-
- if (count == n) { /* released as many as requested */
- break;
- }
- }
- }
-
- return(count);
-}
-
-/*************************************************************************
-Returns the calling thread type. The slot number of the calling thread is
-looked up from thread local storage and the type is read from that slot of
-the thread table while holding the kernel mutex. */
-
-ulint
-srv_get_thread_type(void)
-/*=====================*/
- /* out: SRV_COM, ... */
-{
- ulint slot_no;
- srv_slot_t* slot;
- ulint type;
-
- mutex_enter(&kernel_mutex);
-
- slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
-
- slot = srv_table_get_nth_slot(slot_no);
-
- type = slot->type;
-
- ut_ad(type >= SRV_WORKER);
- ut_ad(type <= SRV_MASTER);
-
- mutex_exit(&kernel_mutex);
-
- return(type);
-}
-
-/*************************************************************************
-Initializes the server. In order, this function:
-- measures the spin delay with time_spin_delay();
-- allocates srv_sys and kernel_mutex_temp and creates the kernel mutex and
-  srv_innodb_monitor_mutex;
-- allocates the thread table (srv_sys->threads) and srv_mysql_table, each
-  with OS_THREAD_MAX_N slots, and creates an event for every slot;
-- creates srv_lock_timeout_thread_event;
-- sets the initial values of the per-thread-type counters and meters;
-- creates the dummy tables and indexes SYS_DUMMY1 and SYS_DUMMY2;
-- initializes srv_conc_mutex, srv_conc_queue and the srv_conc_slots array.
-The suspended, suspend_time and thr fields of the slots are not initialized
-here. */
-
-void
-srv_init(void)
-/*==========*/
-{
- srv_conc_slot_t* conc_slot;
- srv_slot_t* slot;
- dict_table_t* table;
- ulint i;
-
- time_spin_delay();
-
- srv_sys = mem_alloc(sizeof(srv_sys_t));
-
- kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
- mutex_create(&kernel_mutex, SYNC_KERNEL);
-
- mutex_create(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
-
- srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- slot = srv_table_get_nth_slot(i);
- slot->in_use = FALSE;
- slot->type=0; /* Avoid purify errors */
- slot->event = os_event_create(NULL);
- ut_a(slot->event);
- }
-
- srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- slot = srv_mysql_table + i;
- slot->in_use = FALSE;
- slot->type = 0;
- slot->event = os_event_create(NULL);
- ut_a(slot->event);
- }
-
- srv_lock_timeout_thread_event = os_event_create(NULL);
-
- for (i = 0; i < SRV_MASTER + 1; i++) {
- srv_n_threads_active[i] = 0;
- srv_n_threads[i] = 0;
- srv_meter[i] = 30;
- srv_meter_low_water[i] = 50;
- srv_meter_high_water[i] = 100;
- srv_meter_high_water2[i] = 200;
- srv_meter_foreground[i] = 250;
- }
-
- UT_LIST_INIT(srv_sys->tasks);
-
- /* create dummy table and index for old-style infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY1",
- DICT_HDR_SPACE, 1, 0);
- dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
- DATA_ENGLISH | DATA_NOT_NULL, 8);
-
- srv_sys->dummy_ind1 = dict_mem_index_create(
- "SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1);
- dict_index_add_col(srv_sys->dummy_ind1, table, (dict_col_t*)
- dict_table_get_nth_col(table, 0), 0);
- srv_sys->dummy_ind1->table = table;
- /* create dummy table and index for new-style infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY2",
- DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
- dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
- DATA_ENGLISH | DATA_NOT_NULL, 8);
- srv_sys->dummy_ind2 = dict_mem_index_create(
- "SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1);
- dict_index_add_col(srv_sys->dummy_ind2, table, (dict_col_t*)
- dict_table_get_nth_col(table, 0), 0);
- srv_sys->dummy_ind2->table = table;
-
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE;
-
- /* Init the server concurrency restriction data structures */
-
- os_fast_mutex_init(&srv_conc_mutex);
-
- UT_LIST_INIT(srv_conc_queue);
-
- srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- conc_slot = srv_conc_slots + i;
- conc_slot->reserved = FALSE;
- conc_slot->event = os_event_create(NULL);
- ut_a(conc_slot->event);
- }
-}
-
-/*************************************************************************
-Frees the OS fast mutex created in srv_init(). Only srv_conc_mutex is
-released here; the memory and events allocated by srv_init() are not freed
-by this function. */
-
-void
-srv_free(void)
-/*==========*/
-{
- os_fast_mutex_free(&srv_conc_mutex);
-}
-
-/*************************************************************************
-Initializes the synchronization primitives, memory system, and the thread
-local storage. The memory system is initialized with srv_mem_pool_size.
-srv_boot() calls this before srv_init(). */
-
-void
-srv_general_init(void)
-/*==================*/
-{
- os_sync_init();
- sync_init();
- mem_init(srv_mem_pool_size);
- thr_local_init();
-}
-
-/*======================= InnoDB Server FIFO queue =======================*/
-
-/* Maximum allowable purge history length. <=0 means 'infinite'.
-NOTE(review): if ulong is an unsigned type, 0 is the only value that can
-mean 'infinite' - confirm. */
-ulong srv_max_purge_lag = 0;
-
-/*************************************************************************
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-
-#ifdef UNIV_SYNC_ATOMIC
-static void /* Increments srv_conc_n_threads by one through
-os_atomic_increment() and stores the value that call returns in *n_threads
-(presumably the counter value after the increment - TODO confirm against
-os_atomic_increment). */
-inc_srv_conc_n_threads(lint *n_threads)
-{
- *n_threads = os_atomic_increment(&srv_conc_n_threads, 1);
-}
-
-static void /* Decrements srv_conc_n_threads by one through
-os_atomic_increment(); the resulting value is discarded. The function is
-defined without parameters. */
-dec_srv_conc_n_threads()
-{
- os_atomic_increment(&srv_conc_n_threads, -1);
-}
-#endif
-
-static void /* Prints to stderr a timestamped error saying that trx is
-being declared to enter InnoDB although it already is declared, followed by
-the output of trx_print() for trx. Does not modify trx. */
-print_already_in_error(trx_t* trx)
-{
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: trying to declare trx"
- " to enter InnoDB, but\n"
- "InnoDB: it already is declared.\n", stderr);
- trx_print(stderr, trx, 0);
- putc('\n', stderr);
- return;
-}
-
-#ifdef UNIV_SYNC_ATOMIC
-static void /* Marks trx as declared inside InnoDB and gives it
-SRV_FREE_TICKETS_TO_ENTER free tickets. Does not touch srv_conc_n_threads;
-the caller has already incremented it. */
-enter_innodb_with_tickets(trx_t* trx)
-{
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
- return;
-}
-
-static void /* Variant of srv_conc_enter_innodb() that uses the atomic
-counter helpers instead of srv_conc_mutex and the wait queue. Steps:
-1) if srv_conc_n_threads is below srv_thread_concurrency, increment it and
-   enter if the incremented value is still within the limit, otherwise undo
-   the increment;
-2) on the first failure yield the CPU once and retry step 1;
-3) if trx holds the search latch or has any lock in trx_locks, enter
-   regardless of the limit;
-4) otherwise sleep 10000 (os_thread_sleep units, presumably microseconds)
-   and then enter regardless of the limit.
-Every path that falls through the first check ends with the counter
-incremented and trx declared inside InnoDB with
-SRV_FREE_TICKETS_TO_ENTER tickets. */
-srv_conc_enter_innodb_timer_based(trx_t* trx)
-{
- lint conc_n_threads;
- ibool has_yielded = FALSE;
- ulint has_slept = 0;
-
- if (trx->declared_to_be_inside_innodb) {
- print_already_in_error(trx); /* NOTE(review): unlike
- srv_conc_enter_innodb(), execution continues
- after the error and the counter is incremented
- again below - confirm this is intended */
- }
-retry:
- if (srv_conc_n_threads < (lint) srv_thread_concurrency) { /* plain
- read of the counter; the real check is on the
- value returned by the increment */
- inc_srv_conc_n_threads(&conc_n_threads);
- if (conc_n_threads <= (lint) srv_thread_concurrency) {
- enter_innodb_with_tickets(trx);
- return;
- }
- dec_srv_conc_n_threads(&conc_n_threads); /* NOTE(review):
- dec_srv_conc_n_threads() is defined without
- parameters; this argument is not used */
- }
- if (!has_yielded)
- {
- has_yielded = TRUE;
- os_thread_yield();
- goto retry;
- }
- if (trx->has_search_latch
- || NULL != UT_LIST_GET_FIRST(trx->trx_locks)) {
-
- inc_srv_conc_n_threads(&conc_n_threads);
- enter_innodb_with_tickets(trx);
- return;
- }
- if (has_slept < 2) /* always true here: has_slept is still 0 and
- there is no jump back after the sleep, so the
- thread sleeps exactly once */
- {
- trx->op_info = "sleeping before entering InnoDB";
- os_thread_sleep(10000);
- trx->op_info = "";
- has_slept++;
- }
- inc_srv_conc_n_threads(&conc_n_threads);
- enter_innodb_with_tickets(trx);
- return;
-}
-
-static void /* Counterpart of srv_conc_enter_innodb_timer_based():
-decrements srv_conc_n_threads and marks trx as outside InnoDB with no
-tickets left. No waiting thread is woken here. */
-srv_conc_exit_innodb_timer_based(trx_t* trx)
-{
- dec_srv_conc_n_threads();
- trx->declared_to_be_inside_innodb = FALSE;
- trx->n_tickets_to_enter_innodb = 0;
- return;
-}
-#endif
-
-void
-srv_conc_enter_innodb( /* Declares the calling thread to be inside InnoDB,
-making it wait first if srv_thread_concurrency threads are already inside.
-Order of checks:
-1) replication slave threads return at once and are never counted;
-2) a trx with free tickets left uses one ticket and returns;
-3) with UNIV_SYNC_ATOMIC and srv_thread_concurrency_timer_based set, the
-   work is delegated to srv_conc_enter_innodb_timer_based();
-4) otherwise, under srv_conc_mutex: enter if below the limit; else sleep
-   once (only if trx holds no search latch and no locks) and retry; else
-   reserve a wait slot, queue it in srv_conc_queue and wait for the slot
-   event. If no wait slot is free the thread is let in without tickets. */
-/*==================*/
- trx_t* trx) /* in: transaction object associated with the
- thread */
-{
- ibool has_slept = FALSE;
- srv_conc_slot_t* slot = NULL;
- ulint i;
-
- if (trx->mysql_thd != NULL
- && thd_is_replication_slave_thread(trx->mysql_thd)) {
-
- /* TODO Do something more interesting (based on a config
- parameter). Some users want to give the replication
- thread very low priority, see http://bugs.mysql.com/25078
- This can be done by introducing
- innodb_replication_delay(ms) config parameter */
- return;
- }
-
- /* If trx has 'free tickets' to enter the engine left, then use one
- such ticket */
-
- if (trx->n_tickets_to_enter_innodb > 0) {
- trx->n_tickets_to_enter_innodb--;
-
- return;
- }
-
-#ifdef UNIV_SYNC_ATOMIC
- if (srv_thread_concurrency_timer_based) {
- srv_conc_enter_innodb_timer_based(trx);
- return;
- }
-#endif
-
- os_fast_mutex_lock(&srv_conc_mutex);
-retry:
- if (trx->declared_to_be_inside_innodb) {
- print_already_in_error(trx);
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- ut_ad(srv_conc_n_threads >= 0);
-
- if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
-
- srv_conc_n_threads++;
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- /* If the transaction is not holding resources, let it sleep
- for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
-
- if (!has_slept && !trx->has_search_latch
- && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
-
- has_slept = TRUE; /* We let it sleep only once to avoid
- starvation */
-
- srv_conc_n_waiting_threads++;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- trx->op_info = "sleeping before joining InnoDB queue";
-
- /* Peter Zaitsev suggested that we take the sleep away
- altogether. But the sleep may be good in pathological
- situations of lots of thread switches. Simply put some
- threads aside for a while to reduce the number of thread
- switches. */
- if (SRV_THREAD_SLEEP_DELAY > 0) {
- os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
- }
-
- trx->op_info = "";
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- srv_conc_n_waiting_threads--;
-
- goto retry;
- }
-
- /* Too many threads inside: put the current thread to a queue */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- slot = srv_conc_slots + i;
-
- if (!slot->reserved) {
-
- break;
- }
- }
-
- if (i == OS_THREAD_MAX_N) {
- /* Could not find a free wait slot, we must let the
- thread enter */
-
- srv_conc_n_threads++;
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = 0;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- /* Release possible search system latch this thread has */
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- /* Add to the queue */
- slot->reserved = TRUE;
- slot->wait_ended = FALSE;
-
- UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
-
- os_event_reset(slot->event);
-
- srv_conc_n_waiting_threads++;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- /* Go to wait for the event; when a thread leaves InnoDB it will
- release this thread */
-
- trx->op_info = "waiting in InnoDB queue";
-
- os_event_wait(slot->event);
-
- trx->op_info = "";
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- srv_conc_n_waiting_threads--;
-
- /* NOTE that the thread which released this thread already
- incremented the thread counter on behalf of this thread */
-
- slot->reserved = FALSE;
-
- UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
-
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-}
-
-/*************************************************************************
-This lets a thread enter InnoDB regardless of the number of threads inside
-InnoDB. This must be called when a thread ends a lock wait.
-Does nothing when srv_thread_concurrency is 0. Otherwise srv_conc_n_threads
-is incremented (atomically in the timer based mode, under srv_conc_mutex
-otherwise) and trx is declared inside InnoDB with exactly one ticket. */
-
-void
-srv_conc_force_enter_innodb(
-/*========================*/
- trx_t* trx) /* in: transaction object associated with the
- thread */
-{
-
- if (UNIV_LIKELY(!srv_thread_concurrency)) {
-
- return;
- }
-
- ut_ad(srv_conc_n_threads >= 0);
-#ifdef UNIV_SYNC_ATOMIC
- if (srv_thread_concurrency_timer_based) {
- lint conc_n_threads; /* receives the value from the
- increment; not used otherwise */
-
- inc_srv_conc_n_threads(&conc_n_threads);
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = 1;
- return;
- }
-#endif
- os_fast_mutex_lock(&srv_conc_mutex);
- srv_conc_n_threads++;
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = 1;
- os_fast_mutex_unlock(&srv_conc_mutex);
-}
-
-/*************************************************************************
-This must be called when a thread exits InnoDB in a lock wait or at the
-end of an SQL statement.
-Replication slave threads and transactions that are not declared inside
-InnoDB return at once. In the timer based mode the work is delegated to
-srv_conc_exit_innodb_timer_based(). Otherwise, under srv_conc_mutex, the
-thread count is decremented, trx is marked outside InnoDB with no tickets,
-and if the count is now below srv_thread_concurrency the first queued slot
-whose wait has not yet been ended is released. */
-
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
- trx_t* trx) /* in: transaction object associated with the
- thread */
-{
- srv_conc_slot_t* slot = NULL;
-
- if (trx->mysql_thd != NULL
- && thd_is_replication_slave_thread(trx->mysql_thd)) {
-
- return;
- }
-
- if (trx->declared_to_be_inside_innodb == FALSE) {
-
- return;
- }
-
-#ifdef UNIV_SYNC_ATOMIC
- if (srv_thread_concurrency_timer_based)
- {
- srv_conc_exit_innodb_timer_based(trx);
- return;
- }
-#endif
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- ut_ad(srv_conc_n_threads > 0);
- srv_conc_n_threads--;
- trx->declared_to_be_inside_innodb = FALSE;
- trx->n_tickets_to_enter_innodb = 0;
-
- if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
- /* Look for a slot where a thread is waiting and no other
- thread has yet released the thread */
-
- slot = UT_LIST_GET_FIRST(srv_conc_queue);
-
- while (slot && slot->wait_ended == TRUE) {
- slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
- }
-
- if (slot != NULL) {
- slot->wait_ended = TRUE;
-
- /* We increment the count on behalf of the released
- thread */
-
- srv_conc_n_threads++;
- }
- }
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- if (slot != NULL) { /* the event is set after the mutex has
- been released */
- os_event_set(slot->event);
- }
-}
-
-/*************************************************************************
-This must be called when a thread exits InnoDB. If trx still has free
-tickets nothing is done; otherwise srv_conc_force_exit_innodb() is called. */
-
-void
-srv_conc_exit_innodb(
-/*=================*/
- trx_t* trx) /* in: transaction object associated with the
- thread */
-{
- if (trx->n_tickets_to_enter_innodb > 0) {
- /* We will pretend the thread is still inside InnoDB though it
- now leaves the InnoDB engine. In this way we save
- a lot of semaphore operations. srv_conc_force_exit_innodb is
- used to declare the thread definitely outside InnoDB. It
- should be called when there is a lock wait or an SQL statement
- ends. */
-
- return;
- }
-
- srv_conc_force_exit_innodb(trx);
-}
-
-/*========================================================================*/
-
-/*************************************************************************
-Normalizes init parameter values to use units we use inside InnoDB.
-The values are rescaled in place, so calling this function twice would
-rescale them twice. The data file sizes and srv_last_file_size_max are
-multiplied by (1024 * 1024) / UNIV_PAGE_SIZE (presumably megabytes to
-pages); the log file size, log buffer size and AWE window size are divided
-by UNIV_PAGE_SIZE (presumably bytes to pages); srv_pool_size is divided by
-UNIV_PAGE_SIZE / 1024 (presumably kilobytes to pages). The input units are
-not visible here - TODO confirm. srv_lock_table_size is then derived from
-the normalized AWE window size or pool size. */
-static
-ulint
-srv_normalize_init_values(void)
-/*===========================*/
- /* out: DB_SUCCESS or error code; the code
- below always returns DB_SUCCESS */
-{
- ulint n;
- ulint i;
-
- n = srv_n_data_files;
-
- for (i = 0; i < n; i++) {
- srv_data_file_sizes[i] = srv_data_file_sizes[i]
- * ((1024 * 1024) / UNIV_PAGE_SIZE);
- }
-
- srv_last_file_size_max = srv_last_file_size_max
- * ((1024 * 1024) / UNIV_PAGE_SIZE);
-
- srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
-
- srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
-
- srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);
-
- srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;
-
- if (srv_use_awe) {
- /* If we are using AWE we must save memory in the 32-bit
- address space of the process, and cannot bind the lock
- table size to the real buffer pool size. */
-
- srv_lock_table_size = 20 * srv_awe_window_size;
- } else {
- srv_lock_table_size = 5 * srv_pool_size;
- }
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************************
-Boots the InnoDB server: normalizes the init parameter values, then calls
-srv_general_init() and srv_init(). Returns early with the error code if the
-normalization fails. */
-
-ulint
-srv_boot(void)
-/*==========*/
- /* out: DB_SUCCESS or error code */
-{
- ulint err;
-
- /* Transform the init parameter values given by MySQL to
- use units we use inside InnoDB: */
-
- err = srv_normalize_init_values();
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Initialize synchronization primitives, memory management, and thread
- local storage */
-
- srv_general_init();
-
- /* Initialize this module */
-
- srv_init();
-
- return(DB_SUCCESS);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Reserves a slot in the thread table for the current MySQL OS thread.
-NOTE! The kernel mutex has to be reserved by the caller!
-The first slot of srv_mysql_table with in_use == FALSE is taken and its id
-and handle are set to those of the calling thread. If all OS_THREAD_MAX_N
-slots are in use, the contents of every slot are printed to stderr and
-ut_error is invoked. The suspended, suspend_time, thr and type fields are
-not set here. */
-static
-srv_slot_t*
-srv_table_reserve_slot_for_mysql(void)
-/*==================================*/
- /* out: reserved slot */
-{
- srv_slot_t* slot;
- ulint i;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- i = 0;
- slot = srv_mysql_table + i;
-
- while (slot->in_use) {
- i++;
-
- if (i >= OS_THREAD_MAX_N) {
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: There appear to be %lu MySQL"
- " threads currently waiting\n"
- "InnoDB: inside InnoDB, which is the"
- " upper limit. Cannot continue operation.\n"
- "InnoDB: We intentionally generate"
- " a seg fault to print a stack trace\n"
- "InnoDB: on Linux. But first we print"
- " a list of waiting threads.\n", (ulong) i);
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_mysql_table + i;
-
- fprintf(stderr,
- "Slot %lu: thread id %lu, type %lu,"
- " in use %lu, susp %lu, time %lu\n",
- (ulong) i,
- (ulong) os_thread_pf(slot->id),
- (ulong) slot->type,
- (ulong) slot->in_use,
- (ulong) slot->suspended,
- (ulong) difftime(ut_time(),
- slot->suspend_time));
- }
-
- ut_error;
- }
-
- slot = srv_mysql_table + i;
- }
-
- ut_a(slot->in_use == FALSE);
-
- slot->in_use = TRUE;
- slot->id = os_thread_get_curr_id();
- slot->handle = os_thread_get_curr();
-
- return(slot);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************
-Puts a MySQL OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim.
-Outline: under the kernel mutex, return at once if thr is already running
-again; otherwise reserve a slot in srv_mysql_table and reset its event.
-Then, without the kernel mutex, leave the InnoDB concurrency count and
-release the data dictionary S-latch if held, wait for the slot event, and
-re-acquire both in the same order. Finally, under the kernel mutex, free the
-slot and update the row lock wait statistics.
-In UNIV_HOTBACKUP builds this function only invokes ut_error. */
-
-void
-srv_suspend_mysql_thread(
-/*=====================*/
- que_thr_t* thr) /* in: query thread associated with the MySQL
- OS thread */
-{
-#ifndef UNIV_HOTBACKUP
- srv_slot_t* slot;
- os_event_t event;
- double wait_time;
- trx_t* trx;
- ibool had_dict_lock = FALSE;
- ibool was_declared_inside_innodb = FALSE;
- ib_longlong start_time = 0;
- ib_longlong finish_time;
- ulint diff_time;
- ulint sec;
- ulint ms; /* sub-second part from ut_usectime(); it
- is added to sec * 1000000 below, so it is
- presumably microseconds despite the name */
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- trx = thr_get_trx(thr);
-
- os_event_set(srv_lock_timeout_thread_event);
-
- mutex_enter(&kernel_mutex);
-
- trx->error_state = DB_SUCCESS;
-
- if (thr->state == QUE_THR_RUNNING) {
-
- ut_ad(thr->is_active == TRUE);
-
- /* The lock has already been released or this transaction
- was chosen as a deadlock victim: no need to suspend */
-
- if (trx->was_chosen_as_deadlock_victim) {
-
- trx->error_state = DB_DEADLOCK;
- trx->was_chosen_as_deadlock_victim = FALSE;
- }
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- ut_ad(thr->is_active == FALSE);
-
- slot = srv_table_reserve_slot_for_mysql();
-
- event = slot->event;
-
- slot->thr = thr;
-
- os_event_reset(event);
-
- slot->suspend_time = ut_time();
-
- if (thr->lock_state == QUE_THR_LOCK_ROW) {
- srv_n_lock_wait_count++;
- srv_n_lock_wait_current_count++;
-
- if (ut_usectime(&sec, &ms) == -1) {
- start_time = -1;
- } else {
- start_time = (ib_longlong)sec * 1000000 + ms;
- }
- }
- /* Wake the lock timeout monitor thread, if it is suspended */
-
- os_event_set(srv_lock_timeout_thread_event);
-
- mutex_exit(&kernel_mutex);
-
- if (trx->declared_to_be_inside_innodb) {
-
- was_declared_inside_innodb = TRUE;
-
- /* We must declare this OS thread to exit InnoDB, since a
- possible other thread holding a lock which this thread waits
- for must be allowed to enter, sooner or later */
-
- srv_conc_force_exit_innodb(trx);
- }
-
- /* Release possible foreign key check latch */
- if (trx->dict_operation_lock_mode == RW_S_LATCH) {
-
- had_dict_lock = TRUE;
-
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- ut_a(trx->dict_operation_lock_mode == 0);
-
- /* Wait for the release */
-
- os_event_wait(event);
-
- if (had_dict_lock) {
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
- if (was_declared_inside_innodb) {
-
- /* Return back inside InnoDB */
-
- srv_conc_force_enter_innodb(trx);
- }
-
- mutex_enter(&kernel_mutex);
-
- /* Release the slot for others to use */
-
- slot->in_use = FALSE;
-
- wait_time = ut_difftime(ut_time(), slot->suspend_time);
-
- if (thr->lock_state == QUE_THR_LOCK_ROW) {
- if (ut_usectime(&sec, &ms) == -1) {
- finish_time = -1;
- } else {
- finish_time = (ib_longlong)sec * 1000000 + ms;
- }
-
- diff_time = (ulint) (finish_time - start_time);
-
- srv_n_lock_wait_current_count--;
- srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time; /*
- NOTE(review): unlike the maximum below, the
- total is updated even when start_time or
- finish_time is -1 - confirm this is intended */
- if (diff_time > srv_n_lock_max_wait_time &&
- /* only update the variable if we successfully
- retrieved the start and finish times. See Bug#36819. */
- start_time != -1 && finish_time != -1) {
- srv_n_lock_max_wait_time = diff_time;
- }
- }
-
- if (trx->was_chosen_as_deadlock_victim) {
-
- trx->error_state = DB_DEADLOCK;
- trx->was_chosen_as_deadlock_victim = FALSE;
- }
-
- mutex_exit(&kernel_mutex);
-
- if (srv_lock_wait_timeout < 100000000
- && wait_time > (double)srv_lock_wait_timeout) { /* decided
- from the measured wait alone; this assignment
- overrides a DB_DEADLOCK set just above */
-
- trx->error_state = DB_LOCK_WAIT_TIMEOUT;
- }
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* UNIV_HOTBACKUP */
-}
-
-/************************************************************************
-Releases a MySQL OS thread waiting for a lock to be released, if the
-thread is already suspended. srv_mysql_table is searched for an in-use slot
-whose thr equals the argument; the event of the first such slot is set.
-Nothing is done if no slot matches. The caller must hold the kernel mutex.
-In UNIV_HOTBACKUP builds this function only invokes ut_error. */
-
-void
-srv_release_mysql_thread_if_suspended(
-/*==================================*/
- que_thr_t* thr) /* in: query thread associated with the
- MySQL OS thread */
-{
-#ifndef UNIV_HOTBACKUP
- srv_slot_t* slot;
- ulint i;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_mysql_table + i;
-
- if (slot->in_use && slot->thr == thr) {
- /* Found */
-
- os_event_set(slot->event);
-
- return;
- }
- }
-
- /* not found */
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* UNIV_HOTBACKUP */
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Refreshes the values used to calculate per-second averages. */
-static
-ibool
-srv_refresh_innodb_monitor_stats(void)
-/*==================================*/
-{
- /* Sometimes we will skip stats update to avoid deadlock, since
- since this function is called by the background wake-up thread */
- if (mutex_enter_nowait(&srv_innodb_monitor_mutex)) {
- /* mutex_enter_nowait returns 1 on failure */
- return FALSE;
- }
-
- srv_last_monitor_time = time(NULL);
-
- os_aio_refresh_stats();
-
- btr_cur_n_sea_old = btr_cur_n_sea;
- btr_cur_n_non_sea_old = btr_cur_n_non_sea;
-
- log_refresh_stats();
-
- buf_refresh_io_stats();
-
- srv_n_rows_inserted_old = srv_n_rows_inserted;
- srv_n_rows_updated_old = srv_n_rows_updated;
- srv_n_rows_deleted_old = srv_n_rows_deleted;
- srv_n_rows_read_old = srv_n_rows_read;
-
- mutex_exit(&srv_innodb_monitor_mutex);
- return TRUE;
-}
-
-/**********************************************************************
-Outputs to a file the output of the InnoDB Monitor. */
-
-void
-srv_printf_innodb_monitor(
-/*======================*/
- FILE* file) /* in: output stream */
-{
- double time_elapsed;
- time_t current_time;
- ulint n_reserved;
-
- mutex_enter(&srv_innodb_monitor_mutex);
-
- current_time = time(NULL);
-
- /* We add 0.001 seconds to time_elapsed to prevent division
- by zero if two users happen to call SHOW INNODB STATUS at the same
- time */
-
- time_elapsed = difftime(current_time, srv_last_monitor_time)
- + 0.001;
-
- srv_last_monitor_time = time(NULL);
-
- fputs("\n=====================================\n", file);
-
- ut_print_timestamp(file);
- fprintf(file,
- " INNODB MONITOR OUTPUT\n"
- "=====================================\n"
- "Per second averages calculated from the last %lu seconds\n",
- (ulong)time_elapsed);
-
- fputs("----------\n"
- "BACKGROUND THREAD\n"
- "----------\n", file);
- srv_print_extra(file);
-
- fputs("----------\n"
- "SEMAPHORES\n"
- "----------\n", file);
- sync_print(file);
-
- /* Conceptually, srv_innodb_monitor_mutex has a very high latching
- order level in sync0sync.h, while dict_foreign_err_mutex has a very
- low level 135. Therefore we can reserve the latter mutex here without
- a danger of a deadlock of threads. */
-
- mutex_enter(&dict_foreign_err_mutex);
-
- if (ftell(dict_foreign_err_file) != 0L) {
- fputs("------------------------\n"
- "LATEST FOREIGN KEY ERROR\n"
- "------------------------\n", file);
- ut_copy_file(file, dict_foreign_err_file);
- }
-
- mutex_exit(&dict_foreign_err_mutex);
-
- /* Print open transaction details */
- lock_print_info_summary(file);
-
- if (trx_start) {
- long t = ftell(file);
- if (t < 0) {
- *trx_start = ULINT_UNDEFINED;
- } else {
- *trx_start = (ulint) t;
- }
- }
- lock_print_info_all_transactions(file);
- if (trx_end) {
- long t = ftell(file);
- if (t < 0) {
- *trx_end = ULINT_UNDEFINED;
- } else {
- *trx_end = (ulint) t;
- }
- }
-
- fputs("--------\n"
- "FILE I/O\n"
- "--------\n", file);
- os_aio_print(file);
-
- fputs("-------------------------------------\n"
- "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
- "-------------------------------------\n", file);
- ibuf_print(file);
-
- ha_print_info(file, btr_search_sys->hash_index);
-
- fprintf(file,
- "%.2f hash searches/s, %.2f non-hash searches/s\n",
- (btr_cur_n_sea - btr_cur_n_sea_old)
- / time_elapsed,
- (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
- / time_elapsed);
- btr_cur_n_sea_old = btr_cur_n_sea;
- btr_cur_n_non_sea_old = btr_cur_n_non_sea;
-
- fputs("---\n"
- "LOG\n"
- "---\n", file);
- log_print(file);
-
- fputs("----------------------\n"
- "BUFFER POOL AND MEMORY\n"
- "----------------------\n", file);
- fprintf(file,
- "Total memory allocated " ULINTPF
- "; in additional pool allocated " ULINTPF "\n",
- ut_total_allocated_memory,
- mem_pool_get_reserved(mem_comm_pool));
- fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
- dict_sys->size);
-
- if (srv_use_awe) {
- fprintf(file,
- "In addition to that %lu MB of AWE memory allocated\n",
- (ulong) (srv_pool_size
- / ((1024 * 1024) / UNIV_PAGE_SIZE)));
- }
-
- buf_print_io(file);
-
- fputs("--------------\n"
- "ROW OPERATIONS\n"
- "--------------\n", file);
- fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
- (long) srv_conc_n_threads,
- (ulong) srv_conc_n_waiting_threads);
-
- fprintf(file, "%lu read views open inside InnoDB\n",
- UT_LIST_GET_LEN(trx_sys->view_list));
-
- n_reserved = fil_space_get_n_reserved_extents(0);
- if (n_reserved > 0) {
- fprintf(file,
- "%lu tablespace extents now reserved for"
- " B-tree split operations\n",
- (ulong) n_reserved);
- }
-
-#ifdef UNIV_LINUX
- fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
- (ulong) srv_main_thread_process_no,
- (ulong) srv_main_thread_id,
- srv_main_thread_op_info);
-#else
- fprintf(file, "Main thread id %lu, state: %s\n",
- (ulong) srv_main_thread_id,
- srv_main_thread_op_info);
-#endif
- fprintf(file,
- "Number of rows inserted " ULINTPF
- ", updated " ULINTPF ", deleted " ULINTPF
- ", read " ULINTPF "\n",
- srv_n_rows_inserted,
- srv_n_rows_updated,
- srv_n_rows_deleted,
- srv_n_rows_read);
- fprintf(file,
- "%.2f inserts/s, %.2f updates/s,"
- " %.2f deletes/s, %.2f reads/s\n",
- (srv_n_rows_inserted - srv_n_rows_inserted_old)
- / time_elapsed,
- (srv_n_rows_updated - srv_n_rows_updated_old)
- / time_elapsed,
- (srv_n_rows_deleted - srv_n_rows_deleted_old)
- / time_elapsed,
- (srv_n_rows_read - srv_n_rows_read_old)
- / time_elapsed);
-
- srv_n_rows_inserted_old = srv_n_rows_inserted;
- srv_n_rows_updated_old = srv_n_rows_updated;
- srv_n_rows_deleted_old = srv_n_rows_deleted;
- srv_n_rows_read_old = srv_n_rows_read;
-
- fputs("----------------------------\n"
- "END OF INNODB MONITOR OUTPUT\n"
- "============================\n", file);
- mutex_exit(&srv_innodb_monitor_mutex);
- fflush(file);
-}
-
-/**********************************************************************
-Function to pass InnoDB status variables to MySQL */
-
-void
-srv_export_innodb_status(void)
-{
- mutex_enter(&srv_innodb_monitor_mutex);
-
- export_vars.innodb_data_pending_reads
- = os_n_pending_reads;
- export_vars.innodb_data_pending_writes
- = os_n_pending_writes;
- export_vars.innodb_data_pending_fsyncs
- = fil_n_pending_log_flushes
- + fil_n_pending_tablespace_flushes;
- export_vars.innodb_data_fsyncs = os_n_fsyncs;
- export_vars.innodb_data_read = srv_data_read;
- export_vars.innodb_data_reads = os_n_file_reads;
- export_vars.innodb_data_writes = os_n_file_writes;
- export_vars.innodb_data_written = srv_data_written;
- export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets;
- export_vars.innodb_buffer_pool_write_requests
- = srv_buf_pool_write_requests;
- export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
- export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
- export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
- export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd;
- export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq;
- export_vars.innodb_buffer_pool_pages_data
- = UT_LIST_GET_LEN(buf_pool->LRU);
- export_vars.innodb_buffer_pool_pages_dirty
- = UT_LIST_GET_LEN(buf_pool->flush_list);
- export_vars.innodb_buffer_pool_pages_free
- = UT_LIST_GET_LEN(buf_pool->free);
-#ifdef UNIV_DEBUG
- export_vars.innodb_buffer_pool_pages_latched
- = buf_get_latched_pages_number();
-#endif /* UNIV_DEBUG */
- export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size;
-
- export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size
- - UT_LIST_GET_LEN(buf_pool->LRU)
- - UT_LIST_GET_LEN(buf_pool->free);
-#ifdef UNIV_SYNC_ATOMIC
- export_vars.innodb_have_sync_atomic = 1;
-#else
- export_vars.innodb_have_sync_atomic = 0;
-#endif
-#ifdef UNIV_DISABLE_MEM_POOL
- export_vars.innodb_heap_enabled = 0;
-#else
- export_vars.innodb_heap_enabled = 1;
-#endif
- export_vars.innodb_page_size = UNIV_PAGE_SIZE;
- export_vars.innodb_log_waits = srv_log_waits;
- export_vars.innodb_os_log_written = srv_os_log_written;
- export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
- export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
- export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
- export_vars.innodb_log_write_requests = srv_log_write_requests;
- export_vars.innodb_log_writes = srv_log_writes;
- export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
- export_vars.innodb_dblwr_writes = srv_dblwr_writes;
- export_vars.innodb_pages_created = buf_pool->n_pages_created;
- export_vars.innodb_pages_read = buf_pool->n_pages_read;
- export_vars.innodb_pages_written = buf_pool->n_pages_written;
- export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
- export_vars.innodb_row_lock_current_waits
- = srv_n_lock_wait_current_count;
- export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
- if (srv_n_lock_wait_count > 0) {
- export_vars.innodb_row_lock_time_avg = (ulint)
- (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
- } else {
- export_vars.innodb_row_lock_time_avg = 0;
- }
- export_vars.innodb_row_lock_time_max
- = srv_n_lock_max_wait_time / 1000;
- export_vars.innodb_rows_read = srv_n_rows_read;
- export_vars.innodb_rows_inserted = srv_n_rows_inserted;
- export_vars.innodb_rows_updated = srv_n_rows_updated;
- export_vars.innodb_rows_deleted = srv_n_rows_deleted;
- export_vars.innodb_wake_ups = sync_wake_ups;
-
- mutex_exit(&srv_innodb_monitor_mutex);
-}
-
-/*************************************************************************
-A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors. */
-
-os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
- /* out: a dummy parameter */
- void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
- os_thread_create */
-{
- srv_slot_t* slot;
- double time_elapsed;
- time_t current_time;
- time_t last_table_monitor_time;
- time_t last_tablespace_monitor_time;
- time_t last_monitor_time;
- ibool some_waits;
- double wait_time;
- ulint i;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Lock timeout thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
- UT_NOT_USED(arg);
- srv_last_monitor_time = time(NULL);
- last_table_monitor_time = time(NULL);
- last_tablespace_monitor_time = time(NULL);
- last_monitor_time = time(NULL);
-loop:
- srv_lock_timeout_and_monitor_active = TRUE;
-
- /* When someone is waiting for a lock, we wake up every second
- and check if a timeout has passed for a lock wait */
-
- os_thread_sleep(1000000);
-
- current_time = time(NULL);
-
- time_elapsed = difftime(current_time, last_monitor_time);
-
- if (time_elapsed > 15) {
- last_monitor_time = time(NULL);
-
- if (srv_print_innodb_monitor) {
- srv_printf_innodb_monitor(stderr);
- }
-
- if (srv_innodb_status) {
- mutex_enter(&srv_monitor_file_mutex);
- rewind(srv_monitor_file);
- srv_printf_innodb_monitor(srv_monitor_file);
- os_file_set_eof(srv_monitor_file);
- mutex_exit(&srv_monitor_file_mutex);
- }
-
- if (srv_print_innodb_tablespace_monitor
- && difftime(current_time,
- last_tablespace_monitor_time) > 60) {
- last_tablespace_monitor_time = time(NULL);
-
- fputs("========================"
- "========================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
- "========================"
- "========================\n",
- stderr);
-
- fsp_print(0);
- fputs("Validating tablespace\n", stderr);
- fsp_validate(0);
- fputs("Validation ok\n"
- "---------------------------------------\n"
- "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
- "=======================================\n",
- stderr);
- }
-
- if (srv_print_innodb_table_monitor
- && difftime(current_time, last_table_monitor_time) > 60) {
-
- last_table_monitor_time = time(NULL);
-
- fputs("===========================================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLE MONITOR OUTPUT\n"
- "===========================================\n",
- stderr);
- dict_print();
-
- fputs("-----------------------------------\n"
- "END OF INNODB TABLE MONITOR OUTPUT\n"
- "==================================\n",
- stderr);
- }
- }
-
- mutex_enter(&kernel_mutex);
-
- some_waits = FALSE;
-
- /* Check of all slots if a thread is waiting there, and if it
- has exceeded the time limit */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_mysql_table + i;
-
- if (slot->in_use) {
- some_waits = TRUE;
-
- wait_time = ut_difftime(ut_time(), slot->suspend_time);
-
- if (srv_lock_wait_timeout < 100000000
- && (wait_time > (double) srv_lock_wait_timeout
- || wait_time < 0)) {
-
- /* Timeout exceeded or a wrap-around in system
- time counter: cancel the lock request queued
- by the transaction and release possible
- other transactions waiting behind; it is
- possible that the lock has already been
- granted: in that case do nothing */
-
- if (thr_get_trx(slot->thr)->wait_lock) {
- lock_cancel_waiting_and_release(
- thr_get_trx(slot->thr)
- ->wait_lock);
- }
- }
- }
- }
-
- os_event_reset(srv_lock_timeout_thread_event);
-
- mutex_exit(&kernel_mutex);
-
- if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
- goto exit_func;
- }
-
- if (some_waits || srv_print_innodb_monitor
- || srv_print_innodb_lock_monitor
- || srv_print_innodb_tablespace_monitor
- || srv_print_innodb_table_monitor) {
- goto loop;
- }
-
- /* No one was waiting for a lock and no monitor was active:
- suspend this thread */
-
- srv_lock_timeout_and_monitor_active = FALSE;
-
-#if 0
- /* The following synchronisation is disabled, since
- the InnoDB monitor output is to be updated every 15 seconds. */
- os_event_wait(srv_lock_timeout_thread_event);
-#endif
- goto loop;
-
-exit_func:
- srv_lock_timeout_and_monitor_active = FALSE;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*************************************************************************
-A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs.
-NOTE: This thread should not wait for any innodb mutexes or rw_locks.
-A deadlock could arise where the thread holding that lock requires waking
-by this background thread while this thread is blocked on that lock. */
-
-os_thread_ret_t
-srv_error_monitor_thread(
-/*=====================*/
- /* out: a dummy parameter */
- void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
- os_thread_create */
-{
- /* number of successive fatal timeouts observed */
- ulint fatal_cnt = 0;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Error monitor thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
-loop:
- srv_error_monitor_active = TRUE;
-
- if (difftime(time(NULL), srv_last_monitor_time) > 60) {
- /* We refresh InnoDB Monitor values so that averages are
- printed from at most 60 last seconds */
-
- srv_refresh_innodb_monitor_stats();
- }
-
- /* In case mutex_exit is not a memory barrier, it is
- theoretically possible some threads are left waiting though
- the semaphore is already released. Wake up those threads: */
-
- sync_arr_wake_threads_if_sema_free();
-
- if (sync_array_print_long_waits()) {
- fatal_cnt++;
- if (fatal_cnt > 10) {
-
- fprintf(stderr,
- "InnoDB: Error: semaphore wait has lasted"
- " > %lu seconds\n"
- "InnoDB: We intentionally crash the server,"
- " because it appears to be hung.\n",
- (ulong) srv_fatal_semaphore_wait_threshold);
-
- ut_error;
- }
- } else {
- fatal_cnt = 0;
- }
-
- /* Flush stderr so that a database user gets the output
- to possible MySQL error file */
-
- fflush(stderr);
-
- os_thread_sleep(1000000);
-
- if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
-
- goto loop;
- }
-
- srv_error_monitor_active = FALSE;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/***********************************************************************
-Tells the InnoDB server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the kernel
-mutex, for performace reasons). */
-
-void
-srv_active_wake_master_thread(void)
-/*===============================*/
-{
- srv_activity_count++;
-
- if (srv_n_threads_active[SRV_MASTER] == 0) {
-
- mutex_enter(&kernel_mutex);
-
- srv_release_threads(SRV_MASTER, 1);
-
- mutex_exit(&kernel_mutex);
- }
-}
-
-/***********************************************************************
-Wakes up the master thread if it is suspended or being suspended. */
-
-void
-srv_wake_master_thread(void)
-/*========================*/
-{
- srv_activity_count++;
-
- mutex_enter(&kernel_mutex);
-
- srv_release_threads(SRV_MASTER, 1);
-
- mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************************
-Returns the number of IO operations that is X percent of the capacity.
-
-PCT_IO(5) -> returns the number of IO operations that is 5% of the max
-where max is srv_io_capacity.
-*/
-#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0)))
-
-/*************************************************************************
-The master thread controlling the server. */
-
-os_thread_ret_t
-srv_master_thread(
-/*==============*/
- /* out: a dummy parameter */
- void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
- os_thread_create */
-{
- os_event_t event;
- time_t last_flush_time;
- time_t current_time;
- ulint old_activity_count;
- ulint n_pages_purged;
- ulint n_bytes_merged;
- ulint n_pages_flushed;
- ulint n_bytes_archived;
- ulint n_tables_to_drop;
- ulint n_ios;
- ulint n_ios_old;
- ulint n_ios_very_old;
- ulint n_pend_ios;
- ibool skip_sleep = FALSE;
- ulint i;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Master thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
- fprintf(stderr, "InnoDB master thread running with io_capacity %lu\n",
- srv_io_capacity);
-
- srv_main_thread_process_no = os_proc_get_number();
- srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
-
- srv_table_reserve_slot(SRV_MASTER);
-
- mutex_enter(&kernel_mutex);
-
- srv_n_threads_active[SRV_MASTER]++;
-
- mutex_exit(&kernel_mutex);
-
-loop:
- /*****************************************************************/
- /* ---- When there is database activity by users, we cycle in this
- loop */
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- mutex_enter(&kernel_mutex);
-
- /* Store the user activity counter at the start of this loop */
- old_activity_count = srv_activity_count;
-
- mutex_exit(&kernel_mutex);
-
- if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
-
- goto suspend_thread;
- }
-
- /* ---- We run the following loop approximately once per second
- when there is database activity */
-
- skip_sleep = FALSE;
-
- for (i = 0; i < 10; i++) {
- n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- srv_main_thread_op_info = "sleeping";
- srv_main_1_second_loops++;
-
- if (!skip_sleep) {
-
- os_thread_sleep(1000000);
- srv_main_sleeps++;
- }
-
- skip_sleep = FALSE;
-
- /* ALTER TABLE in MySQL requires on Unix that the table handler
- can drop tables lazily after there no longer are SELECT
- queries to them. */
-
- srv_main_thread_op_info = "doing background drop tables";
-
- row_drop_tables_for_mysql_in_background();
-
- srv_main_thread_op_info = "";
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- goto background_loop;
- }
-
- /* We flush the log once in a second even if no commit
- is issued or the we have specified in my.cnf no flush
- at transaction commit */
-
- srv_main_thread_op_info = "flushing log";
- log_buffer_flush_to_disk();
- srv_sync_flush++;
-
- srv_main_thread_op_info = "making checkpoint";
- log_free_check();
-
- /* If i/os during one second sleep were less than 5% of
- capacity, we assume that there is free disk i/o capacity
- available, and it makes sense to do an insert buffer merge. */
-
- n_pend_ios = buf_get_n_pending_ios()
- + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) {
- srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
-
- srv_main_thread_op_info = "flushing log";
-
- /* No fsync when srv_flush_log_at_trx_commit != 1 */
- log_buffer_flush_maybe_sync();
- srv_async_flush++;
- }
-
- if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
- > srv_max_buf_pool_modified_pct)) {
-
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
-
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(100),
- ut_dulint_max);
-
- /* If we had to do the flush, it may have taken
- even more than 1 second, and also, there may be more
- to flush. Do not sleep 1 second during the next
- iteration of this loop. */
-
- skip_sleep = TRUE;
- }
-
- if (srv_activity_count == old_activity_count) {
-
- /* There is no user activity at the moment, go to
- the background loop */
-
- goto background_loop;
- }
- }
-
- /* ---- We perform the following code approximately once per
- 10 seconds when there is database activity */
-
-#ifdef MEM_PERIODIC_CHECK
- /* Check magic numbers of every allocated mem block once in 10
- seconds */
- mem_validate_all_blocks();
-#endif
- /* If i/os during the 10 second period were less than 200% of
- capacity, we assume that there is free disk i/o capacity
- available, and it makes sense to flush srv_io_capacity pages.
-
- Note that this is done regardless of the fraction of dirty
- pages relative to the max requested by the user. The one second
- loop above requests writes for that case. The writes done here
- are not required, and may be disabled. */
-
- n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- if (srv_extra_dirty_writes &&
- n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
-
- srv_main_thread_op_info = "flushing buffer pool pages";
- buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
-
- srv_main_thread_op_info = "flushing log";
- /* No fsync when srv_flush_log_at_trx_commit != 1 */
- log_buffer_flush_maybe_sync();
- srv_async_flush++;
- }
-
- /* We run a batch of insert buffer merge every 10 seconds,
- even if the server were active */
-
- srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
-
- srv_main_thread_op_info = "flushing log";
- /* No fsync when srv_flush_log_at_trx_commit != 1 */
- log_buffer_flush_maybe_sync();
- srv_async_flush++;
-
- /* We run a full purge every 10 seconds, even if the server
- were active */
-
- n_pages_purged = 1;
-
- last_flush_time = time(NULL);
-
- while (n_pages_purged) {
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- goto background_loop;
- }
-
- srv_main_thread_op_info = "purging";
- n_pages_purged = trx_purge();
-
- current_time = time(NULL);
-
- if (difftime(current_time, last_flush_time) > 1) {
- srv_main_thread_op_info = "flushing log";
-
- log_buffer_flush_to_disk();
- last_flush_time = current_time;
- srv_sync_flush++;
- }
- }
-
- srv_main_thread_op_info = "flushing buffer pool pages";
-
- /* Flush a few oldest pages to make a new checkpoint younger */
-
- if (buf_get_modified_ratio_pct() > 70) {
-
- /* If there are lots of modified pages in the buffer pool
- (> 70 %), we assume we can afford reserving the disk(s) for
- the time it requires to flush 100 pages */
-
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(100),
- ut_dulint_max);
- } else {
- /* Otherwise, we only flush a small number of pages so that
- we do not unnecessarily use much disk i/o capacity from
- other work */
-
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(10),
- ut_dulint_max);
- }
-
- srv_main_thread_op_info = "making checkpoint";
-
- /* Make a new checkpoint about once in 10 seconds */
-
- log_checkpoint(TRUE, FALSE);
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
-
- /* ---- When there is database activity, we jump from here back to
- the start of loop */
-
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
-
- mutex_exit(&kernel_mutex);
-
- /* If the database is quiet, we enter the background loop */
-
- /*****************************************************************/
-background_loop:
- /* ---- In this loop we run background operations when the server
- is quiet from user activity. Also in the case of a shutdown, we
- loop here, flushing the buffer pool to the data files. */
-
- /* The server has been quiet for a while: start running background
- operations */
- srv_main_background_loops++;
- srv_main_thread_op_info = "doing background drop tables";
-
- n_tables_to_drop = row_drop_tables_for_mysql_in_background();
-
- if (n_tables_to_drop > 0) {
- /* Do not monopolize the CPU even if there are tables waiting
- in the background drop queue. (It is essentially a bug if
- MySQL tries to drop a table while there are still open handles
- to it and we had to put it to the background drop queue.) */
-
- os_thread_sleep(100000);
- }
-
- srv_main_thread_op_info = "purging";
-
- /* Run a full purge */
-
- n_pages_purged = 1;
-
- last_flush_time = time(NULL);
-
- while (n_pages_purged) {
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- break;
- }
-
- srv_main_thread_op_info = "purging";
- n_pages_purged = trx_purge();
-
- current_time = time(NULL);
-
- if (difftime(current_time, last_flush_time) > 1) {
- srv_main_thread_op_info = "flushing log";
-
- log_buffer_flush_to_disk();
- last_flush_time = current_time;
- srv_sync_flush++;
- }
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- srv_main_thread_op_info = "doing insert buffer merge";
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
- n_bytes_merged = 0;
- } else {
- /* This should do an amount of IO similar to the number of
- * dirty pages that will be flushed in the call to
- * buf_flush_batch below. Otherwise, the system favors
- * clean pages over cleanup throughput. */
- n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100));
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
-flush_loop:
- srv_main_thread_op_info = "flushing buffer pool pages";
- srv_main_flush_loops++;
- if (srv_fast_shutdown < 2) {
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(100),
- ut_dulint_max);
- } else {
- /* In the fastest shutdown we do not flush the buffer pool
- to data files: we set n_pages_flushed to 0 artificially. */
-
- n_pages_flushed = 0;
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- srv_main_thread_op_info = "waiting for buffer pool flush to end";
- buf_flush_wait_batch_end(BUF_FLUSH_LIST);
-
- srv_main_thread_op_info = "flushing log";
-
- current_time = time(NULL);
- if (difftime(current_time, last_flush_time) > 1) {
- srv_main_thread_op_info = (char*) "flushing log";
- log_buffer_flush_to_disk();
- last_flush_time = current_time;
- srv_sync_flush++;
- } else {
- /* No fsync when srv_flush_log_at_trx_commit != 1 */
- log_buffer_flush_maybe_sync();
- srv_async_flush++;
- }
-
- srv_main_thread_op_info = "making checkpoint";
-
- log_checkpoint(TRUE, FALSE);
-
- if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
-
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
-
- goto flush_loop;
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
- /*
- srv_main_thread_op_info = "archiving log (if log archive is on)";
-
- log_archive_do(FALSE, &n_bytes_archived);
- */
- n_bytes_archived = 0;
-
- /* Keep looping in the background loop if still work to do */
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
- if (n_tables_to_drop + n_pages_flushed
- + n_bytes_archived != 0) {
-
- /* If we are doing a fast shutdown (= the default)
- we do not do purge or insert buffer merge. But we
- flush the buffer pool completely to disk.
- In a 'very fast' shutdown we do not flush the buffer
- pool to data files: we have set n_pages_flushed to
- 0 artificially. */
-
- goto background_loop;
- }
- } else if (n_tables_to_drop
- + n_pages_purged + n_bytes_merged + n_pages_flushed
- + n_bytes_archived != 0) {
- /* In a 'slow' shutdown we run purge and the insert buffer
- merge to completion */
-
- goto background_loop;
- }
-
- /* There is no work for background operations either: suspend
- master thread to wait for more server activity */
-
-suspend_thread:
- srv_main_thread_op_info = "suspending";
-
- mutex_enter(&kernel_mutex);
-
- if (row_get_background_drop_list_len_low() > 0) {
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- event = srv_suspend_thread();
-
- mutex_exit(&kernel_mutex);
-
- /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
- waits for database activity to die down when converting < 4.1.x
- databases, and relies on this string being exactly as it is. InnoDB
- manual also mentions this string in several places. */
- srv_main_thread_op_info = "waiting for server activity";
-
- os_event_wait(event);
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- /* This is only extra safety, the thread should exit
- already when the event wait ends */
-
- os_thread_exit(NULL);
- }
-
- /* When there is user activity, InnoDB will set the event and the
- main thread goes back to loop. */
-
- goto loop;
-
- OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
deleted file mode 100644
index 0b63d1a0b86..00000000000
--- a/storage/innobase/srv/srv0start.c
+++ /dev/null
@@ -1,2027 +0,0 @@
-/************************************************************************
-Starts the InnoDB database server
-
-(c) 1996-2000 Innobase Oy
-
-Created 2/16/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "os0proc.h"
-#include "sync0sync.h"
-#include "ut0mem.h"
-#include "mem0mem.h"
-#include "mem0pool.h"
-#include "data0data.h"
-#include "data0type.h"
-#include "dict0dict.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0rea.h"
-#include "os0file.h"
-#include "os0thread.h"
-#include "fil0fil.h"
-#include "fsp0fsp.h"
-#include "rem0rec.h"
-#include "rem0cmp.h"
-#include "mtr0mtr.h"
-#include "log0log.h"
-#include "log0recv.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "trx0trx.h"
-#include "dict0boot.h"
-#include "dict0load.h"
-#include "trx0sys.h"
-#include "dict0crea.h"
-#include "btr0btr.h"
-#include "btr0pcur.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "rem0rec.h"
-#include "srv0srv.h"
-#include "que0que.h"
-#include "usr0sess.h"
-#include "lock0lock.h"
-#include "trx0roll.h"
-#include "trx0purge.h"
-#include "row0ins.h"
-#include "row0sel.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "row0mysql.h"
-#include "lock0lock.h"
-#include "ibuf0ibuf.h"
-#include "pars0pars.h"
-#include "btr0sea.h"
-#include "srv0start.h"
-#include "que0que.h"
-
-/* Log sequence number immediately after startup */
-dulint srv_start_lsn;
-/* Log sequence number at shutdown */
-dulint srv_shutdown_lsn;
-
-#ifdef HAVE_DARWIN_THREADS
-# include <sys/utsname.h>
-ibool srv_have_fullfsync = FALSE; /* set at startup: TRUE if F_FULLFSYNC
- is assumed or detected to be usable */
-#endif
-
-ibool srv_start_raw_disk_in_use = FALSE; /* set to TRUE at startup when
- a data file is a raw partition */
-
-ulint srv_sizeof_trx_t_in_ha_innodb_cc; /* compared at startup against
- sizeof(trx_t) as seen in this file;
- startup fails if they differ */
-
-ibool srv_startup_is_before_trx_rollback_phase = FALSE; /* set to TRUE
- when startup begins */
-ibool srv_is_being_started = FALSE; /* set to TRUE when startup begins */
-#ifndef UNIV_HOTBACKUP
-static ibool srv_start_has_been_called = FALSE; /* TRUE after the first
- startup call; a second call prints
- an error message */
-static ibool srv_was_started = FALSE;
-#endif /* !UNIV_HOTBACKUP */
-
-/* At a shutdown the value first climbs to SRV_SHUTDOWN_CLEANUP
-and then to SRV_SHUTDOWN_LAST_PHASE */
-ulint srv_shutdown_state = 0;
-
-#ifndef UNIV_HOTBACKUP
-static os_file_t files[1000]; /* handles of the log and data files while
- they are created or opened at startup,
- indexed by file number; the number of
- data files is checked to be < 1000 */
-
-static mutex_t ios_mutex; /* protects ios */
-static ulint ios; /* incremented by an i/o handler thread each
- time fil_aio_wait() returns */
-
-static ulint n[SRV_MAX_N_IO_THREADS + 5]; /* n[i] == i: the segment
- number handed to i/o handler thread i */
-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; /* passed to
- os_thread_create() for the i/o handler
- threads */
-
-/* We use this mutex to test the return value of pthread_mutex_trylock
- on successful locking. HP-UX does NOT return 0, though Linux et al do. */
-static os_fast_mutex_t srv_os_test_mutex;
-
-/* Name of srv_monitor_file; allocated only when srv_innodb_status is
-set, otherwise NULL */
-static char* srv_monitor_file_name;
-#endif /* !UNIV_HOTBACKUP */
-
-#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD /* passed to os_aio_init() */
-#define SRV_MAX_N_PENDING_SYNC_IOS 100 /* passed to os_aio_init() */
-
-
-/* Purify-only replacement for memcmp(), used to avoid Purify warnings */
-
-#ifdef HAVE_purify
-/* Compares len bytes of s1 and s2. Returns 0 if all of them are equal,
-otherwise the number of bytes that were still left to compare, counting
-the first differing byte. Unlike memcmp(), the result carries no
-ordering information. */
-static int inno_bcmp(register const char *s1, register const char *s2,
-		     register uint len)
-{
-	while (len-- != 0) {
-		if (*s1++ != *s2++) {
-			break;
-		}
-	}
-
-	/* If every byte matched, len wrapped around below zero and
-	len + 1 is 0 again */
-	return(len + 1);
-}
-#define memcmp(A,B,C) inno_bcmp((A),(B),(C))
-#endif
-
-/*************************************************************************
-Parses a size given as a decimal number with an optional unit suffix.
-A trailing 'G' or 'g' means gigabytes and 'M' or 'm' megabytes; the suffix
-is consumed. A number without such a suffix is taken as bytes and divided
-down to whole megabytes. */
-static
-char*
-srv_parse_megabytes(
-/*================*/
-			/* out: next character in string */
-	char*	str,	/* in: string containing a quantity in bytes */
-	ulint*	megs)	/* out: the number in megabytes */
-{
-	char*	rest;
-	ulint	value	= strtoul(str, &rest, 10);
-
-	if (*rest == 'G' || *rest == 'g') {
-		/* Gigabytes to megabytes */
-		value *= 1024;
-		rest++;
-	} else if (*rest == 'M' || *rest == 'm') {
-		rest++;
-	} else {
-		/* Plain bytes to megabytes */
-		value /= 1024 * 1024;
-	}
-
-	*megs = value;
-
-	return(rest);
-}
-
-/*************************************************************************
-Reads the data files and their sizes from a character string given in
-the .cnf file. The expected syntax is
-path:size[M | G];path:size[M | G]... where a size may be followed by
-"newraw" or "raw" to mark a raw partition, and the last definition may
-instead end in :autoextend[:max:size[M | G]].
-
-The string is modified in place: the ':' that ends each path is
-overwritten with a null character and the returned file names point into
-the string, so it must stay allocated for as long as the names are used. */
-
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
-					/* out: TRUE if ok, FALSE if parsing
-					error */
-	char*	str,			/* in/out: the data file path string */
-	char***	data_file_names,	/* out, own: array of data file
-					names */
-	ulint**	data_file_sizes,	/* out, own: array of data file sizes
-					in megabytes */
-	ulint**	data_file_is_raw_partition,/* out, own: array of flags
-					showing which data files are raw
-					partitions */
-	ulint*	n_data_files,		/* out: number of data files */
-	ibool*	is_auto_extending,	/* out: TRUE if the last data file is
-					auto-extending */
-	ulint*	max_auto_extend_size)	/* out: max auto extend size for the
-					last file if specified, 0 if not */
-{
-	char*	input_str;
-	char*	path;
-	ulint	size;
-	ulint	max_size;
-	ulint	i	= 0;
-
-	*is_auto_extending = FALSE;
-	*max_auto_extend_size = 0;
-
-	input_str = str;
-
-	/* First calculate the number of data files and check syntax:
-	path:size[M | G];path:size[M | G]... . Note that a Windows path may
-	contain a drive name and a ':'. */
-
-	while (*str != '\0') {
-
-		while ((*str != ':' && *str != '\0')
-		       || (*str == ':'
-			   && (*(str + 1) == '\\' || *(str + 1) == '/'
-			       || *(str + 1) == ':'))) {
-			str++;
-		}
-
-		if (*str == '\0') {
-			return(FALSE);
-		}
-
-		str++;
-
-		str = srv_parse_megabytes(str, &size);
-
-		/* Reject a zero-sized file right away, before anything
-		else is parsed: the check must see the file size itself */
-
-		if (size == 0) {
-			return(FALSE);
-		}
-
-		if (0 == strncmp(str, ":autoextend",
-				 (sizeof ":autoextend") - 1)) {
-
-			str += (sizeof ":autoextend") - 1;
-
-			if (0 == strncmp(str, ":max:",
-					 (sizeof ":max:") - 1)) {
-
-				str += (sizeof ":max:") - 1;
-
-				/* Only the syntax is checked in this pass.
-				The value goes to a scratch variable so that
-				it cannot be mistaken for the file size. */
-
-				str = srv_parse_megabytes(str, &max_size);
-			}
-
-			/* An auto-extending file must be the last one */
-
-			if (*str != '\0') {
-
-				return(FALSE);
-			}
-		}
-
-		if (strlen(str) >= 6
-		    && *str == 'n'
-		    && *(str + 1) == 'e'
-		    && *(str + 2) == 'w') {
-			str += 3;
-		}
-
-		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
-			str += 3;
-		}
-
-		i++;
-
-		if (*str == ';') {
-			str++;
-		} else if (*str != '\0') {
-
-			return(FALSE);
-		}
-	}
-
-	if (i == 0) {
-		/* If innodb_data_file_path was defined it must contain
-		at least one data file definition */
-
-		return(FALSE);
-	}
-
-	*data_file_names = (char**)ut_malloc(i * sizeof(void*));
-	*data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint));
-	*data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint));
-
-	*n_data_files = i;
-
-	/* Then store the actual values to our arrays */
-
-	str = input_str;
-	i = 0;
-
-	while (*str != '\0') {
-		path = str;
-
-		/* Note that we must step over the ':' in a Windows path;
-		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
-		a Windows raw partition may have a specification like
-		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
-
-		while ((*str != ':' && *str != '\0')
-		       || (*str == ':'
-			   && (*(str + 1) == '\\' || *(str + 1) == '/'
-			       || *(str + 1) == ':'))) {
-			str++;
-		}
-
-		if (*str == ':') {
-			/* Make path a null-terminated string */
-			*str = '\0';
-			str++;
-		}
-
-		str = srv_parse_megabytes(str, &size);
-
-		(*data_file_names)[i] = path;
-		(*data_file_sizes)[i] = size;
-
-		if (0 == strncmp(str, ":autoextend",
-				 (sizeof ":autoextend") - 1)) {
-
-			*is_auto_extending = TRUE;
-
-			str += (sizeof ":autoextend") - 1;
-
-			if (0 == strncmp(str, ":max:",
-					 (sizeof ":max:") - 1)) {
-
-				str += (sizeof ":max:") - 1;
-
-				str = srv_parse_megabytes(
-					str, max_auto_extend_size);
-			}
-
-			if (*str != '\0') {
-
-				return(FALSE);
-			}
-		}
-
-		(*data_file_is_raw_partition)[i] = 0;
-
-		if (strlen(str) >= 6
-		    && *str == 'n'
-		    && *(str + 1) == 'e'
-		    && *(str + 2) == 'w') {
-			str += 3;
-			(*data_file_is_raw_partition)[i] = SRV_NEW_RAW;
-		}
-
-		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
-			str += 3;
-
-			/* "newraw" has already set the flag: do not
-			downgrade it to an old raw partition */
-
-			if ((*data_file_is_raw_partition)[i] == 0) {
-				(*data_file_is_raw_partition)[i] = SRV_OLD_RAW;
-			}
-		}
-
-		i++;
-
-		if (*str == ';') {
-			str++;
-		}
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Reads log group home directories from a character string given in
-the .cnf file. The syntax is path;path;... but exactly one path is
-accepted. A ';' that ends a path is overwritten with a null character,
-and the returned array entry points into the given string. */
-
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
-					/* out: TRUE if ok, FALSE if parsing
-					error */
-	char*	str,			/* in: character string */
-	char***	log_group_home_dirs)	/* out, own: log group home dirs */
-{
-	char*	cursor;
-	char*	dir_start;
-	ulint	n_dirs	= 0;
-
-	/* Pass 1: count the ';'-separated directories */
-
-	for (cursor = str; *cursor != '\0'; n_dirs++) {
-
-		while (*cursor != ';' && *cursor != '\0') {
-			cursor++;
-		}
-
-		if (*cursor == ';') {
-			cursor++;
-		}
-	}
-
-	if (n_dirs != 1) {
-		/* If innodb_log_group_home_dir was defined it must
-		contain exactly one path definition under current MySQL */
-
-		return(FALSE);
-	}
-
-	*log_group_home_dirs = (char**) ut_malloc(n_dirs * sizeof(void*));
-
-	/* Pass 2: cut the string into paths and store them */
-
-	n_dirs = 0;
-	cursor = str;
-
-	while (*cursor != '\0') {
-		dir_start = cursor;
-
-		while (*cursor != ';' && *cursor != '\0') {
-			cursor++;
-		}
-
-		if (*cursor == ';') {
-			*cursor++ = '\0';
-		}
-
-		(*log_group_home_dirs)[n_dirs++] = dir_start;
-	}
-
-	return(TRUE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************************
-I/o-handler thread function. Serves the aio segment whose number arg
-points to: waits in fil_aio_wait() for that segment over and over, and
-increments the ios counter under ios_mutex after each return from the
-wait. */
-static
-
-os_thread_ret_t
-io_handler_thread(
-/*==============*/
-	void*	arg)	/* in: pointer to the ulint segment number */
-{
-	ulint	segment	= *((ulint*) arg);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
-	fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment,
-		os_thread_pf(os_thread_get_curr_id()));
-#endif
-	for (;;) {
-		fil_aio_wait(segment);
-
-		mutex_enter(&ios_mutex);
-		ios++;
-		mutex_exit(&ios_mutex);
-	}
-
-	/* The loop above has no exit, so the thread never gets here: it
-	is exited in an os_event_wait(). Threads are counted in
-	os_thread_exit(), which is why a created thread must leave through
-	it and not through return(). */
-
-	os_thread_exit(NULL);
-
-	OS_THREAD_DUMMY_RETURN;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef __WIN__
-#define SRV_PATH_SEPARATOR	'\\'
-#else
-#define SRV_PATH_SEPARATOR	'/'
-#endif
-
-/*************************************************************************
-Normalizes a directory path for Windows: every slash in the string is
-replaced by a backslash. Does nothing unless __WIN__ is defined. */
-
-void
-srv_normalize_path_for_win(
-/*=======================*/
-	char*	str __attribute__((unused)))	/* in/out: null-terminated
-						character string */
-{
-#ifdef __WIN__
-	while (*str != '\0') {
-
-		if (*str == '/') {
-			*str = '\\';
-		}
-
-		str++;
-	}
-#endif
-}
-
-/*************************************************************************
-Makes sure that a non-empty path ends in SRV_PATH_SEPARATOR. An empty
-string, or one that already ends in the separator, is returned as such;
-otherwise a copy with the separator appended is allocated with
-ut_malloc() and returned. */
-
-char*
-srv_add_path_separator_if_needed(
-/*=============================*/
-			/* out: string which has the separator if the
-			string is not empty */
-	char*	str)	/* in: null-terminated character string */
-{
-	ulint	len	= ut_strlen(str);
-	char*	with_sep;
-
-	if (len != 0 && str[len - 1] != SRV_PATH_SEPARATOR) {
-
-		/* Room for the separator and the terminating null */
-		with_sep = ut_malloc(len + 2);
-
-		memcpy(with_sep, str, len);
-		with_sep[len] = SRV_PATH_SEPARATOR;
-		with_sep[len + 1] = 0;
-
-		return(with_sep);
-	}
-
-	return(str);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Converts a file size given as a number of database pages to bytes and
-returns the low 32 bits of the byte count. */
-static
-ulint
-srv_calc_low32(
-/*===========*/
-				/* out: low 32 bits of file size when
-				expressed in bytes */
-	ulint	file_size)	/* in: file size in database pages */
-{
-	ulint	n_bytes	= file_size << UNIV_PAGE_SIZE_SHIFT;
-
-	return(n_bytes & 0xFFFFFFFFUL);
-}
-
-/*************************************************************************
-Converts a file size given as a number of database pages to bytes and
-returns the part of the byte count that lies above the low 32 bits. */
-static
-ulint
-srv_calc_high32(
-/*============*/
-				/* out: high 32 bits of file size when
-				expressed in bytes */
-	ulint	file_size)	/* in: file size in database pages */
-{
-	/* Shifting left by the page size shift and then right by 32
-	is the same as one right shift by the difference */
-	ulint	n_shift	= 32 - UNIV_PAGE_SIZE_SHIFT;
-
-	return(file_size >> n_shift);
-}
-
-/*************************************************************************
-Creates or opens the log files and closes them. The file is named
-<log group home dir>ib_logfile<i>. It is first created; if that fails
-because the file already exists, the file is opened instead and its size
-is checked against srv_log_file_size. A newly created file is set to
-srv_log_file_size pages. The file is then closed and added to the file
-system with fil_node_create(). For the first file of a group (i == 0) the
-file space object and the log group are created as well. */
-static
-ulint
-open_or_create_log_file(
-/*====================*/
- /* out: DB_SUCCESS or error code */
- ibool create_new_db, /* in: TRUE if we should create a
- new database */
- ibool* log_file_created, /* out: TRUE if new log file
- created */
- ibool log_file_has_been_opened,/* in: TRUE if a log file has been
- opened before: then it is an error
- to try to create another log file */
- ulint k, /* in: log group number */
- ulint i) /* in: log file number in group */
-{
- ibool ret;
- ulint size;
- ulint size_high;
- char name[10000]; /* path of the log file */
-
- UT_NOT_USED(create_new_db);
-
- *log_file_created = FALSE;
-
- srv_normalize_path_for_win(srv_log_group_home_dirs[k]);
- srv_log_group_home_dirs[k] = srv_add_path_separator_if_needed(
- srv_log_group_home_dirs[k]);
-
- ut_a(strlen(srv_log_group_home_dirs[k])
- < (sizeof name) - 10 - sizeof "ib_logfile");
- sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k],
- "ib_logfile", (ulong) i);
-
- files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL,
- OS_LOG_FILE, &ret);
- if (ret == FALSE) { /* the file could not be created */
- if (os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have errno set
- to 0 here, which causes our function to return 100;
- work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
-#endif
- ) {
- fprintf(stderr,
- "InnoDB: Error in creating"
- " or opening %s\n", name);
-
- return(DB_ERROR);
- }
-
- files[i] = os_file_create(name, OS_FILE_OPEN, OS_FILE_AIO,
- OS_LOG_FILE, &ret);
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
-
- return(DB_ERROR);
- }
-
- ret = os_file_get_size(files[i], &size, &size_high);
- ut_a(ret);
-
- if (size != srv_calc_low32(srv_log_file_size)
- || size_high != srv_calc_high32(srv_log_file_size)) {
-
- fprintf(stderr,
- "InnoDB: Error: log file %s is"
- " of different size %lu %lu bytes\n"
- "InnoDB: than specified in the .cnf"
- " file %lu %lu bytes!\n",
- name, (ulong) size_high, (ulong) size,
- (ulong) srv_calc_high32(srv_log_file_size),
- (ulong) srv_calc_low32(srv_log_file_size));
-
- return(DB_ERROR);
- }
- } else { /* the file did not exist and was created */
- *log_file_created = TRUE;
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Log file %s did not exist:"
- " new to be created\n",
- name);
- if (log_file_has_been_opened) {
-
- return(DB_ERROR);
- }
-
- fprintf(stderr, "InnoDB: Setting log file %s size to %lu MB\n",
- name, (ulong) srv_log_file_size
- >> (20 - UNIV_PAGE_SIZE_SHIFT));
-
- fprintf(stderr,
- "InnoDB: Database physically writes the file"
- " full: wait...\n");
-
- ret = os_file_set_size(name, files[i],
- srv_calc_low32(srv_log_file_size),
- srv_calc_high32(srv_log_file_size));
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in creating %s:"
- " probably out of disk space\n",
- name);
-
- return(DB_ERROR);
- }
- }
-
- ret = os_file_close(files[i]);
- ut_a(ret);
-
- if (i == 0) {
- /* Create in memory the file space object
- which is for this log group */
-
- fil_space_create(name,
- 2 * k + SRV_LOG_SPACE_FIRST_ID, FIL_LOG);
- }
-
- ut_a(fil_validate());
-
- fil_node_create(name, srv_log_file_size,
- 2 * k + SRV_LOG_SPACE_FIRST_ID, FALSE);
-#ifdef UNIV_LOG_ARCHIVE
- /* If this is the first log group, create the file space object
- for archived logs.
- Under MySQL, no archiving ever done. */
-
- if (k == 0 && i == 0) {
- arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
-
- fil_space_create("arch_log_space", arch_space_id, FIL_LOG);
- } else {
- arch_space_id = ULINT_UNDEFINED;
- }
-#endif /* UNIV_LOG_ARCHIVE */
- if (i == 0) {
- log_group_init(k, srv_n_log_files,
- srv_log_file_size * UNIV_PAGE_SIZE,
- 2 * k + SRV_LOG_SPACE_FIRST_ID,
- SRV_LOG_SPACE_FIRST_ID + 1); /* dummy arch
- space id */
- }
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************************
-Creates or opens database data files and closes them. Each of the
-srv_n_data_files files is handled in order: an ordinary file is first
-created, and opened instead if it already exists; a new raw partition is
-opened and then treated like a newly created file; an old raw partition is
-only opened. The size of an opened ordinary file is checked against
-srv_data_file_sizes[], except that an auto-extending last file may be
-bigger, in which case its actual size is adopted. A created file is written
-to its configured size. Every file is closed again and added as a node to
-tablespace 0. Finally the i/o counter ios is reset and ios_mutex is
-created. */
-static
-ulint
-open_or_create_data_files(
-/*======================*/
- /* out: DB_SUCCESS or error code */
- ibool* create_new_db, /* out: TRUE if new database should be
- created */
-#ifdef UNIV_LOG_ARCHIVE
- ulint* min_arch_log_no,/* out: min of archived log numbers in data
- files */
- ulint* max_arch_log_no,/* out: */
-#endif /* UNIV_LOG_ARCHIVE */
- dulint* min_flushed_lsn,/* out: min of flushed lsn values in data
- files */
- dulint* max_flushed_lsn,/* out: */
- ulint* sum_of_new_sizes)/* out: sum of sizes of the new files added */
-{
- ibool ret;
- ulint i;
- ibool one_opened = FALSE; /* TRUE after an existing file was opened */
- ibool one_created = FALSE; /* TRUE after a new file was created */
- ulint size;
- ulint size_high;
- ulint rounded_size_pages;
- char name[10000]; /* srv_data_home followed by the file name */
-
- if (srv_n_data_files >= 1000) {
- fprintf(stderr, "InnoDB: can only have < 1000 data files\n"
- "InnoDB: you have defined %lu\n",
- (ulong) srv_n_data_files);
- return(DB_ERROR);
- }
-
- *sum_of_new_sizes = 0;
-
- *create_new_db = FALSE;
-
- srv_normalize_path_for_win(srv_data_home);
- srv_data_home = srv_add_path_separator_if_needed(srv_data_home);
-
- for (i = 0; i < srv_n_data_files; i++) {
- srv_normalize_path_for_win(srv_data_file_names[i]);
-
- ut_a(strlen(srv_data_home) + strlen(srv_data_file_names[i])
- < (sizeof name) - 1);
- sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]);
-
- if (srv_data_file_is_raw_partition[i] == 0) {
-
- /* First we try to create the file: if it already
- exists, ret will get value FALSE */
-
- files[i] = os_file_create(name, OS_FILE_CREATE,
- OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
-
- if (ret == FALSE && os_file_get_last_error(FALSE)
- != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have
- errno set to 0 here, which causes our function
- to return 100; work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
-#endif
- ) {
- fprintf(stderr,
- "InnoDB: Error in creating"
- " or opening %s\n",
- name);
-
- return(DB_ERROR);
- }
- } else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
- /* The partition is opened, not created; then it is
- written over. On success ret is left TRUE, so the
- partition goes through the same branch below as a newly
- created file. */
-
- srv_start_raw_disk_in_use = TRUE;
- srv_created_new_raw = TRUE;
-
- files[i] = os_file_create(name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
-
- return(DB_ERROR);
- }
- } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- srv_start_raw_disk_in_use = TRUE;
-
- ret = FALSE; /* take the open-existing-file branch below */
- } else {
- ut_a(0);
- }
-
- if (ret == FALSE) {
- /* We open the data file */
-
- if (one_created) {
- fprintf(stderr,
- "InnoDB: Error: data files can only"
- " be added at the end\n");
- fprintf(stderr,
- "InnoDB: of a tablespace, but"
- " data file %s existed beforehand.\n",
- name);
- return(DB_ERROR);
- }
-
- if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- files[i] = os_file_create(
- name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
- } else if (i == 0) {
- files[i] = os_file_create(
- name, OS_FILE_OPEN_RETRY,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret);
- } else {
- files[i] = os_file_create(
- name, OS_FILE_OPEN, OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
- }
-
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
- os_file_get_last_error(TRUE);
-
- return(DB_ERROR);
- }
-
- if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
-
- goto skip_size_check;
- }
-
- ret = os_file_get_size(files[i], &size, &size_high);
- ut_a(ret);
- /* Round size downward to megabytes: one unit of size_high
- is 2^32 bytes, that is, 4096 megabytes. The shift then
- converts the megabytes to database pages. */
-
- rounded_size_pages
- = (size / (1024 * 1024) + 4096 * size_high)
- << (20 - UNIV_PAGE_SIZE_SHIFT);
-
- if (i == srv_n_data_files - 1
- && srv_auto_extend_last_data_file) {
-
- if (srv_data_file_sizes[i] > rounded_size_pages
- || (srv_last_file_size_max > 0
- && srv_last_file_size_max
- < rounded_size_pages)) {
-
- fprintf(stderr,
- "InnoDB: Error: auto-extending"
- " data file %s is"
- " of a different size\n"
- "InnoDB: %lu pages (rounded"
- " down to MB) than specified"
- " in the .cnf file:\n"
- "InnoDB: initial %lu pages,"
- " max %lu (relevant if"
- " non-zero) pages!\n",
- name,
- (ulong) rounded_size_pages,
- (ulong) srv_data_file_sizes[i],
- (ulong)
- srv_last_file_size_max);
-
- return(DB_ERROR);
- }
-
- srv_data_file_sizes[i] = rounded_size_pages; /* adopt the
- actual size, so that the check
- below passes */
- }
-
- if (rounded_size_pages != srv_data_file_sizes[i]) {
-
- fprintf(stderr,
- "InnoDB: Error: data file %s"
- " is of a different size\n"
- "InnoDB: %lu pages"
- " (rounded down to MB)\n"
- "InnoDB: than specified"
- " in the .cnf file %lu pages!\n",
- name,
- (ulong) rounded_size_pages,
- (ulong) srv_data_file_sizes[i]);
-
- return(DB_ERROR);
- }
-skip_size_check:
- fil_read_flushed_lsn_and_arch_log_no(
- files[i], one_opened,
-#ifdef UNIV_LOG_ARCHIVE
- min_arch_log_no, max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- min_flushed_lsn, max_flushed_lsn);
- one_opened = TRUE;
- } else {
- /* We created the data file and now write it full of
- zeros */
-
- one_created = TRUE;
-
- if (i > 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Data file %s did not"
- " exist: new to be created\n",
- name);
- } else {
- fprintf(stderr,
- "InnoDB: The first specified"
- " data file %s did not exist:\n"
- "InnoDB: a new database"
- " to be created!\n", name);
- *create_new_db = TRUE;
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Setting file %s size to %lu MB\n",
- name,
- (ulong) (srv_data_file_sizes[i]
- >> (20 - UNIV_PAGE_SIZE_SHIFT)));
-
- fprintf(stderr,
- "InnoDB: Database physically writes the"
- " file full: wait...\n");
-
- ret = os_file_set_size(
- name, files[i],
- srv_calc_low32(srv_data_file_sizes[i]),
- srv_calc_high32(srv_data_file_sizes[i]));
-
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in creating %s:"
- " probably out of disk space\n", name);
-
- return(DB_ERROR);
- }
-
- *sum_of_new_sizes = *sum_of_new_sizes
- + srv_data_file_sizes[i];
- }
-
- ret = os_file_close(files[i]);
- ut_a(ret);
-
- if (i == 0) {
- fil_space_create(name, 0, FIL_TABLESPACE);
- }
-
- ut_a(fil_validate());
-
- if (srv_data_file_is_raw_partition[i]) {
-
- fil_node_create(name, srv_data_file_sizes[i], 0, TRUE);
- } else {
- fil_node_create(name, srv_data_file_sizes[i], 0,
- FALSE);
- }
- }
-
- ios = 0;
-
- mutex_create(&ios_mutex, SYNC_NO_ORDER_CHECK);
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************
-Starts InnoDB and creates a new database if database files
-are not found and the user wants. Server parameters are
-read from a file of name "srv_init" in the ib_home directory. */
-
-int
-innobase_start_or_create_for_mysql(void)
-/*====================================*/
- /* out: DB_SUCCESS or error code */
-{
- buf_pool_t* ret;
- ibool create_new_db;
- ibool log_file_created;
- ibool log_created = FALSE;
- ibool log_opened = FALSE;
- dulint min_flushed_lsn;
- dulint max_flushed_lsn;
-#ifdef UNIV_LOG_ARCHIVE
- ulint min_arch_log_no;
- ulint max_arch_log_no;
-#endif /* UNIV_LOG_ARCHIVE */
- ulint sum_of_new_sizes;
- ulint sum_of_data_file_sizes;
- ulint tablespace_size_in_header;
- ulint err;
- ulint i;
- ibool srv_file_per_table_original_value = srv_file_per_table;
- mtr_t mtr;
- ulint n_threads;
-#ifdef HAVE_DARWIN_THREADS
-# ifdef F_FULLFSYNC
- /* This executable has been compiled on Mac OS X 10.3 or later.
- Assume that F_FULLFSYNC is available at run-time. */
- srv_have_fullfsync = TRUE;
-# else /* F_FULLFSYNC */
- /* This executable has been compiled on Mac OS X 10.2
- or earlier. Determine if the executable is running
- on Mac OS X 10.3 or later. */
- struct utsname utsname;
- if (uname(&utsname)) {
- fputs("InnoDB: cannot determine Mac OS X version!\n", stderr);
- } else {
- srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
- }
- if (!srv_have_fullfsync) {
- fputs("InnoDB: On Mac OS X, fsync() may be"
- " broken on internal drives,\n"
- "InnoDB: making transactions unsafe!\n", stderr);
- }
-# endif /* F_FULLFSYNC */
-#endif /* HAVE_DARWIN_THREADS */
-
- if (sizeof(ulint) != sizeof(void*)) {
- fprintf(stderr,
- "InnoDB: Error: size of InnoDB's ulint is %lu,"
- " but size of void* is %lu.\n"
- "InnoDB: The sizes should be the same"
- " so that on a 64-bit platform you can\n"
- "InnoDB: allocate more than 4 GB of memory.",
- (ulong)sizeof(ulint), (ulong)sizeof(void*));
- }
-
- srv_file_per_table = FALSE; /* system tables are created in tablespace
- 0 */
-#ifdef UNIV_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_IBUF_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n"
- "InnoDB: Crash recovery will fail with UNIV_IBUF_DEBUG\n");
-#endif
-
-#ifdef UNIV_SYNC_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_SEARCH_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_MEM_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_SIMULATE_AWE
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_SIMULATE_AWE switched on !!!!!!!!!\n");
-#endif
- if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) {
- fprintf(stderr,
- "InnoDB: Error: trx_t size is %lu in ha_innodb.cc"
- " but %lu in srv0start.c\n"
- "InnoDB: Check that pthread_mutex_t is defined"
- " in the same way in these\n"
- "InnoDB: compilation modules. Cannot continue.\n",
- (ulong) srv_sizeof_trx_t_in_ha_innodb_cc,
- (ulong) sizeof(trx_t));
- return(DB_ERROR);
- }
-
-#ifdef UNIV_DISABLE_MEM_POOL
- fprintf(stderr,
- "InnoDB: The InnoDB memory heap has been disabled.\n");
-#endif
-
-#ifdef UNIV_SYNC_ATOMIC
- fprintf(stderr,
- "InnoDB: Mutex and rw_lock use atomics.\n");
-#endif
-
- /* Since InnoDB does not currently clean up all its internal data
- structures in MySQL Embedded Server Library server_end(), we
- print an error message if someone tries to start up InnoDB a
- second time during the process lifetime. */
-
- if (srv_start_has_been_called) {
- fprintf(stderr,
- "InnoDB: Error:startup called second time"
- " during the process lifetime.\n"
- "InnoDB: In the MySQL Embedded Server Library"
- " you cannot call server_init()\n"
- "InnoDB: more than once during"
- " the process lifetime.\n");
- }
-
- srv_start_has_been_called = TRUE;
-
-#ifdef UNIV_DEBUG
- log_do_write = TRUE;
-#endif /* UNIV_DEBUG */
- /* yydebug = TRUE; */
-
- srv_is_being_started = TRUE;
- srv_startup_is_before_trx_rollback_phase = TRUE;
- os_aio_use_native_aio = FALSE;
-
-#if !defined(__WIN2000__) && !defined(UNIV_SIMULATE_AWE)
- if (srv_use_awe) {
-
- fprintf(stderr,
- "InnoDB: Error: You have specified"
- " innodb_buffer_pool_awe_mem_mb\n"
- "InnoDB: in my.cnf, but AWE can only"
- " be used in Windows 2000 and later.\n"
- "InnoDB: To use AWE, InnoDB must"
- " be compiled with __WIN2000__ defined.\n");
-
- return(DB_ERROR);
- }
-#endif
-
-#ifdef __WIN__
- if (os_get_os_version() == OS_WIN95
- || os_get_os_version() == OS_WIN31
- || os_get_os_version() == OS_WINNT) {
-
- /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
- and NT use simulated aio. In NT Windows provides async i/o,
- but when run in conjunction with InnoDB Hot Backup, it seemed
- to corrupt the data files. */
-
- os_aio_use_native_aio = FALSE;
- } else {
- /* On Win 2000 and XP use async i/o */
- os_aio_use_native_aio = TRUE;
- }
-#endif
- if (srv_file_flush_method_str == NULL) {
- /* These are the default options */
-
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#ifndef __WIN__
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
- srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
- srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
-#else
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
- srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
- os_aio_use_native_aio = FALSE;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
- os_aio_use_native_aio = FALSE;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str,
- "async_unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#endif
- } else {
- fprintf(stderr,
- "InnoDB: Unrecognized value %s for"
- " innodb_flush_method\n",
- srv_file_flush_method_str);
- return(DB_ERROR);
- }
-
- /* Note that the call srv_boot() also changes the values of
- srv_pool_size etc. to the units used by InnoDB internally */
-
- /* Set the maximum number of threads which can wait for a semaphore
- inside InnoDB: this is the 'sync wait array' size, as well as the
- maximum number of threads that can wait in the 'srv_conc array' for
- their time to enter InnoDB. */
-
-#if defined(__NETWARE__)
-
- /* Create less event semaphores because Win 98/ME had
- difficulty creating 40000 event semaphores. Comment from
- Novell, Inc.: also, these just take a lot of memory on
- NetWare. */
- srv_max_n_threads = 1000;
-#else
- if (srv_pool_size >= 1000 * 1024) {
- /* Here we still have srv_pool_size counted
- in kilobytes (in 4.0 this was in bytes)
- srv_boot() converts the value to
- pages; if buffer pool is less than 1000 MB,
- assume fewer threads. */
- srv_max_n_threads = 50000;
-
- } else if (srv_pool_size >= 8 * 1024) {
-
- srv_max_n_threads = 10000;
- } else {
- srv_max_n_threads = 1000; /* saves several MB of memory,
- especially in 64-bit
- computers */
- }
-#endif
- err = srv_boot(); /* This changes srv_pool_size to units of a page */
-
- if (err != DB_SUCCESS) {
-
- return((int) err);
- }
-
- mutex_create(&srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
-
- if (srv_innodb_status) {
- srv_monitor_file_name = mem_alloc(
- strlen(fil_path_to_mysql_datadir)
- + 20 + sizeof "/innodb_status.");
- sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
- fil_path_to_mysql_datadir, os_proc_get_number());
- srv_monitor_file = fopen(srv_monitor_file_name, "w+");
- if (!srv_monitor_file) {
- fprintf(stderr, "InnoDB: unable to create %s: %s\n",
- srv_monitor_file_name, strerror(errno));
- return(DB_ERROR);
- }
- } else {
- srv_monitor_file_name = NULL;
- srv_monitor_file = os_file_create_tmpfile();
- if (!srv_monitor_file) {
- return(DB_ERROR);
- }
- }
-
- mutex_create(&srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
-
- srv_dict_tmpfile = os_file_create_tmpfile();
- if (!srv_dict_tmpfile) {
- return(DB_ERROR);
- }
-
- mutex_create(&srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
-
- srv_misc_tmpfile = os_file_create_tmpfile();
- if (!srv_misc_tmpfile) {
- return(DB_ERROR);
- }
-
-#ifdef __WIN__
- /*
- Need to hardcode this to 1 read and 1 write on Windows
- while searching for problem causing this to crash when
- higher number of threads are supported.
- */
- srv_n_read_io_threads = srv_n_write_io_threads = 1;
-#endif
- /* Restrict the maximum number of file i/o threads */
- if ((srv_n_read_io_threads + srv_n_write_io_threads) > SRV_MAX_N_IO_THREADS) {
- fprintf(stderr,
- "InnoDB: requested too many read(%d) or write(%d) IO threads, max is %d\n",
- (int)srv_n_read_io_threads,
- (int)srv_n_write_io_threads,
- SRV_MAX_N_IO_THREADS);
- return(DB_ERROR);
- }
-
- if (!os_aio_use_native_aio) {
- /* More than 4 threads are now supported. */
- n_threads = os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD,
- srv_n_read_io_threads,
- srv_n_write_io_threads,
- SRV_MAX_N_PENDING_SYNC_IOS);
- } else {
- /* Might need more slots here. Alas, I don't do windows. */
- n_threads = os_aio_init(SRV_N_PENDING_IOS_PER_THREAD,
- srv_n_read_io_threads,
- srv_n_write_io_threads,
- SRV_MAX_N_PENDING_SYNC_IOS);
- }
-
- if (n_threads > SRV_MAX_N_IO_THREADS) {
- fprintf(stderr,
- "InnoDB: requested too many IO threads(%d), max is %d\n",
- (int)n_threads, SRV_MAX_N_IO_THREADS);
- return(DB_ERROR);
- }
-
- fil_init(srv_max_n_open_files);
-
- if (srv_use_awe) {
- fprintf(stderr,
- "InnoDB: Using AWE: Memory window is %lu MB"
- " and AWE memory is %lu MB\n",
- (ulong) (srv_awe_window_size / ((1024 * 1024)
- / UNIV_PAGE_SIZE)),
- (ulong) (srv_pool_size / ((1024 * 1024)
- / UNIV_PAGE_SIZE)));
-
- /* We must disable adaptive hash indexes because they do not
- tolerate remapping of pages in AWE */
-
- srv_use_adaptive_hash_indexes = FALSE;
- ret = buf_pool_init(srv_pool_size, srv_pool_size,
- srv_awe_window_size);
- } else {
- ret = buf_pool_init(srv_pool_size, srv_pool_size,
- srv_pool_size);
- }
-
- if (ret == NULL) {
- fprintf(stderr,
- "InnoDB: Fatal error: cannot allocate the memory"
- " for the buffer pool\n");
-
- return(DB_ERROR);
- }
-
- fsp_init();
- log_init();
-
- lock_sys_create(srv_lock_table_size);
-
- /* Create i/o-handler threads: */
-
- for (i = 0; i < n_threads; i++) {
- n[i] = i;
-
- os_thread_create(io_handler_thread, n + i, thread_ids + i);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if (0 != ut_strcmp(srv_log_group_home_dirs[0], srv_arch_dir)) {
- fprintf(stderr,
- "InnoDB: Error: you must set the log group"
- " home dir in my.cnf the\n"
- "InnoDB: same as log arch dir.\n");
-
- return(DB_ERROR);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (srv_n_log_files * srv_log_file_size >= 262144) {
- fprintf(stderr,
- "InnoDB: Error: combined size of log files"
- " must be < 4 GB\n");
-
- return(DB_ERROR);
- }
-
- sum_of_new_sizes = 0;
-
- for (i = 0; i < srv_n_data_files; i++) {
-#ifndef __WIN__
- if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= 262144) {
- fprintf(stderr,
- "InnoDB: Error: file size must be < 4 GB"
- " with this MySQL binary\n"
- "InnoDB: and operating system combination,"
- " in some OS's < 2 GB\n");
-
- return(DB_ERROR);
- }
-#endif
- sum_of_new_sizes += srv_data_file_sizes[i];
- }
-
- if (sum_of_new_sizes < 640) {
- fprintf(stderr,
- "InnoDB: Error: tablespace size must be"
- " at least 10 MB\n");
-
- return(DB_ERROR);
- }
-
- err = open_or_create_data_files(&create_new_db,
-#ifdef UNIV_LOG_ARCHIVE
- &min_arch_log_no, &max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- &min_flushed_lsn, &max_flushed_lsn,
- &sum_of_new_sizes);
- if (err != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Could not open or create data files.\n"
- "InnoDB: If you tried to add new data files,"
- " and it failed here,\n"
- "InnoDB: you should now edit innodb_data_file_path"
- " in my.cnf back\n"
- "InnoDB: to what it was, and remove the"
- " new ibdata files InnoDB created\n"
- "InnoDB: in this failed attempt. InnoDB only wrote"
- " those files full of\n"
- "InnoDB: zeros, but did not yet use them in any way."
- " But be careful: do not\n"
- "InnoDB: remove old data files"
- " which contain your precious data!\n");
-
- return((int) err);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- srv_normalize_path_for_win(srv_arch_dir);
- srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
-#endif /* UNIV_LOG_ARCHIVE */
-
- for (i = 0; i < srv_n_log_files; i++) {
- err = open_or_create_log_file(create_new_db, &log_file_created,
- log_opened, 0, i);
- if (err != DB_SUCCESS) {
-
- return((int) err);
- }
-
- if (log_file_created) {
- log_created = TRUE;
- } else {
- log_opened = TRUE;
- }
- if ((log_opened && create_new_db)
- || (log_opened && log_created)) {
- fprintf(stderr,
- "InnoDB: Error: all log files must be"
- " created at the same time.\n"
- "InnoDB: All log files must be"
- " created also in database creation.\n"
- "InnoDB: If you want bigger or smaller"
- " log files, shut down the\n"
- "InnoDB: database and make sure there"
- " were no errors in shutdown.\n"
- "InnoDB: Then delete the existing log files."
- " Edit the .cnf file\n"
- "InnoDB: and start the database again.\n");
-
- return(DB_ERROR);
- }
- }
-
- /* Open all log files and data files in the system tablespace: we
- keep them open until database shutdown */
-
- fil_open_log_and_system_tablespace_files();
-
- if (log_created && !create_new_db
-#ifdef UNIV_LOG_ARCHIVE
- && !srv_archive_recovery
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
- if (ut_dulint_cmp(max_flushed_lsn, min_flushed_lsn) != 0
-#ifdef UNIV_LOG_ARCHIVE
- || max_arch_log_no != min_arch_log_no
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
- fprintf(stderr,
- "InnoDB: Cannot initialize created"
- " log files because\n"
- "InnoDB: data files were not in sync"
- " with each other\n"
- "InnoDB: or the data files are corrupt.\n");
-
- return(DB_ERROR);
- }
-
- if (ut_dulint_cmp(max_flushed_lsn, ut_dulint_create(0, 1000))
- < 0) {
- fprintf(stderr,
- "InnoDB: Cannot initialize created"
- " log files because\n"
- "InnoDB: data files are corrupt,"
- " or new data files were\n"
- "InnoDB: created when the database"
- " was started previous\n"
- "InnoDB: time but the database"
- " was not shut down\n"
- "InnoDB: normally after that.\n");
-
- return(DB_ERROR);
- }
-
- mutex_enter(&(log_sys->mutex));
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Do not + 1 arch_log_no because we do not use log
- archiving */
- recv_reset_logs(max_flushed_lsn, max_arch_log_no, TRUE);
-#else
- recv_reset_logs(max_flushed_lsn, TRUE);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_exit(&(log_sys->mutex));
- }
-
- if (create_new_db) {
- mtr_start(&mtr);
-
- fsp_header_init(0, sum_of_new_sizes, &mtr);
-
- mtr_commit(&mtr);
-
- trx_sys_create();
- dict_create();
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
-#ifdef UNIV_LOG_ARCHIVE
- } else if (srv_archive_recovery) {
- fprintf(stderr,
- "InnoDB: Starting archive"
- " recovery from a backup...\n");
- err = recv_recovery_from_archive_start(
- min_flushed_lsn, srv_archive_recovery_limit_lsn,
- min_arch_log_no);
- if (err != DB_SUCCESS) {
-
- return(DB_ERROR);
- }
- /* Since ibuf init is in dict_boot, and ibuf is needed
- in any disk i/o, first call dict_boot */
-
- dict_boot();
- trx_sys_init_at_db_start();
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
- /* Initialize the fsp free limit global variable in the log
- system */
- fsp_header_get_free_limit(0);
-
- recv_recovery_from_archive_finish();
-#endif /* UNIV_LOG_ARCHIVE */
- } else {
- /* We always try to do a recovery, even if the database had
- been shut down normally: this is the normal startup path */
-
- err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
- ut_dulint_max,
- min_flushed_lsn,
- max_flushed_lsn);
- if (err != DB_SUCCESS) {
-
- return(DB_ERROR);
- }
-
- /* Since the insert buffer init is in dict_boot, and the
- insert buffer is needed in any disk i/o, first we call
- dict_boot(). Note that trx_sys_init_at_db_start() only needs
- to access space 0, and the insert buffer at this stage already
- works for space 0. */
-
- dict_boot();
- trx_sys_init_at_db_start();
-
- if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
- /* The following call is necessary for the insert
- buffer to work with multiple tablespaces. We must
- know the mapping between space id's and .ibd file
- names.
-
- In a crash recovery, we check that the info in data
- dictionary is consistent with what we already know
- about space id's from the call of
- fil_load_single_table_tablespaces().
-
- In a normal startup, we create the space objects for
- every table in the InnoDB data dictionary that has
- an .ibd file.
-
- We also determine the maximum tablespace id used.
-
- TODO: We may have incomplete transactions in the
- data dictionary tables. Does that harm the scanning of
- the data dictionary below? */
-
- dict_check_tablespaces_and_store_max_id(
- recv_needed_recovery);
- }
-
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
- /* Initialize the fsp free limit global variable in the log
- system */
- fsp_header_get_free_limit(0);
-
- /* recv_recovery_from_checkpoint_finish needs trx lists which
- are initialized in trx_sys_init_at_db_start(). */
-
- recv_recovery_from_checkpoint_finish();
- }
-
- if (!create_new_db && sum_of_new_sizes > 0) {
- /* New data file(s) were added */
- mtr_start(&mtr);
-
- fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
-
- mtr_commit(&mtr);
-
- /* Immediately write the log record about increased tablespace
- size to disk, so that it is durable even if mysqld would crash
- quickly */
-
- log_buffer_flush_to_disk();
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Archiving is always off under MySQL */
- if (!srv_log_archive_on) {
- ut_a(DB_SUCCESS == log_archive_noarchivelog());
- } else {
- mutex_enter(&(log_sys->mutex));
-
- start_archive = FALSE;
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- start_archive = TRUE;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (start_archive) {
- ut_a(DB_SUCCESS == log_archive_archivelog());
- }
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* fprintf(stderr, "Max allowed record size %lu\n",
- page_get_free_space_of_empty() / 2); */
-
- /* Create the thread which watches the timeouts for lock waits
- and prints InnoDB monitor info */
-
- os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL,
- thread_ids + 2 + SRV_MAX_N_IO_THREADS);
-
- /* Create the thread which warns of long semaphore waits */
- os_thread_create(&srv_error_monitor_thread, NULL,
- thread_ids + 3 + SRV_MAX_N_IO_THREADS);
- srv_was_started = TRUE;
- srv_is_being_started = FALSE;
-
- if (trx_doublewrite == NULL) {
- /* Create the doublewrite buffer to a new tablespace */
-
- trx_sys_create_doublewrite_buf();
- }
-
- err = dict_create_or_check_foreign_constraint_tables();
-
- if (err != DB_SUCCESS) {
- return((int)DB_ERROR);
- }
-
- /* Create the master thread which does purge and other utility
- operations */
-
- os_thread_create(&srv_master_thread, NULL, thread_ids
- + (1 + SRV_MAX_N_IO_THREADS));
-#ifdef UNIV_DEBUG
- /* buf_debug_prints = TRUE; */
-#endif /* UNIV_DEBUG */
- sum_of_data_file_sizes = 0;
-
- for (i = 0; i < srv_n_data_files; i++) {
- sum_of_data_file_sizes += srv_data_file_sizes[i];
- }
-
- tablespace_size_in_header = fsp_header_get_tablespace_size(0);
-
- if (!srv_auto_extend_last_data_file
- && sum_of_data_file_sizes != tablespace_size_in_header) {
-
- fprintf(stderr,
- "InnoDB: Error: tablespace size"
- " stored in header is %lu pages, but\n"
- "InnoDB: the sum of data file sizes is %lu pages\n",
- (ulong) tablespace_size_in_header,
- (ulong) sum_of_data_file_sizes);
-
- if (srv_force_recovery == 0
- && sum_of_data_file_sizes < tablespace_size_in_header) {
- /* This is a fatal error, the tail of a tablespace is
- missing */
-
- fprintf(stderr,
- "InnoDB: Cannot start InnoDB."
- " The tail of the system tablespace is\n"
- "InnoDB: missing. Have you edited"
- " innodb_data_file_path in my.cnf in an\n"
- "InnoDB: inappropriate way, removing"
- " ibdata files from there?\n"
- "InnoDB: You can set innodb_force_recovery=1"
- " in my.cnf to force\n"
- "InnoDB: a startup if you are trying"
- " to recover a badly corrupt database.\n");
-
- return(DB_ERROR);
- }
- }
-
- if (srv_auto_extend_last_data_file
- && sum_of_data_file_sizes < tablespace_size_in_header) {
-
- fprintf(stderr,
- "InnoDB: Error: tablespace size stored in header"
- " is %lu pages, but\n"
- "InnoDB: the sum of data file sizes"
- " is only %lu pages\n",
- (ulong) tablespace_size_in_header,
- (ulong) sum_of_data_file_sizes);
-
- if (srv_force_recovery == 0) {
-
- fprintf(stderr,
- "InnoDB: Cannot start InnoDB. The tail of"
- " the system tablespace is\n"
- "InnoDB: missing. Have you edited"
- " innodb_data_file_path in my.cnf in an\n"
- "InnoDB: inappropriate way, removing"
- " ibdata files from there?\n"
- "InnoDB: You can set innodb_force_recovery=1"
- " in my.cnf to force\n"
- "InnoDB: a startup if you are trying to"
- " recover a badly corrupt database.\n");
-
- return(DB_ERROR);
- }
- }
-
- /* Check that os_fast_mutexes work as expected */
- os_fast_mutex_init(&srv_os_test_mutex);
-
- if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
- fprintf(stderr,
- "InnoDB: Error: pthread_mutex_trylock returns"
- " an unexpected value on\n"
- "InnoDB: success! Cannot continue.\n");
- exit(1);
- }
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
-
- os_fast_mutex_lock(&srv_os_test_mutex);
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
-
- os_fast_mutex_free(&srv_os_test_mutex);
-
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Started; log sequence number %lu %lu\n",
- (ulong) ut_dulint_get_high(srv_start_lsn),
- (ulong) ut_dulint_get_low(srv_start_lsn));
- }
-
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: !!! innodb_force_recovery"
- " is set to %lu !!!\n",
- (ulong) srv_force_recovery);
- }
-
- fflush(stderr);
-
- if (trx_doublewrite_must_reset_space_ids) {
- /* Actually, we did not change the undo log format between
- 4.0 and 4.1.1, and we would not need to run purge to
- completion. Note also that the purge algorithm in 4.1.1
- can process the the history list again even after a full
- purge, because our algorithm does not cut the end of the
- history list in all cases so that it would become empty
- after a full purge. That mean that we may purge 4.0 type
- undo log even after this phase.
-
- The insert buffer record format changed between 4.0 and
- 4.1.1. It is essential that the insert buffer is emptied
- here! */
-
- fprintf(stderr,
- "InnoDB: You are upgrading to an"
- " InnoDB version which allows multiple\n"
- "InnoDB: tablespaces. Wait that purge"
- " and insert buffer merge run to\n"
- "InnoDB: completion...\n");
- for (;;) {
- os_thread_sleep(1000000);
-
- if (0 == strcmp(srv_main_thread_op_info,
- "waiting for server activity")) {
-
- ut_a(ibuf_is_empty());
-
- break;
- }
- }
- fprintf(stderr,
- "InnoDB: Full purge and insert buffer merge"
- " completed.\n");
-
- trx_sys_mark_upgraded_to_multiple_tablespaces();
-
- fprintf(stderr,
- "InnoDB: You have now successfully upgraded"
- " to the multiple tablespaces\n"
- "InnoDB: format. You should NOT DOWNGRADE"
- " to an earlier version of\n"
- "InnoDB: InnoDB! But if you absolutely need to"
- " downgrade, see\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "multiple-tablespaces.html\n"
- "InnoDB: for instructions.\n");
- }
-
- if (srv_force_recovery == 0) {
- /* In the insert buffer we may have even bigger tablespace
- id's, because we may have dropped those tablespaces, but
- insert buffer merge has not had time to clean the records from
- the ibuf tree. */
-
- ibuf_update_max_tablespace_id();
- }
-
- srv_file_per_table = srv_file_per_table_original_value;
-
- return((int) DB_SUCCESS);
-}
-
-/********************************************************************
-Shuts down the InnoDB database. */
-
-int
-innobase_shutdown_for_mysql(void)
-/*=============================*/
- /* out: DB_SUCCESS or error code */
-{
- ulint i;
-#ifdef __NETWARE__
- extern ibool panic_shutdown;
-#endif
- if (!srv_was_started) {
- if (srv_is_being_started) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: shutting down"
- " a not properly started\n"
- "InnoDB: or created database!\n");
- }
-
- return(DB_SUCCESS);
- }
-
- /* 1. Flush the buffer pool to disk, write the current lsn to
- the tablespace header(s), and copy all log data to archive.
- The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
- just free data structures after the shutdown. */
-
-
- if (srv_fast_shutdown == 2) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: MySQL has requested a very fast shutdown"
- " without flushing "
- "the InnoDB buffer pool to data files."
- " At the next mysqld startup "
- "InnoDB will do a crash recovery!\n");
- }
-
-#ifdef __NETWARE__
- if(!panic_shutdown)
-#endif
- logs_empty_and_mark_files_at_shutdown();
-
- if (srv_conc_n_threads != 0) {
- fprintf(stderr,
- "InnoDB: Warning: query counter shows %ld queries"
- " still\n"
- "InnoDB: inside InnoDB at shutdown\n",
- srv_conc_n_threads);
- }
-
- /* 2. Make all threads created by InnoDB to exit */
-
- srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
-
- /* In a 'very fast' shutdown, we do not need to wait for these threads
- to die; all which counts is that we flushed the log; a 'very fast'
- shutdown is essentially a crash. */
-
- if (srv_fast_shutdown == 2) {
- return(DB_SUCCESS);
- }
-
- /* All threads end up waiting for certain events. Put those events
- to the signaled state. Then the threads will exit themselves in
- os_thread_event_wait(). */
-
- for (i = 0; i < 1000; i++) {
- /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
- HERE OR EARLIER */
-
- /* a. Let the lock timeout thread exit */
- os_event_set(srv_lock_timeout_thread_event);
-
- /* b. srv error monitor thread exits automatically, no need
- to do anything here */
-
- /* c. We wake the master thread so that it exits */
- srv_wake_master_thread();
-
- /* d. Exit the i/o threads */
-
- os_aio_wake_all_threads_at_shutdown();
-
- os_mutex_enter(os_sync_mutex);
-
- if (os_thread_count == 0) {
- /* All the threads have exited or are just exiting;
- NOTE that the threads may not have completed their
- exit yet. Should we use pthread_join() to make sure
- they have exited? Now we just sleep 0.1 seconds and
- hope that is enough! */
-
- os_mutex_exit(os_sync_mutex);
-
- os_thread_sleep(100000);
-
- break;
- }
-
- os_mutex_exit(os_sync_mutex);
-
- os_thread_sleep(100000);
- }
-
- if (i == 1000) {
- fprintf(stderr,
- "InnoDB: Warning: %lu threads created by InnoDB"
- " had not exited at shutdown!\n",
- (ulong) os_thread_count);
- }
-
- if (srv_monitor_file) {
- fclose(srv_monitor_file);
- srv_monitor_file = 0;
- if (srv_monitor_file_name) {
- unlink(srv_monitor_file_name);
- mem_free(srv_monitor_file_name);
- }
- }
- if (srv_dict_tmpfile) {
- fclose(srv_dict_tmpfile);
- srv_dict_tmpfile = 0;
- }
-
- if (srv_misc_tmpfile) {
- fclose(srv_misc_tmpfile);
- srv_misc_tmpfile = 0;
- }
-
- mutex_free(&srv_monitor_file_mutex);
- mutex_free(&srv_dict_tmpfile_mutex);
- mutex_free(&srv_misc_tmpfile_mutex);
-
- /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
- them */
- sync_close();
-
- /* 4. Free the os_conc_mutex and all os_events and os_mutexes */
-
- srv_free();
- os_sync_free();
-
- /* Check that all read views are closed except read view owned
- by a purge. */
-
- if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
- fprintf(stderr,
- "InnoDB: Error: all read views were not closed"
- " before shutdown:\n"
- "InnoDB: %lu read views open \n",
- UT_LIST_GET_LEN(trx_sys->view_list) - 1);
- }
-
- /* 5. Free all allocated memory and the os_fast_mutex created in
- ut0mem.c */
-
- ut_free_all_mem();
-
- if (os_thread_count != 0
- || os_event_count != 0
- || os_mutex_count != 0
- || os_fast_mutex_count != 0) {
- fprintf(stderr,
- "InnoDB: Warning: some resources were not"
- " cleaned up in shutdown:\n"
- "InnoDB: threads %lu, events %lu,"
- " os_mutexes %lu, os_fast_mutexes %lu\n",
- (ulong) os_thread_count, (ulong) os_event_count,
- (ulong) os_mutex_count, (ulong) os_fast_mutex_count);
- }
-
- if (dict_foreign_err_file) {
- fclose(dict_foreign_err_file);
- }
- if (lock_latest_err_file) {
- fclose(lock_latest_err_file);
- }
-
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Shutdown completed;"
- " log sequence number %lu %lu\n",
- (ulong) ut_dulint_get_high(srv_shutdown_lsn),
- (ulong) ut_dulint_get_low(srv_shutdown_lsn));
- }
-
- return((int) DB_SUCCESS);
-}
-
-#ifdef __NETWARE__
-void set_panic_flag_for_netware()
-{
- extern ibool panic_shutdown;
- panic_shutdown = TRUE;
-}
-#endif /* __NETWARE__ */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c
deleted file mode 100644
index bb64ac07342..00000000000
--- a/storage/innobase/sync/sync0arr.c
+++ /dev/null
@@ -1,1021 +0,0 @@
-/******************************************************
-The wait array used in synchronization primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "sync0arr.h"
-#ifdef UNIV_NONINL
-#include "sync0arr.ic"
-#endif
-
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "os0sync.h"
-#include "os0file.h"
-#include "srv0srv.h"
-
-/*
- WAIT ARRAY
- ==========
-
-The wait array consists of cells each of which has an
-an operating system event object created for it. The threads
-waiting for a mutex, for example, can reserve a cell
-in the array and suspend themselves to wait for the event
-to become signaled. When using the wait array, remember to make
-sure that some thread holding the synchronization object
-will eventually know that there is a waiter in the array and
-signal the object, to prevent infinite wait.
-Why we chose to implement a wait array? First, to make
-mutexes fast, we had to code our own implementation of them,
-which only in usually uncommon cases resorts to using
-slow operating system primitives. Then we had the choice of
-assigning a unique OS event for each mutex, which would
-be simpler, or using a global wait array. In some operating systems,
-the global wait array solution is more efficient and flexible,
-because we can do with a very small number of OS events,
-say 200. In NT 3.51, allocating events seems to be a quadratic
-algorithm, because 10 000 events are created fast, but
-100 000 events takes a couple of minutes to create.
-
-As of 5.0.30 the above mentioned design is changed. Since now
-OS can handle millions of wait events efficiently, we no longer
-have this concept of each cell of wait array having one event.
-Instead, now the event that a thread wants to wait on is embedded
-in the wait object (mutex or rw_lock). We still keep the global
-wait array for the sake of diagnostics and also to avoid infinite
-wait The error_monitor thread scans the global wait array to signal
-any waiting threads who have missed the signal. */
-
-/* A cell where an individual thread may wait suspended
-until a resource is released. The suspending is implemented
-using an operating system event semaphore. */
-struct sync_cell_struct {
- void* wait_object; /* pointer to the object the
- thread is waiting for; if NULL
- the cell is free for use */
- mutex_t* old_wait_mutex; /* the latest wait mutex in cell */
- rw_lock_t* old_wait_rw_lock;/* the latest wait rw-lock in cell */
- ulint request_type; /* lock type requested on the
- object */
- const char* file; /* in debug version file where
- requested */
- ulint line; /* in debug version line where
- requested */
- os_thread_id_t thread; /* thread id of this waiting
- thread */
- ibool waiting; /* TRUE if the thread has already
- called sync_array_event_wait
- on this cell */
- ib_longlong signal_count; /* We capture the signal_count
- of the wait_object when we
- reset the event. This value is
- then passed on to os_event_wait
- and we wait only if the event
- has not been signalled in the
- period between the reset and
- wait call. */
- time_t reservation_time;/* time when the thread reserved
- the wait cell */
-};
-
-/* NOTE: It is allowed for a thread to wait
-for an event allocated for the array without owning the
-protecting mutex (depending on the case: OS or database mutex), but
-all changes (set or reset) to the state of the event must be made
-while owning the mutex. */
-struct sync_array_struct {
- ulint n_reserved; /* number of currently reserved
- cells in the wait array */
- ulint n_cells; /* number of cells in the
- wait array */
- sync_cell_t* array; /* pointer to wait array */
- ulint protection; /* this flag tells which
- mutex protects the data */
- mutex_t mutex; /* possible database mutex
- protecting this data structure */
- os_mutex_t os_mutex; /* Possible operating system mutex
- protecting the data structure.
- As this data structure is used in
- constructing the database mutex,
- to prevent infinite recursion
- in implementation, we fall back to
- an OS mutex. */
- ulint sg_count; /* count of how many times an
- object has been signalled */
- ulint res_count; /* count of cell reservations
- since creation of the array */
-};
-
-/* Counts the number of times that sync_arr_wake_threads_if_sema_free has
- * found a thread that can run because it may have missed a wakeup signal. */
-ulint sync_wake_ups = 0;
-
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores. */
-static
-ibool
-sync_array_detect_deadlock(
-/*=======================*/
- /* out: TRUE if deadlock detected */
- sync_array_t* arr, /* in: wait array; NOTE! the caller must
- own the mutex to array */
- sync_cell_t* start, /* in: cell where recursive search started */
- sync_cell_t* cell, /* in: cell to search */
- ulint depth); /* in: recursion depth */
-#endif /* UNIV_SYNC_DEBUG */
-
-/*********************************************************************
-Gets the nth cell in array. */
-static
-sync_cell_t*
-sync_array_get_nth_cell(
-/*====================*/
- /* out: cell */
- sync_array_t* arr, /* in: sync array */
- ulint n) /* in: index */
-{
- ut_a(arr);
- ut_a(n < arr->n_cells);
-
- return(arr->array + n);
-}
-
-/**********************************************************************
-Reserves the mutex semaphore protecting a sync array. */
-static
-void
-sync_array_enter(
-/*=============*/
- sync_array_t* arr) /* in: sync wait array */
-{
- ulint protection;
-
- protection = arr->protection;
-
- if (protection == SYNC_ARRAY_OS_MUTEX) {
- os_mutex_enter(arr->os_mutex);
- } else if (protection == SYNC_ARRAY_MUTEX) {
- mutex_enter(&(arr->mutex));
- } else {
- ut_error;
- }
-}
-
-/**********************************************************************
-Releases the mutex semaphore protecting a sync array. */
-static
-void
-sync_array_exit(
-/*============*/
- sync_array_t* arr) /* in: sync wait array */
-{
- ulint protection;
-
- protection = arr->protection;
-
- if (protection == SYNC_ARRAY_OS_MUTEX) {
- os_mutex_exit(arr->os_mutex);
- } else if (protection == SYNC_ARRAY_MUTEX) {
- mutex_exit(&(arr->mutex));
- } else {
- ut_error;
- }
-}
-
-/***********************************************************************
-Creates a synchronization wait array. It is protected by a mutex
-which is automatically reserved when the functions operating on it
-are called. */
-
-sync_array_t*
-sync_array_create(
-/*==============*/
- /* out, own: created wait array */
- ulint n_cells, /* in: number of cells in the array
- to create */
- ulint protection) /* in: either SYNC_ARRAY_OS_MUTEX or
- SYNC_ARRAY_MUTEX: determines the type
- of mutex protecting the data structure */
-{
- sync_array_t* arr;
- sync_cell_t* cell_array;
- sync_cell_t* cell;
- ulint i;
-
- ut_a(n_cells > 0);
-
- /* Allocate memory for the data structures */
- arr = ut_malloc(sizeof(sync_array_t));
-
- cell_array = ut_malloc(sizeof(sync_cell_t) * n_cells);
-
- arr->n_cells = n_cells;
- arr->n_reserved = 0;
- arr->array = cell_array;
- arr->protection = protection;
- arr->sg_count = 0;
- arr->res_count = 0;
-
- /* Then create the mutex to protect the wait array complex */
- if (protection == SYNC_ARRAY_OS_MUTEX) {
- arr->os_mutex = os_mutex_create(NULL);
- } else if (protection == SYNC_ARRAY_MUTEX) {
- mutex_create(&arr->mutex, SYNC_NO_ORDER_CHECK);
- } else {
- ut_error;
- }
-
- for (i = 0; i < n_cells; i++) {
- cell = sync_array_get_nth_cell(arr, i);
- cell->wait_object = NULL;
- cell->waiting = FALSE;
- cell->signal_count = 0;
- }
-
- return(arr);
-}
-
-/**********************************************************************
-Frees the resources in a wait array. */
-
-void
-sync_array_free(
-/*============*/
- sync_array_t* arr) /* in, own: sync wait array */
-{
- ulint protection;
-
- ut_a(arr->n_reserved == 0);
-
- sync_array_validate(arr);
-
- protection = arr->protection;
-
- /* Release the mutex protecting the wait array complex */
-
- if (protection == SYNC_ARRAY_OS_MUTEX) {
- os_mutex_free(arr->os_mutex);
- } else if (protection == SYNC_ARRAY_MUTEX) {
- mutex_free(&(arr->mutex));
- } else {
- ut_error;
- }
-
- ut_free(arr->array);
- ut_free(arr);
-}
-
-/************************************************************************
-Validates the integrity of the wait array. Checks
-that the number of reserved cells equals the count variable. */
-
-void
-sync_array_validate(
-/*================*/
- sync_array_t* arr) /* in: sync wait array */
-{
- ulint i;
- sync_cell_t* cell;
- ulint count = 0;
-
- sync_array_enter(arr);
-
- for (i = 0; i < arr->n_cells; i++) {
- cell = sync_array_get_nth_cell(arr, i);
- if (cell->wait_object != NULL) {
- count++;
- }
- }
-
- ut_a(count == arr->n_reserved);
-
- sync_array_exit(arr);
-}
-
-/***********************************************************************
-Returns the event that the thread owning the cell waits for. */
-static
-os_event_t
-sync_cell_get_event(
-/*================*/
- sync_cell_t* cell) /* in: non-empty sync array cell */
-{
- ulint type = cell->request_type;
-
- if (type == SYNC_MUTEX) {
- return(((mutex_t *) cell->wait_object)->event);
- } else if (type == RW_LOCK_WAIT_EX) {
- return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
- } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
- return(((rw_lock_t *) cell->wait_object)->event);
- }
-}
-
-
-/**********************************************************************
-Reserves a wait array cell for waiting for an object.
-The event of the cell is reset to nonsignalled state. */
-
-void
-sync_array_reserve_cell(
-/*====================*/
- sync_array_t* arr, /* in: wait array */
- void* object, /* in: pointer to the object to wait for */
- ulint type, /* in: lock request type */
- const char* file, /* in: file where requested */
- ulint line, /* in: line where requested */
- ulint* index) /* out: index of the reserved cell */
-{
- sync_cell_t* cell;
- os_event_t event;
- ulint i;
-
- ut_a(object);
- ut_a(index);
-
- sync_array_enter(arr);
-
- arr->res_count++;
-
- /* Reserve a new cell. */
- for (i = 0; i < arr->n_cells; i++) {
- cell = sync_array_get_nth_cell(arr, i);
-
- if (cell->wait_object == NULL) {
-
- cell->waiting = FALSE;
- cell->wait_object = object;
-
- if (type == SYNC_MUTEX) {
- cell->old_wait_mutex = object;
- } else {
- cell->old_wait_rw_lock = object;
- }
-
- cell->request_type = type;
-
- cell->file = file;
- cell->line = line;
-
- arr->n_reserved++;
-
- *index = i;
-
- sync_array_exit(arr);
-
- /* Make sure the event is reset and also store
- the value of signal_count at which the event
- was reset. */
- event = sync_cell_get_event(cell);
- cell->signal_count = os_event_reset(event);
-
- cell->reservation_time = time(NULL);
-
- cell->thread = os_thread_get_curr_id();
-
- return;
- }
- }
-
- ut_error; /* No free cell found */
-
- return;
-}
-
-/**********************************************************************
-This function should be called when a thread starts to wait on
-a wait array cell. In the debug version this function checks
-if the wait for a semaphore will result in a deadlock, in which
-case prints info and asserts. */
-
-void
-sync_array_wait_event(
-/*==================*/
- sync_array_t* arr, /* in: wait array */
- ulint index) /* in: index of the reserved cell */
-{
- sync_cell_t* cell;
- os_event_t event;
-
- ut_a(arr);
-
- sync_array_enter(arr);
-
- cell = sync_array_get_nth_cell(arr, index);
-
- ut_a(cell->wait_object);
- ut_a(!cell->waiting);
- ut_ad(os_thread_get_curr_id() == cell->thread);
-
- event = sync_cell_get_event(cell);
- cell->waiting = TRUE;
-
-#ifdef UNIV_SYNC_DEBUG
-
- /* We use simple enter to the mutex below, because if
- we cannot acquire it at once, mutex_enter would call
- recursively sync_array routines, leading to trouble.
- rw_lock_debug_mutex freezes the debug lists. */
-
- rw_lock_debug_mutex_enter();
-
- if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) {
-
- fputs("########################################\n", stderr);
- ut_error;
- }
-
- rw_lock_debug_mutex_exit();
-#endif
- sync_array_exit(arr);
-
- os_event_wait_low(event, cell->signal_count);
-
- sync_array_free_cell(arr, index);
-}
-
-/**********************************************************************
-Reports info of a wait array cell. */
-static
-void
-sync_array_cell_print(
-/*==================*/
- FILE* file, /* in: file where to print */
- sync_cell_t* cell) /* in: sync cell */
-{
- mutex_t* mutex;
- rw_lock_t* rwlock;
- ulint type;
- ulint writer;
-
- type = cell->request_type;
-
- fprintf(file,
- "--Thread %lu has waited at %s line %lu"
- " for %.2f seconds the semaphore:\n",
- (ulong) os_thread_pf(cell->thread), cell->file,
- (ulong) cell->line,
- difftime(time(NULL), cell->reservation_time));
-
- if (type == SYNC_MUTEX) {
- /* We use old_wait_mutex in case the cell has already
- been freed meanwhile */
- mutex = cell->old_wait_mutex;
-
- fprintf(file,
- "Mutex at %p created file %s line %lu, lock var %lu\n"
-#ifdef UNIV_SYNC_DEBUG
- "Last time reserved in file %s line %lu, "
-#endif /* UNIV_SYNC_DEBUG */
- "waiters flag %lu\n",
- (void*) mutex, mutex->cfile_name, (ulong) mutex->cline,
- (ulong) mutex->lock_word,
-#ifdef UNIV_SYNC_DEBUG
- mutex->file_name, (ulong) mutex->line,
-#endif /* UNIV_SYNC_DEBUG */
- (ulong) mutex->waiters);
-
- } else if (type == RW_LOCK_EX
- || type == RW_LOCK_WAIT_EX
- || type == RW_LOCK_SHARED) {
-
- switch(type) {
- case RW_LOCK_EX: fputs("X-lock on", file); break;
- case RW_LOCK_WAIT_EX: fputs("wait-X-lock on", file); break;
- default: fputs("S-lock on", file); break;
- }
-
- rwlock = cell->old_wait_rw_lock;
-
- fprintf(file,
- " RW-latch at %p created in file %s line %lu\n",
- (void*) rwlock, rwlock->cfile_name,
- (ulong) rwlock->cline);
- writer = rw_lock_get_writer(rwlock);
- if (writer != RW_LOCK_NOT_LOCKED) {
- fprintf(file,
- "a writer (thread id %lu) has"
- " reserved it in mode %s",
- (ulong) os_thread_pf(rwlock->writer_thread),
- writer == RW_LOCK_EX
- ? " exclusive\n"
- : " wait exclusive\n");
- }
-
- fprintf(file,
- "number of readers %lu, waiters flag %lu, "
- "lock_word: %ld\n"
- "Last time read locked in file %s line %lu\n"
- "Last time write locked in file %s line %lu\n",
- (ulong) rw_lock_get_reader_count(rwlock),
- (ulong) rwlock->waiters,
- rwlock->lock_word,
- rwlock->last_s_file_name,
- (ulong) rwlock->last_s_line,
- rwlock->last_x_file_name,
- (ulong) rwlock->last_x_line);
- } else {
- ut_error;
- }
-
- if (!cell->waiting) {
- fputs("wait has ended\n", file);
- }
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Looks for a cell with the given thread id. */
-static
-sync_cell_t*
-sync_array_find_thread(
-/*===================*/
- /* out: pointer to cell or NULL
- if not found */
- sync_array_t* arr, /* in: wait array */
- os_thread_id_t thread) /* in: thread id */
-{
- ulint i;
- sync_cell_t* cell;
-
- for (i = 0; i < arr->n_cells; i++) {
-
- cell = sync_array_get_nth_cell(arr, i);
-
- if (cell->wait_object != NULL
- && os_thread_eq(cell->thread, thread)
- && cell->waiting)) {
-
- return(cell); /* Found */
- }
- }
-
- return(NULL); /* Not found */
-}
-
-/**********************************************************************
-Recursion step for deadlock detection. */
-static
-ibool
-sync_array_deadlock_step(
-/*=====================*/
- /* out: TRUE if deadlock detected */
- sync_array_t* arr, /* in: wait array; NOTE! the caller must
- own the mutex to array */
- sync_cell_t* start, /* in: cell where recursive search
- started */
- os_thread_id_t thread, /* in: thread to look at */
- ulint pass, /* in: pass value */
- ulint depth) /* in: recursion depth */
-{
- sync_cell_t* new;
- ibool ret;
-
- depth++;
-
- if (pass != 0) {
- /* If pass != 0, then we do not know which threads are
- responsible of releasing the lock, and no deadlock can
- be detected. */
-
- return(FALSE);
- }
-
- new = sync_array_find_thread(arr, thread);
-
- if (new == start) {
- /* Stop running of other threads */
-
- ut_dbg_stop_threads = TRUE;
-
- /* Deadlock */
- fputs("########################################\n"
- "DEADLOCK of threads detected!\n", stderr);
-
- return(TRUE);
-
- } else if (new) {
- ret = sync_array_detect_deadlock(arr, start, new, depth);
-
- if (ret) {
- return(TRUE);
- }
- }
- return(FALSE);
-}
-
-/**********************************************************************
-This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores. */
-static
-ibool
-sync_array_detect_deadlock(
-/*=======================*/
- /* out: TRUE if deadlock detected */
- sync_array_t* arr, /* in: wait array; NOTE! the caller must
- own the mutex to array */
- sync_cell_t* start, /* in: cell where recursive search started */
- sync_cell_t* cell, /* in: cell to search */
- ulint depth) /* in: recursion depth */
-{
- mutex_t* mutex;
- rw_lock_t* lock;
- os_thread_id_t thread;
- ibool ret;
- rw_lock_debug_t*debug;
-
- ut_a(arr);
- ut_a(start);
- ut_a(cell);
- ut_ad(cell->wait_object);
- ut_ad(os_thread_get_curr_id() == start->thread);
- ut_ad(depth < 100);
-
- depth++;
-
- if (!cell->waiting) {
-
- return(FALSE); /* No deadlock here */
- }
-
- if (cell->request_type == SYNC_MUTEX) {
-
- mutex = cell->wait_object;
-
- if (mutex_get_lock_word(mutex) != 0) {
-
- thread = mutex->thread_id;
-
- /* Note that mutex->thread_id above may be
- also OS_THREAD_ID_UNDEFINED, because the
- thread which held the mutex maybe has not
- yet updated the value, or it has already
- released the mutex: in this case no deadlock
- can occur, as the wait array cannot contain
- a thread with ID_UNDEFINED value. */
-
- ret = sync_array_deadlock_step(arr, start, thread, 0,
- depth);
- if (ret) {
- fprintf(stderr,
- "Mutex %p owned by thread %lu file %s line %lu\n",
- mutex, (ulong) os_thread_pf(mutex->thread_id),
- mutex->file_name, (ulong) mutex->line);
- sync_array_cell_print(stderr, cell);
-
- return(TRUE);
- }
- }
-
- return(FALSE); /* No deadlock */
-
- } else if (cell->request_type == RW_LOCK_EX
- || cell->request_type == RW_LOCK_WAIT_EX) {
-
- lock = cell->wait_object;
-
- debug = UT_LIST_GET_FIRST(lock->debug_list);
-
- while (debug != NULL) {
-
- thread = debug->thread_id;
-
- if (((debug->lock_type == RW_LOCK_EX)
- && !os_thread_eq(thread, cell->thread))
- || ((debug->lock_type == RW_LOCK_WAIT_EX)
- && !os_thread_eq(thread, cell->thread))
- || (debug->lock_type == RW_LOCK_SHARED)) {
-
- /* The (wait) x-lock request can block
- infinitely only if someone (can be also cell
- thread) is holding s-lock, or someone
- (cannot be cell thread) (wait) x-lock, and
- he is blocked by start thread */
-
- ret = sync_array_deadlock_step(
- arr, start, thread, debug->pass,
- depth);
- if (ret) {
-print:
- fprintf(stderr, "rw-lock %p ",
- (void*) lock);
- sync_array_cell_print(stderr, cell);
- rw_lock_debug_print(debug);
- return(TRUE);
- }
- }
-
- debug = UT_LIST_GET_NEXT(list, debug);
- }
-
- return(FALSE);
-
- } else if (cell->request_type == RW_LOCK_SHARED) {
-
- lock = cell->wait_object;
- debug = UT_LIST_GET_FIRST(lock->debug_list);
-
- while (debug != NULL) {
-
- thread = debug->thread_id;
-
- if ((debug->lock_type == RW_LOCK_EX)
- || (debug->lock_type == RW_LOCK_WAIT_EX)) {
-
- /* The s-lock request can block infinitely
- only if someone (can also be cell thread) is
- holding (wait) x-lock, and he is blocked by
- start thread */
-
- ret = sync_array_deadlock_step(
- arr, start, thread, debug->pass,
- depth);
- if (ret) {
- goto print;
- }
- }
-
- debug = UT_LIST_GET_NEXT(list, debug);
- }
-
- return(FALSE);
-
- } else {
- ut_error;
- }
-
- return(TRUE); /* Execution never reaches this line: for compiler
- fooling only */
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/**********************************************************************
-Determines if we can wake up the thread waiting for a sempahore. */
-static
-ibool
-sync_arr_cell_can_wake_up(
-/*======================*/
- sync_cell_t* cell) /* in: cell to search */
-{
- mutex_t* mutex;
- rw_lock_t* lock;
-
- if (cell->request_type == SYNC_MUTEX) {
-
- mutex = cell->wait_object;
-
- if (mutex_get_lock_word(mutex) == 0) {
-
- return(TRUE);
- }
-
- } else if (cell->request_type == RW_LOCK_EX) {
-
- lock = cell->wait_object;
-
- /* X_LOCK_DECR is the unlocked state */
- if (lock->lock_word == X_LOCK_DECR) {
-
- return(TRUE);
- }
-
- } else if (cell->request_type == RW_LOCK_WAIT_EX) {
-
- lock = cell->wait_object;
-
- /* lock_word == 0 means all readers have left */
- if (lock->lock_word == 0) {
-
- return(TRUE);
- }
- } else if (cell->request_type == RW_LOCK_SHARED) {
- lock = cell->wait_object;
-
- /* lock_word > 0 means no writer or reserved writer */
- if (lock->lock_word > 0) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/**********************************************************************
-Frees the cell. NOTE! sync_array_wait_event frees the cell
-automatically! */
-
-void
-sync_array_free_cell(
-/*=================*/
- sync_array_t* arr, /* in: wait array */
- ulint index) /* in: index of the cell in array */
-{
- sync_cell_t* cell;
-
- sync_array_enter(arr);
-
- cell = sync_array_get_nth_cell(arr, index);
-
- ut_a(cell->wait_object != NULL);
-
- cell->waiting = FALSE;
- cell->wait_object = NULL;
- cell->signal_count = 0;
-
- ut_a(arr->n_reserved > 0);
- arr->n_reserved--;
-
- sync_array_exit(arr);
-}
-
-/**************************************************************************
-Increments the signalled count. */
-
-void
-sync_array_object_signalled(
-/*========================*/
- sync_array_t* arr) /* in: wait array */
-{
-#ifdef UNIV_SYNC_ATOMIC
- (void)os_atomic_increment((volatile lint *)&(arr->sg_count), 1);
-#else
- sync_array_enter(arr);
-
- arr->sg_count++;
-
- sync_array_exit(arr);
-#endif
-}
-
-/**************************************************************************
-If the wakeup algorithm does not work perfectly at semaphore relases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server.
-
-Note that there's a race condition between this thread and mutex_exit
-changing the lock_word and calling signal_object, so sometimes this finds
-threads to wake up even when nothing has gone wrong. */
-
-void
-sync_arr_wake_threads_if_sema_free(void)
-/*====================================*/
-{
- sync_array_t* arr = sync_primary_wait_array;
- sync_cell_t* cell;
- ulint count;
- ulint i;
- os_event_t event;
-
- sync_array_enter(arr);
-
- i = 0;
- count = 0;
-
- while (count < arr->n_reserved) {
-
- cell = sync_array_get_nth_cell(arr, i);
- i++;
-
- if (cell->wait_object == NULL) {
- continue;
- }
- count++;
-
- if (!cell->waiting) {
- continue;
- }
-
- if (sync_arr_cell_can_wake_up(cell)) {
-
- event = sync_cell_get_event(cell);
-
- os_event_set(event);
- sync_wake_ups++;
- }
-
- }
-
- sync_array_exit(arr);
-}
-
-/**************************************************************************
-Prints warnings of long semaphore waits to stderr. */
-
-ibool
-sync_array_print_long_waits(void)
-/*=============================*/
- /* out: TRUE if fatal semaphore wait threshold
- was exceeded */
-{
- sync_cell_t* cell;
- ibool old_val;
- ibool noticed = FALSE;
- ulint i;
- ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
- ibool fatal = FALSE;
-
- for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
-
- cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
-
- if (cell->wait_object != NULL && cell->waiting
- && difftime(time(NULL), cell->reservation_time) > 240) {
- fputs("InnoDB: Warning: a long semaphore wait:\n",
- stderr);
- sync_array_cell_print(stderr, cell);
- noticed = TRUE;
- }
-
- if (cell->wait_object != NULL && cell->waiting
- && difftime(time(NULL), cell->reservation_time)
- > fatal_timeout) {
- fatal = TRUE;
- }
- }
-
- if (noticed) {
- fprintf(stderr,
- "InnoDB: ###### Starts InnoDB Monitor"
- " for 30 secs to print diagnostic info:\n");
- old_val = srv_print_innodb_monitor;
-
- /* If some crucial semaphore is reserved, then also the InnoDB
- Monitor can hang, and we do not get diagnostics. Since in
- many cases an InnoDB hang is caused by a pwrite() or a pread()
- call hanging inside the operating system, let us print right
- now the values of pending calls of these. */
-
- fprintf(stderr,
- "InnoDB: Pending preads %lu, pwrites %lu\n",
- (ulong)os_file_n_pending_preads,
- (ulong)os_file_n_pending_pwrites);
-
- srv_print_innodb_monitor = TRUE;
- os_event_set(srv_lock_timeout_thread_event);
-
- os_thread_sleep(30000000);
-
- srv_print_innodb_monitor = old_val;
- fprintf(stderr,
- "InnoDB: ###### Diagnostic info printed"
- " to the standard error stream\n");
- }
-
- return(fatal);
-}
-
-/**************************************************************************
-Prints info of the wait array. */
-static
-void
-sync_array_output_info(
-/*===================*/
- FILE* file, /* in: file where to print */
- sync_array_t* arr) /* in: wait array; NOTE! caller must own the
- mutex */
-{
- sync_cell_t* cell;
- ulint count;
- ulint i;
-
- fprintf(file,
- "OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
- (long) arr->res_count, (long) arr->sg_count);
- i = 0;
- count = 0;
-
- while (count < arr->n_reserved) {
-
- cell = sync_array_get_nth_cell(arr, i);
-
- if (cell->wait_object != NULL) {
- count++;
- sync_array_cell_print(file, cell);
- }
-
- i++;
- }
-}
-
-/**************************************************************************
-Prints info of the wait array. */
-
-void
-sync_array_print_info(
-/*==================*/
- FILE* file, /* in: file where to print */
- sync_array_t* arr) /* in: wait array */
-{
- sync_array_enter(arr);
-
- sync_array_output_info(file, arr);
-
- sync_array_exit(arr);
-}
diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c
deleted file mode 100644
index 2fcf75009a6..00000000000
--- a/storage/innobase/sync/sync0rw.c
+++ /dev/null
@@ -1,997 +0,0 @@
-/******************************************************
-The read-write lock (for thread synchronization)
-
-(c) 1995 Innobase Oy
-
-Created 9/11/1995 Heikki Tuuri
-*******************************************************/
-
-#include "sync0rw.h"
-#ifdef UNIV_NONINL
-#include "sync0rw.ic"
-#endif
-
-#include "os0thread.h"
-#include "mem0mem.h"
-#include "srv0srv.h"
-
-/*
- IMPLEMENTATION OF THE RW_LOCK
- =============================
-The status of a rw_lock is held in lock_word. The initial value of lock_word is
-X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
-for each x-lock. This describes the lock state for each value of lock_word:
-
-lock_word == X_LOCK_DECR: Unlocked.
-0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers.
- (X_LOCK_DECR - lock_word) is the
- number of readers that hold the lock.
-lock_word == 0: Write locked
--X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer.
- (-lock_word) is the number of readers
- that hold the lock.
-lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
- decremented by X_LOCK_DECR once for each lock,
- so the number of locks is:
- ((-lock_word) / X_LOCK_DECR) + 1
-When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
-other values of lock_word are invalid.
-
-The lock_word is always read and updated atomically and consistently, so that
-it always represents the state of the lock, and the state of the lock changes
-with a single atomic operation. This lock_word holds all of the information
-that a thread needs in order to determine if it is eligible to gain the lock
-or if it must spin or sleep. The one exception to this is that writer_thread
-must be verified before recursive write locks: to solve this scenario, we make
-writer_thread readable by all threads, but only writeable by the x-lock holder.
-
-The other members of the lock obey the following rules to remain consistent:
-
-pass: This is only set to 1 to prevent recursive x-locks. It must
- be set as specified by x_lock caller after the lock_word
- indicates that the thread holds the lock, but before that
- thread resumes execution. It must also be set to 1 during the
- final x_unlock, but before the lock_word status is updated.
- When an x_lock or move_ownership call wishes to change
- pass, it must first update the writer_thread appropriately.
-writer_thread: Must be set to the writers thread_id after the lock_word
- indicates that the thread holds the lock, but before that
- thread resumes execution. writer_thread may be invalid and
- should not be read when pass == 1. A thread trying to become
- writer never reads its own stale writer_thread, since it sets
- pass during its previous unlock call.
-waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
- signals, it should only be set to 1 when there are threads
- waiting on event. Must be 1 when a writer starts waiting to
- ensure the current x-locking thread sends a wake-up signal
- during unlock. May only be reset to 0 immediately before a
- a wake-up signal is sent to event.
-event: Threads wait on event for read or writer lock when another
- thread has an x-lock or an x-lock reservation (wait_ex). A
- thread may only wait on event after performing the following
- actions in order:
- (1) Record the counter value of event (with os_event_reset).
- (2) Set waiters to 1.
- (3) Verify lock_word <= 0.
- (1) must come before (2) to ensure signal is not missed.
- (2) must come before (3) to ensure a signal is sent.
- These restrictions force the above ordering.
- Immediately before sending the wake-up signal, we should:
- (1) Verify lock_word == X_LOCK_DECR (unlocked)
- (2) Reset waiters to 0.
-wait_ex_event: A thread may only wait on the wait_ex_event after it has
- performed the following actions in order:
- (1) Decrement lock_word by X_LOCK_DECR.
- (2) Record counter value of wait_ex_event (os_event_reset,
- called from sync_array_reserve_cell).
- (3) Verify that lock_word < 0.
- (1) must come first to ensures no other threads become reader
- or next writer, and notifies unlocker that signal must be sent.
- (2) must come before (3) to ensure the signal is not missed.
- These restrictions force the above ordering.
- Immediately before sending the wake-up signal, we should:
- Verify lock_word == 0 (waiting thread holds x_lock)
-*/
-
-
-/* number of spin waits on rw-latches,
-resulted during shared (read) locks */
-ib_longlong rw_s_spin_wait_count = 0;
-ib_longlong rw_s_spin_round_count = 0;
-
-/* number of OS waits on rw-latches,
-resulted during shared (read) locks */
-ib_longlong rw_s_os_wait_count = 0;
-
-/* number of unlocks (that unlock shared locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-ib_longlong rw_s_exit_count = 0;
-
-/* number of spin waits on rw-latches,
-resulted during exclusive (write) locks */
-ib_longlong rw_x_spin_wait_count = 0;
-ib_longlong rw_x_spin_round_count = 0;
-
-/* number of OS waits on rw-latches,
-resulted during exclusive (write) locks */
-ib_longlong rw_x_os_wait_count = 0;
-
-/* number of unlocks (that unlock exclusive locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-ib_longlong rw_x_exit_count = 0;
-
-/* The global list of rw-locks */
-rw_lock_list_t rw_lock_list;
-mutex_t rw_lock_list_mutex;
-
-#ifdef UNIV_SYNC_DEBUG
-/* The global mutex which protects debug info lists of all rw-locks.
-To modify the debug info list of an rw-lock, this mutex has to be
-acquired in addition to the mutex protecting the lock. */
-
-mutex_t rw_lock_debug_mutex;
-os_event_t rw_lock_debug_event; /* If deadlock detection does not
- get immediately the mutex, it may
- wait for this event */
-ibool rw_lock_debug_waiters; /* This is set to TRUE, if there may
- be waiters for the event */
-
-/**********************************************************************
-Creates a debug info struct. */
-static
-rw_lock_debug_t*
-rw_lock_debug_create(void);
-/*======================*/
-/**********************************************************************
-Frees a debug info struct. */
-static
-void
-rw_lock_debug_free(
-/*===============*/
- rw_lock_debug_t* info);
-
-/**********************************************************************
-Creates a debug info struct. */
-static
-rw_lock_debug_t*
-rw_lock_debug_create(void)
-/*======================*/
-{
- return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t)));
-}
-
-/**********************************************************************
-Frees a debug info struct. */
-static
-void
-rw_lock_debug_free(
-/*===============*/
- rw_lock_debug_t* info)
-{
- mem_free(info);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/**********************************************************************
-Creates, or rather, initializes an rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-
-void
-rw_lock_create_func(
-/*================*/
- rw_lock_t* lock, /* in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /* in: level */
-# endif /* UNIV_SYNC_DEBUG */
- const char* cmutex_name, /* in: mutex name */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /* in: file name where created */
- ulint cline) /* in: file line where created */
-{
- /* If this is the very first time a synchronization object is
- created, then the following call initializes the sync system. */
-
-#ifndef UNIV_SYNC_ATOMIC
- mutex_create(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
-
- lock->mutex.cfile_name = cfile_name;
- lock->mutex.cline = cline;
-
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- lock->mutex.cmutex_name = cmutex_name;
- lock->mutex.mutex_type = 1;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-
-#endif /* UNIV_SYNC_ATOMIC */
-
- lock->lock_word = X_LOCK_DECR;
- lock->waiters = 0;
- lock->pass = 1;
- /* We do not have to initialize writer_thread until pass == 0 */
-
-#ifdef UNIV_SYNC_DEBUG
- UT_LIST_INIT(lock->debug_list);
-
- lock->level = level;
-#endif /* UNIV_SYNC_DEBUG */
-
- lock->magic_n = RW_LOCK_MAGIC_N;
-
- lock->cfile_name = cfile_name;
- lock->cline = (unsigned int) cline;
-
- lock->count_os_wait = 0;
- lock->last_s_file_name = "not yet reserved";
- lock->last_x_file_name = "not yet reserved";
- lock->last_s_line = 0;
- lock->last_x_line = 0;
- lock->event = os_event_create(NULL);
- lock->wait_ex_event = os_event_create(NULL);
-
- mutex_enter(&rw_lock_list_mutex);
-
- if (UT_LIST_GET_LEN(rw_lock_list) > 0) {
- ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n
- == RW_LOCK_MAGIC_N);
- }
-
- UT_LIST_ADD_FIRST(list, rw_lock_list, lock);
-
- mutex_exit(&rw_lock_list_mutex);
-}
-
-/**********************************************************************
-Calling this function is obligatory only if the memory buffer containing
-the rw-lock is freed. Removes an rw-lock object from the global list. The
-rw-lock is checked to be in the non-locked state. */
-
-void
-rw_lock_free(
-/*=========*/
- rw_lock_t* lock) /* in: rw-lock */
-{
- ut_ad(rw_lock_validate(lock));
- ut_a(lock->lock_word == X_LOCK_DECR);
-
- lock->magic_n = 0;
-
-#ifndef UNIV_SYNC_ATOMIC
- mutex_free(rw_lock_get_mutex(lock));
-#endif /* UNIV_SYNC_ATOMIC */
-
- mutex_enter(&rw_lock_list_mutex);
- os_event_free(lock->event);
-
- os_event_free(lock->wait_ex_event);
-
- if (UT_LIST_GET_PREV(list, lock)) {
- ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
- }
- if (UT_LIST_GET_NEXT(list, lock)) {
- ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N);
- }
-
- UT_LIST_REMOVE(list, rw_lock_list, lock);
-
- mutex_exit(&rw_lock_list_mutex);
-}
-
-#ifdef UNIV_DEBUG
-/**********************************************************************
-Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks. */
-
-ibool
-rw_lock_validate(
-/*=============*/
- rw_lock_t* lock)
-{
- ut_a(lock);
-
- ulint waiters = rw_lock_get_waiters(lock);
- lint lock_word = lock->lock_word;
-
- ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
- ut_a(waiters == 0 || waiters == 1);
- ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************************
-Lock an rw-lock in shared mode for the current thread. If the rw-lock is
-locked in exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock, before suspending the thread. */
-
-void
-rw_lock_s_lock_spin(
-/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock
- will be passed to another thread to unlock */
- const char* file_name, /* in: file name where lock requested */
- ulint line) /* in: line where requested */
-{
- ulint index; /* index of the reserved wait cell */
- ulint i = 0; /* spin round count */
-
- ut_ad(rw_lock_validate(lock));
-
- rw_s_spin_wait_count++; /* Count calls to this function */
-lock_loop:
-
- /* Spin waiting for the writer field to become free */
- while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
-
- i++;
- }
-
- if (i == SYNC_SPIN_ROUNDS) {
- os_thread_yield();
- }
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu spin wait rw-s-lock at %p"
- " cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()),
- (void*) lock,
- lock->cfile_name, (ulong) lock->cline, (ulong) i);
- }
-
- /* We try once again to obtain the lock */
- if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
- rw_s_spin_round_count += i;
-
- return; /* Success */
- } else {
-
- if (i < SYNC_SPIN_ROUNDS) {
- goto lock_loop;
- }
-
- rw_s_spin_round_count += i;
-
- sync_array_reserve_cell(sync_primary_wait_array,
- lock, RW_LOCK_SHARED,
- file_name, line,
- &index);
-
- /* Set waiters before checking lock_word to ensure wake-up
- signal is sent. This may lead to some unnecessary signals. */
- rw_lock_set_waiters(lock);
-
- if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
- sync_array_free_cell(sync_primary_wait_array, index);
- return; /* Success */
- }
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu OS wait rw-s-lock at %p"
- " cfile %s cline %lu\n",
- os_thread_pf(os_thread_get_curr_id()),
- (void*) lock, lock->cfile_name,
- (ulong) lock->cline);
- }
-
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_s_os_wait_count++;
-
- sync_array_wait_event(sync_primary_wait_array, index);
-
- i = 0;
- goto lock_loop;
- }
-}
-
-/**********************************************************************
-This function is used in the insert buffer to move the ownership of an
-x-latch on a buffer frame to the current thread. The x-latch was set by
-the buffer read operation and it protected the buffer frame while the
-read was done. The ownership is moved because we want that the current
-thread is able to acquire a second x-latch which is stored in an mtr.
-This, in turn, is needed to pass the debug checks of index page
-operations. */
-
-void
-rw_lock_x_lock_move_ownership(
-/*==========================*/
- rw_lock_t* lock) /* in: lock which was x-locked in the
- buffer read */
-{
- ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
-
-#ifdef UNIV_SYNC_ATOMIC
- lock->writer_thread = os_thread_get_curr_id();
- os_memory_barrier_store();
- lock->pass = 0;
-#else /* UNIV_SYNC_ATOMIC */
- mutex_enter(&(lock->mutex));
- lock->writer_thread = os_thread_get_curr_id();
- lock->pass = 0;
- mutex_exit(&(lock->mutex));
-#endif /* UNIV_SYNC_ATOMIC */
-}
-
-/**********************************************************************
-Function for the next writer to call. Waits for readers to exit.
-The caller must have already decremented lock_word by X_LOCK_DECR.*/
-UNIV_INLINE
-void
-rw_lock_x_lock_wait(
-/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /* in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
-#endif
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
-{
- ulint index;
- ulint i = 0;
-
- ut_ad(lock->lock_word <= 0);
-
- while (lock->lock_word < 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
- if(i < SYNC_SPIN_ROUNDS) {
- i++;
- continue;
- }
-
- /* If there is still a reader, then go to sleep.*/
- rw_x_spin_round_count += i;
- i = 0;
- sync_array_reserve_cell(sync_primary_wait_array,
- lock,
- RW_LOCK_WAIT_EX,
- file_name, line,
- &index);
- /* Check lock_word to ensure wake-up isn't missed.*/
- if(lock->lock_word < 0) {
-
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_x_os_wait_count++;
-
- /* Add debug info as it is needed to detect possible
- deadlock. We must add info for WAIT_EX thread for
- deadlock detection to work properly. */
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
- file_name, line);
-#endif
-
- sync_array_wait_event(sync_primary_wait_array,
- index);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass,
- RW_LOCK_WAIT_EX);
-#endif
- /* It is possible to wake when lock_word < 0.
- We must pass the while-loop check to proceed.*/
- } else {
- sync_array_free_cell(sync_primary_wait_array,
- index);
- }
- }
- rw_x_spin_round_count += i;
-}
-
-/**********************************************************************
-Low-level function for acquiring an exclusive lock. */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_low(
-/*===============*/
- /* out: RW_LOCK_NOT_LOCKED if did
- not succeed, RW_LOCK_EX if success. */
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
-{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
- if(rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
- ut_ad(lock->pass);
-
- /* Decrement occurred: we are writer or next-writer. */
- lock->writer_thread = curr_thread;
- lock->pass = pass;
- rw_lock_x_lock_wait(lock,
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- file_name, line);
-
- } else {
- /* Decrement failed: relock or failed lock */
- /* Must verify pass first: otherwise another thread can
- call move_ownership suddenly allowing recursive locks.
- and after we have verified our thread_id matches
- (though move_ownership has since changed it).*/
- if(!pass && !(lock->pass) &&
- os_thread_eq(lock->writer_thread, curr_thread)) {
- /* Relock */
- lock->lock_word -= X_LOCK_DECR;
- } else {
- /* Another thread locked before us */
- return(FALSE);
- }
- }
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
- file_name, line);
-#endif
- lock->last_x_file_name = file_name;
- lock->last_x_line = (unsigned int) line;
-
- return(TRUE);
-}
-
-/**********************************************************************
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread. If the rw-lock is locked
-in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock before suspending the thread. If the same thread has an x-lock
-on the rw-lock, locking succeeds, with the following exception: if pass != 0,
-only a single x-lock may be taken on the lock. NOTE: If the same thread has
-an s-lock, locking does not succeed!
-
-Outline of the loop: (1) try rw_lock_x_lock_low(); (2) on failure spin while
-lock_word <= 0, for at most SYNC_SPIN_ROUNDS rounds in total; if the spin
-ended before the budget was used up, go back to (1), otherwise yield the
-thread; (3) reserve a cell in sync_primary_wait_array, set the waiters flag
-and try rw_lock_x_lock_low() once more; (4) if that fails too, wait on the
-reserved cell, reset the round counter and start again from (1). The
-function returns only after the lock has been obtained. */
-
-void
-rw_lock_x_lock_func(
-/*================*/
- rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/* in: file name where lock requested */
- ulint line) /* in: line where requested */
-{
- ulint index; /* index of the reserved wait cell */
- ulint i; /* spin round count */
- ibool spinning = FALSE; /* TRUE once this call has been counted in rw_x_spin_wait_count */
-
- ut_ad(rw_lock_validate(lock));
-
- i = 0;
-
-lock_loop:
-
- if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
- rw_x_spin_round_count += i;
-
- return; /* Locking succeeded */
-
- } else {
-
- if (!spinning) {
- spinning = TRUE;
- rw_x_spin_wait_count++;
- }
-
- /* Spin waiting for the lock_word to become free; i is not reset
- here, so the budget of SYNC_SPIN_ROUNDS is shared by all the spins
- done between two OS waits */
- while (i < SYNC_SPIN_ROUNDS
- && lock->lock_word <= 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0,
- srv_spin_wait_delay));
- }
-
- i++;
- }
- if (i == SYNC_SPIN_ROUNDS) {
- os_thread_yield(); /* budget used up: fall through to the wait array path */
- } else {
- goto lock_loop; /* lock_word became positive: retry the lock */
- }
- }
-
- rw_x_spin_round_count += i;
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu spin wait rw-x-lock at %p"
- " cfile %s cline %lu rnds %lu\n",
- os_thread_pf(os_thread_get_curr_id()), (void*) lock,
- lock->cfile_name, (ulong) lock->cline, (ulong) i);
- }
-
- sync_array_reserve_cell(sync_primary_wait_array,
- lock,
- RW_LOCK_EX,
- file_name, line,
- &index);
-
- /* Waiters must be set before checking lock_word, to ensure signal
- is sent. This could lead to a few unnecessary wake-up signals. */
- rw_lock_set_waiters(lock);
-
- if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
- sync_array_free_cell(sync_primary_wait_array, index);
- return; /* Locking succeeded */
- }
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu OS wait for rw-x-lock at %p"
- " cfile %s cline %lu\n",
- os_thread_pf(os_thread_get_curr_id()), (void*) lock,
- lock->cfile_name, (ulong) lock->cline);
- }
-
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_x_os_wait_count++;
-
- sync_array_wait_event(sync_primary_wait_array, index);
-
- i = 0; /* a fresh spin budget after every OS wait */
- goto lock_loop;
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
-because the debug mutex is also acquired in sync0arr while holding the OS
-mutex protecting the sync array, and the ordinary mutex_enter might
-recursively call routines in sync0arr, leading to a deadlock on the OS
-mutex.
-
-The function never spins: it tries mutex_enter_nowait(), and if that fails
-it resets rw_lock_debug_event, sets rw_lock_debug_waiters, tries once more
-and only then waits on the event. After being woken up it starts over. */
-
-void
-rw_lock_debug_mutex_enter(void)
-/*==========================*/
-{
-loop:
- if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
- return;
- }
-
- os_event_reset(rw_lock_debug_event);
-
- rw_lock_debug_waiters = TRUE;
-
- if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { /* retry after announcing ourselves as a waiter */
- return;
- }
-
- os_event_wait(rw_lock_debug_event);
-
- goto loop;
-}
-
-/**********************************************************************
-Releases the debug mutex. If rw_lock_debug_waiters is set after the
-release, the flag is cleared and rw_lock_debug_event is set so that
-threads waiting in rw_lock_debug_mutex_enter() try again. */
-
-void
-rw_lock_debug_mutex_exit(void)
-/*==========================*/
-{
- mutex_exit(&rw_lock_debug_mutex);
-
- if (rw_lock_debug_waiters) {
- rw_lock_debug_waiters = FALSE;
- os_event_set(rw_lock_debug_event);
- }
-}
-
-/**********************************************************************
-Adds a debug record for the calling thread to the debug list of an
-rw-lock. The record stores the thread id, the lock type, the pass value
-and the file and line of the request. If pass == 0 and the lock type is
-not RW_LOCK_WAIT_EX, the latch is additionally registered with
-sync_thread_add_level() at the level stored in the lock. */
-
-void
-rw_lock_add_debug_info(
-/*===================*/
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		pass,		/* in: pass value */
-	ulint		lock_type,	/* in: lock type */
-	const char*	file_name,	/* in: file where requested */
-	ulint		line)		/* in: line where requested */
-{
-	rw_lock_debug_t*	entry;
-
-	ut_ad(lock);
-	ut_ad(file_name);
-
-	entry = rw_lock_debug_create();
-
-	/* The record is filled in and linked under the debug mutex */
-	rw_lock_debug_mutex_enter();
-
-	entry->thread_id = os_thread_get_curr_id();
-	entry->lock_type = lock_type;
-	entry->pass = pass;
-	entry->file_name = file_name;
-	entry->line = line;
-
-	UT_LIST_ADD_FIRST(list, lock->debug_list, entry);
-
-	rw_lock_debug_mutex_exit();
-
-	if (pass != 0 || lock_type == RW_LOCK_WAIT_EX) {
-		/* No latching order bookkeeping for these requests */
-
-		return;
-	}
-
-	sync_thread_add_level(lock, lock->level);
-}
-
-/**********************************************************************
-Removes one debug record from the debug list of an rw-lock and frees it.
-A record matches when its pass value and lock type equal the arguments
-and, for pass == 0, when it was created by the calling thread. It is a
-fatal error (ut_error) if no record matches. */
-
-void
-rw_lock_remove_debug_info(
-/*======================*/
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		pass,		/* in: pass value */
-	ulint		lock_type)	/* in: lock type */
-{
-	rw_lock_debug_t*	entry;
-
-	ut_ad(lock);
-
-	if (pass == 0 && lock_type != RW_LOCK_WAIT_EX) {
-		sync_thread_reset_level(lock);
-	}
-
-	rw_lock_debug_mutex_enter();
-
-	for (entry = UT_LIST_GET_FIRST(lock->debug_list);
-	     entry != NULL;
-	     entry = UT_LIST_GET_NEXT(list, entry)) {
-
-		if (entry->pass != pass || entry->lock_type != lock_type) {
-
-			continue;
-		}
-
-		if (pass == 0
-		    && !os_thread_eq(entry->thread_id,
-				     os_thread_get_curr_id())) {
-			/* With pass == 0 only the own records qualify */
-
-			continue;
-		}
-
-		/* Found! */
-		UT_LIST_REMOVE(list, lock->debug_list, entry);
-		rw_lock_debug_mutex_exit();
-
-		rw_lock_debug_free(entry);
-
-		return;
-	}
-
-	/* No matching record: reached with the debug mutex still held */
-	ut_error;
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Tells whether the calling thread has a debug record of the given lock
-type, with pass value 0, in the debug list of the rw-lock. */
-
-ibool
-rw_lock_own(
-/*========*/
-					/* out: TRUE if locked */
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		lock_type)	/* in: lock type: RW_LOCK_SHARED,
-					RW_LOCK_EX */
-{
-	rw_lock_debug_t*	entry;
-	ibool			owned	= FALSE;
-
-	ut_ad(lock);
-	ut_ad(rw_lock_validate(lock));
-
-	rw_lock_debug_mutex_enter();
-
-	for (entry = UT_LIST_GET_FIRST(lock->debug_list);
-	     entry != NULL;
-	     entry = UT_LIST_GET_NEXT(list, entry)) {
-
-		if (os_thread_eq(entry->thread_id, os_thread_get_curr_id())
-		    && entry->pass == 0
-		    && entry->lock_type == lock_type) {
-
-			/* Found! */
-			owned = TRUE;
-
-			break;
-		}
-	}
-
-	rw_lock_debug_mutex_exit();
-
-	return(owned);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/**********************************************************************
-Checks if somebody has locked the rw-lock in the specified mode: for
-RW_LOCK_SHARED the reader count must be positive, for RW_LOCK_EX the
-writer state must be RW_LOCK_EX. Any other lock type is a fatal error. */
-
-ibool
-rw_lock_is_locked(
-/*==============*/
-					/* out: TRUE if locked */
-	rw_lock_t*	lock,		/* in: rw-lock */
-	ulint		lock_type)	/* in: lock type: RW_LOCK_SHARED,
-					RW_LOCK_EX */
-{
-	ut_ad(lock);
-	ut_ad(rw_lock_validate(lock));
-
-	if (lock_type == RW_LOCK_SHARED) {
-
-		return(rw_lock_get_reader_count(lock) > 0 ? TRUE : FALSE);
-	}
-
-	if (lock_type == RW_LOCK_EX) {
-
-		return(rw_lock_get_writer(lock) == RW_LOCK_EX
-		       ? TRUE : FALSE);
-	}
-
-	ut_error;
-
-	return(FALSE);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/*******************************************************************
-Prints debug info of currently locked rw-locks. Walks rw_lock_list while
-holding rw_lock_list_mutex and, for every rw-lock whose lock_word differs
-from X_LOCK_DECR, prints the address of the lock, whether it has waiters,
-and its debug records. Finally prints the total number of rw-locks in the
-list. NOTE: the debug records are printed by rw_lock_debug_print(), which
-writes to stderr and not to the file given as the argument. */
-
-void
-rw_lock_list_print_info(
-/*====================*/
-	FILE*	file)		/* in: file where to print */
-{
-	rw_lock_t*		lock;
-	ulint			count		= 0;
-	rw_lock_debug_t*	info;
-
-	mutex_enter(&rw_lock_list_mutex);
-
-	fputs("-------------\n"
-	      "RW-LATCH INFO\n"
-	      "-------------\n", file);
-
-	lock = UT_LIST_GET_FIRST(rw_lock_list);
-
-	while (lock != NULL) {
-
-		count++;
-
-#ifndef UNIV_SYNC_ATOMIC
-		mutex_enter(&(lock->mutex));
-#endif
-		if (lock->lock_word != X_LOCK_DECR) {
-
-			fprintf(file, "RW-LOCK: %p ", (void*) lock);
-
-			if (rw_lock_get_waiters(lock)) {
-				fputs(" Waiters for the lock exist\n", file);
-			} else {
-				putc('\n', file);
-			}
-
-			info = UT_LIST_GET_FIRST(lock->debug_list);
-			while (info != NULL) {
-				rw_lock_debug_print(info);
-				info = UT_LIST_GET_NEXT(list, info);
-			}
-		}
-#ifndef UNIV_SYNC_ATOMIC
-		mutex_exit(&(lock->mutex));
-#endif
-
-		lock = UT_LIST_GET_NEXT(list, lock);
-	}
-
-	/* count is a ulint: cast it and print it as an unsigned long, as
-	is done for the other ulint values printed in this file */
-	fprintf(file, "Total number of rw-locks %lu\n", (ulong) count);
-	mutex_exit(&rw_lock_list_mutex);
-}
-
-/*******************************************************************
-Prints debug info of an rw-lock to stderr: a header with the address of
-the lock and, if lock_word differs from X_LOCK_DECR, a note about waiters
-followed by the debug records of the lock. */
-
-void
-rw_lock_print(
-/*==========*/
-	rw_lock_t*	lock)	/* in: rw-lock */
-{
-	rw_lock_debug_t*	entry;
-
-	fprintf(stderr,
-		"-------------\n"
-		"RW-LATCH INFO\n"
-		"RW-LATCH: %p ", (void*) lock);
-
-#ifndef UNIV_SYNC_ATOMIC
-	mutex_enter(&(lock->mutex));
-#endif
-	if (lock->lock_word != X_LOCK_DECR) {
-
-		if (!rw_lock_get_waiters(lock)) {
-			putc('\n', stderr);
-		} else {
-			fputs(" Waiters for the lock exist\n", stderr);
-		}
-
-		for (entry = UT_LIST_GET_FIRST(lock->debug_list);
-		     entry != NULL;
-		     entry = UT_LIST_GET_NEXT(list, entry)) {
-
-			rw_lock_debug_print(entry);
-		}
-	}
-#ifndef UNIV_SYNC_ATOMIC
-	mutex_exit(&(lock->mutex));
-#endif
-}
-
-/*************************************************************************
-Prints info of a debug struct to stderr on one line: the thread, the file
-and the line of the request, the lock type (S-LOCK, X-LOCK or WAIT X-LOCK)
-and, if it is not zero, the pass value. An unknown lock type is a fatal
-error. */
-
-void
-rw_lock_debug_print(
-/*================*/
-	rw_lock_debug_t*	info)	/* in: debug struct */
-{
-	ulint	rwt;
-
-	rwt = info->lock_type;
-
-	/* The arguments are cast to ulong, so they must be printed with
-	the unsigned conversion %lu */
-	fprintf(stderr, "Locked: thread %lu file %s line %lu ",
-		(ulong) os_thread_pf(info->thread_id), info->file_name,
-		(ulong) info->line);
-	if (rwt == RW_LOCK_SHARED) {
-		fputs("S-LOCK", stderr);
-	} else if (rwt == RW_LOCK_EX) {
-		fputs("X-LOCK", stderr);
-	} else if (rwt == RW_LOCK_WAIT_EX) {
-		fputs("WAIT X-LOCK", stderr);
-	} else {
-		ut_error;
-	}
-	if (info->pass != 0) {
-		fprintf(stderr, " pass value %lu", (ulong) info->pass);
-	}
-	putc('\n', stderr);
-}
-
-/*******************************************************************
-Counts the rw-locks in rw_lock_list whose lock_word differs from
-X_LOCK_DECR, that is, the currently locked rw-locks. Works only in the
-debug version. */
-
-ulint
-rw_lock_n_locked(void)
-/*==================*/
-{
-	rw_lock_t*	cur;
-	ulint		n_locked	= 0;
-
-	mutex_enter(&rw_lock_list_mutex);
-
-	for (cur = UT_LIST_GET_FIRST(rw_lock_list);
-	     cur != NULL;
-	     cur = UT_LIST_GET_NEXT(list, cur)) {
-
-		if (cur->lock_word != X_LOCK_DECR) {
-			n_locked++;
-		}
-	}
-
-	mutex_exit(&rw_lock_list_mutex);
-
-	return(n_locked);
-}
-#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
deleted file mode 100644
index a8b1ac4926e..00000000000
--- a/storage/innobase/sync/sync0sync.c
+++ /dev/null
@@ -1,1425 +0,0 @@
-/******************************************************
-Mutex, the basic synchronization primitive
-
-(c) 1995 Innobase Oy
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#ifdef UNIV_NONINL
-#include "sync0sync.ic"
-#endif
-
-#include "sync0rw.h"
-#include "buf0buf.h"
-#include "srv0srv.h"
-#include "buf0types.h"
-
-/*
- REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
- ============================================
-
-Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
-takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
-Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
-implement our own efficient spin lock mutex. Future operating systems may
-provide efficient spin locks, but we cannot count on that.
-
-Another reason for implementing a spin lock is that on multiprocessor systems
-it can be more efficient for a processor to run a loop waiting for the
-semaphore to be released than to switch to a different thread. A thread switch
-takes 25 us on both platforms mentioned above. See Gray and Reuter's book
-Transaction processing for background.
-
-How long should the spin loop last before suspending the thread? On a
-uniprocessor, spinning does not help at all, because if the thread owning the
-mutex is not executing, it cannot be released. Spinning actually wastes
-resources.
-
-On a multiprocessor, we do not know if the thread owning the mutex is
-executing or not. Thus it would make sense to spin as long as the operation
-guarded by the mutex would typically last assuming that the thread is
-executing. If the mutex is not released by that time, we may assume that the
-thread owning the mutex is not executing and suspend the waiting thread.
-
-A typical operation (where no i/o involved) guarded by a mutex or a read-write
-lock may last 1 - 20 us on the current Pentium platform. The longest
-operations are the binary searches on an index node.
-
-We conclude that the best choice is to set the spin time at 20 us. Then the
-system should work well on a multiprocessor. On a uniprocessor we have to
-make sure that thread switches due to mutex collisions are not frequent,
-i.e., they do not happen every 100 us or so, because that wastes too many
-resources. If the thread switches are not frequent, the 20 us wasted in the
-spin loop is not too much.
-
-Empirical studies on the effect of spin time should be done for different
-platforms.
-
-
- IMPLEMENTATION OF THE MUTEX
- ===========================
-
-For background, see Curt Schimmel's book on Unix implementation on modern
-architectures. The key points in the implementation are atomicity and
-serialization of memory accesses. The test-and-set instruction (XCHG in
-Pentium) must be atomic. As new processors may have weak memory models, also
-serialization of memory references may be necessary. The successor of Pentium,
-P6, has at least one mode where the memory model is weak. As far as we know,
-in Pentium all memory accesses are serialized in the program order and we do
-not have to worry about the memory model. On other processors there are
-special machine instructions called a fence, memory barrier, or storage
-barrier (STBAR in Sparc), which can be used to serialize the memory accesses
-to happen in program order relative to the fence instruction.
-
-Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
-the atomic test-and-set, but his algorithm should be modified for weak memory
-models. We do not use Lamport's algorithm, because we guess it is slower than
-the atomic test-and-set.
-
-Our mutex implementation works as follows: we perform the atomic
-test-and-set instruction on the memory word. If the test returns zero, we
-know we got the lock first. If the test returns not zero, some other thread
-was quicker and got the lock: then we spin in a loop reading the memory word,
-waiting for it to become zero. It is wise to just read the word in the loop,
-not perform numerous test-and-set instructions, because they generate memory
-traffic between the cache and the main memory. The read loop can just access
-the cache, saving bus bandwidth.
-
-If we cannot acquire the mutex lock in the specified time, we reserve a cell
-in the wait array, set the waiters byte in the mutex to 1. To avoid a race
-condition, after setting the waiters byte and before suspending the waiting
-thread, we still have to check that the mutex is reserved, because it may
-have happened that the thread which was holding the mutex has just released
-it and did not see the waiters byte set to 1, a case which would lead the
-other thread to an infinite wait.
-
-LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
-=======
-thread will eventually call os_event_set() on that particular event.
-Thus no infinite wait is possible in this case.
-
-Proof: After making the reservation the thread sets the waiters field in the
-mutex to 1. Then it checks that the mutex is still reserved by some thread,
-or it reserves the mutex for itself. In any case, some thread (which may be
-also some earlier thread, not necessarily the one currently holding the mutex)
-will set the waiters field to 0 in mutex_exit, and then call
-os_event_set() with the mutex as an argument.
-Q.E.D.
-
-LEMMA 2: If an os_event_set() call is made after some thread has called
-=======
-the os_event_reset() and before it starts wait on that event, the call
-will not be lost to the second thread. This is true even if there is an
-intervening call to os_event_reset() by another thread.
-Thus no infinite wait is possible in this case.
-
-Proof (non-windows platforms): os_event_reset() returns a monotonically
-increasing value of signal_count. This value is increased at every
-call of os_event_set(). If thread A has called os_event_reset() followed
-by thread B calling os_event_set() and then some other thread C calling
-os_event_reset(), the is_set flag of the event will be set to FALSE;
-but now if thread A calls os_event_wait_low() with the signal_count
-value returned from the earlier call of os_event_reset(), it will
-return immediately without waiting.
-Q.E.D.
-
-Proof (windows): If there is a writer thread which is forced to wait for
-the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX.
-The design of rw_lock ensures that there is one and only one thread
-that is able to change the state to RW_LOCK_WAIT_EX and this thread is
-guaranteed to acquire the lock after it is released by the current
-holders and before any other waiter gets the lock.
-On windows this thread waits on a separate event, i.e., wait_ex_event.
-Since only one thread can wait on this event there is no chance
-of this event getting reset before the writer starts to wait on it.
-Therefore, this thread is guaranteed to catch the os_event_set()
-signalled unconditionally at the release of the lock.
-Q.E.D. */
-
-/* Counters for monitoring the performance of mutex waits. mutex_spin_wait()
-updates them without any synchronization, so the values are not exact. */
-
-/* round=one iteration of a spin loop */
-ib_longlong mutex_spin_round_count = 0; /* spin rounds summed up in mutex_spin_wait() */
-ib_longlong mutex_spin_wait_count = 0; /* number of calls of mutex_spin_wait() */
-ib_longlong mutex_os_wait_count = 0; /* incremented before each wait on the wait array in mutex_spin_wait() */
-ib_longlong mutex_exit_count = 0; /* not updated in this part of the file; presumably counts mutex_exit calls - TODO confirm */
-
-/* The global array of wait cells for implementation of the database's own
-mutexes and read-write locks */
-sync_array_t* sync_primary_wait_array;
-
-/* This variable is set to TRUE when sync_init is called */
-ibool sync_initialized = FALSE;
-
-
-typedef struct sync_level_struct sync_level_t;
-typedef struct sync_thread_struct sync_thread_t;
-
-#ifdef UNIV_SYNC_DEBUG
-/* The latch levels currently owned by threads are stored in this data
-structure; the size of this array is OS_THREAD_MAX_N */
-
-sync_thread_t* sync_thread_level_arrays;
-
-/* Mutex protecting sync_thread_level_arrays; mutex_create_func() does not
-put this mutex to mutex_list */
-mutex_t sync_thread_mutex;
-#endif /* UNIV_SYNC_DEBUG */
-
-/* Global list of database mutexes (not OS mutexes) created. */
-ut_list_base_node_t mutex_list;
-
-/* Mutex protecting the mutex_list variable; mutex_create_func() does not
-put this mutex to mutex_list */
-mutex_t mutex_list_mutex;
-
-#ifdef UNIV_SYNC_DEBUG
-/* Latching order checks start when this is set TRUE */
-ibool sync_order_checks_on = FALSE;
-#endif /* UNIV_SYNC_DEBUG */
-
-/* One slot of sync_thread_level_arrays */
-struct sync_thread_struct{
- os_thread_id_t id; /* OS thread id */
- sync_level_t* levels; /* level array for this thread; if this is NULL
- this slot is unused */
-};
-
-/* Number of slots reserved for each OS thread in the sync level array */
-#define SYNC_THREAD_N_LEVELS 10000
-
-/* One slot of the level array of a thread */
-struct sync_level_struct{
- void* latch; /* pointer to a mutex or an rw-lock; NULL means that
- the slot is empty */
- ulint level; /* level of the latch in the latching order */
-};
-
-/**********************************************************************
-A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled
-inlining of InnoDB functions, and no inlined functions should be called from
-there. That is why we need to duplicate the inlined function here. The body
-only forwards the call to mutex_enter(). */
-
-void
-mutex_enter_noninline(
-/*==================*/
- mutex_t* mutex) /* in: mutex */
-{
- mutex_enter(mutex);
-}
-
-/**********************************************************************
-Releases a mutex. This is the noninlined counterpart of mutex_exit(), to
-which the call is forwarded; see mutex_enter_noninline() for the reason. */
-
-void
-mutex_exit_noninline(
-/*=================*/
- mutex_t* mutex) /* in: mutex */
-{
- mutex_exit(mutex);
-}
-
-/**********************************************************************
-Creates, or rather, initializes a mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed.
-
-The function resets the lock word (and, in builds which use neither the
-Windows x86 assembler nor MY_ATOMIC_NOLOCK, initializes the OS fast mutex),
-creates the event of the mutex, clears the waiters field, stores the
-creation site and zeroes the statistics fields. Finally the mutex is added
-to mutex_list under mutex_list_mutex, unless it is mutex_list_mutex itself
-or, in UNIV_SYNC_DEBUG builds, sync_thread_mutex. */
-
-void
-mutex_create_func(
-/*==============*/
- mutex_t* mutex, /* in: pointer to memory */
-#ifdef UNIV_DEBUG
- const char* cmutex_name, /* in: mutex name */
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /* in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /* in: file name where created */
- ulint cline) /* in: file line where created */
-{
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
- mutex_reset_lock_word(mutex);
-#elif defined(MY_ATOMIC_NOLOCK)
- mutex_reset_lock_word(mutex);
-#else
- os_fast_mutex_init(&(mutex->os_fast_mutex));
- mutex->lock_word = 0;
-#endif
- mutex->event = os_event_create(NULL);
- mutex_set_waiters(mutex, 0);
-#ifdef UNIV_DEBUG
- mutex->magic_n = MUTEX_MAGIC_N; /* checked by mutex_validate() */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
- mutex->line = 0;
- mutex->file_name = "not yet reserved";
- mutex->level = level;
-#endif /* UNIV_SYNC_DEBUG */
- mutex->cfile_name = cfile_name;
- mutex->cline = cline;
-#ifndef UNIV_HOTBACKUP
- mutex->count_os_wait = 0;
-# ifdef UNIV_DEBUG
- mutex->cmutex_name= cmutex_name;
- mutex->count_using= 0;
- mutex->mutex_type= 0;
- mutex->lspent_time= 0;
- mutex->lmax_spent_time= 0;
- mutex->count_spin_loop= 0;
- mutex->count_spin_rounds= 0;
- mutex->count_os_yield= 0;
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
- /* Check that lock_word is aligned; this is important on Intel */
- ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
-
- /* NOTE! The very first mutexes are not put to the mutex list */
-
- if ((mutex == &mutex_list_mutex)
-#ifdef UNIV_SYNC_DEBUG
- || (mutex == &sync_thread_mutex)
-#endif /* UNIV_SYNC_DEBUG */
- ) {
-
- return;
- }
-
- mutex_enter(&mutex_list_mutex);
-
- /* Debug check: the current head of the list, if any, must be an
- initialized mutex */
- ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
- || UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
-
- UT_LIST_ADD_FIRST(list, mutex_list, mutex);
-
- mutex_exit(&mutex_list_mutex);
-}
-
-/**********************************************************************
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a mutex object from the mutex list. The mutex
-is checked to be in the reset state: it is a fatal error if the lock word
-or the waiters field is not zero. The event of the mutex is freed, and so
-is the OS fast mutex in builds which use one. mutex_list_mutex and, in
-UNIV_SYNC_DEBUG builds, sync_thread_mutex are not in the mutex list and
-are therefore not removed from it. */
-
-void
-mutex_free(
-/*=======*/
- mutex_t* mutex) /* in: mutex */
-{
- ut_ad(mutex_validate(mutex));
- ut_a(mutex_get_lock_word(mutex) == 0);
- ut_a(mutex_get_waiters(mutex) == 0);
-
- if (mutex != &mutex_list_mutex
-#ifdef UNIV_SYNC_DEBUG
- && mutex != &sync_thread_mutex
-#endif /* UNIV_SYNC_DEBUG */
- ) {
-
- mutex_enter(&mutex_list_mutex);
-
- /* Debug checks: the neighbours in the list, if any, must be
- initialized mutexes */
- ut_ad(!UT_LIST_GET_PREV(list, mutex)
- || UT_LIST_GET_PREV(list, mutex)->magic_n
- == MUTEX_MAGIC_N);
- ut_ad(!UT_LIST_GET_NEXT(list, mutex)
- || UT_LIST_GET_NEXT(list, mutex)->magic_n
- == MUTEX_MAGIC_N);
-
- UT_LIST_REMOVE(list, mutex_list, mutex);
-
- mutex_exit(&mutex_list_mutex);
- }
-
- os_event_free(mutex->event);
-
-#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
-#elif defined(MY_ATOMIC_NOLOCK)
-#else
- os_fast_mutex_free(&(mutex->os_fast_mutex));
-#endif
- /* If we free the mutex protecting the mutex list (freeing is
- not necessary), we have to reset the magic number AFTER removing
- it from the list. */
-#ifdef UNIV_DEBUG
- mutex->magic_n = 0;
-#endif /* UNIV_DEBUG */
-}
-
-/************************************************************************
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Makes a single attempt to lock the mutex for the current thread
-and never waits: if the test-and-set does not succeed, the function
-returns 1 at once. */
-
-ulint
-mutex_enter_nowait_func(
-/*====================*/
-					/* out: 0 if succeed, 1 if not */
-	mutex_t*	mutex,		/* in: pointer to mutex */
-	const char*	file_name __attribute__((unused)),
-					/* in: file name where mutex
-					requested */
-	ulint		line __attribute__((unused)))
-					/* in: line where requested */
-{
-	ut_ad(mutex_validate(mutex));
-
-	if (mutex_test_and_set(mutex)) {
-		/* The mutex was already reserved */
-
-		return(1);
-	}
-
-	/* Succeeded! */
-	ut_d(mutex->thread_id = os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
-	mutex_set_debug_info(mutex, file_name, line);
-#endif
-
-	return(0);
-}
-
-#ifdef UNIV_DEBUG
-/**********************************************************************
-Checks that the mutex has been initialized: the pointer must not be NULL
-and the magic number must be MUTEX_MAGIC_N. A failed check is a fatal
-assertion (ut_a), so the function returns TRUE whenever it returns. */
-
-ibool
-mutex_validate(
-/*===========*/
- const mutex_t* mutex) /* in: mutex to check */
-{
- ut_a(mutex);
- ut_a(mutex->magic_n == MUTEX_MAGIC_N);
-
- return(TRUE);
-}
-
-/**********************************************************************
-Checks that the current thread owns the mutex: the lock word must be 1
-and the thread id stored in the mutex must be the id of the calling
-thread. Works only in the debug version. */
-
-ibool
-mutex_own(
-/*======*/
-				/* out: TRUE if owns */
-	const mutex_t*	mutex)	/* in: mutex */
-{
-	ibool	is_owner;
-
-	ut_ad(mutex_validate(mutex));
-
-	/* The thread id is compared only if the mutex is reserved */
-	is_owner = mutex_get_lock_word(mutex) == 1
-		&& os_thread_eq(mutex->thread_id, os_thread_get_curr_id());
-
-	return(is_owner);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************************
-Sets the waiters field in a mutex. The value is stored through a pointer
-to volatile, with a plain assignment: no atomic instruction or memory
-barrier is used here. */
-
-void
-mutex_set_waiters(
-/*==============*/
- mutex_t* mutex, /* in: mutex */
- ulint n) /* in: value to set */
-{
- volatile ulint* ptr; /* declared volatile to ensure that
- the value is stored to memory */
- ut_ad(mutex);
-
- ptr = &(mutex->waiters);
-
- *ptr = n; /* Here we assume that the write of a single
- word in memory is atomic */
-}
-
-/**********************************************************************
-Reserves a mutex for the current thread. If the mutex is reserved, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the mutex before suspending the thread.
-
-Outline: (1) spin reading the lock word until it is zero or until
-SYNC_SPIN_ROUNDS rounds have been used, yielding the thread in the latter
-case; (2) try the atomic test-and-set; if it fails and rounds remain, go
-back to (1); (3) reserve a cell in sync_primary_wait_array, set the waiters
-field to 1 and try the test-and-set up to 4 more times; (4) if the mutex is
-still reserved, wait on the reserved cell and then start over from (1) with
-a zeroed round counter. The function returns only after the mutex has been
-reserved. In UNIV_DEBUG builds without UNIV_HOTBACKUP, and if timed_mutexes
-is 1, the time from the first yield or OS wait until the reservation is
-added to the statistics of the mutex. */
-
-void
-mutex_spin_wait(
-/*============*/
- mutex_t* mutex, /* in: pointer to mutex */
- const char* file_name, /* in: file name where mutex
- requested */
- ulint line) /* in: line where requested */
-{
- ulint index; /* index of the reserved wait cell */
- ulint i; /* spin round count */
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- ib_longlong lstart_time = 0, lfinish_time; /* for timing os_wait */
- ulint ltime_diff;
- ulint sec;
- ulint ms; /* NOTE(review): combined below as sec * 1000000 + ms, so presumably microseconds - confirm against ut_usectime */
- uint timer_started = 0;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
- ut_ad(mutex);
-
- /* This update is not thread safe, but we don't mind if the count
- isn't exact. Moved out of ifdef that follows because we are willing
- to sacrifice the cost of counting this as the data is valuable.
- Count the number of calls to mutex_spin_wait. */
- mutex_spin_wait_count++;
-
-mutex_loop:
-
- i = 0;
-
- /* Spin waiting for the lock word to become zero. Note that we do
- not have to assume that the read access to the lock word is atomic,
- as the actual locking is always committed with atomic test-and-set.
- In reality, however, all processors probably have an atomic read of
- a memory word. */
-
-spin_loop:
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- mutex->count_spin_loop++;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-
- while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
-
- i++;
- }
-
- if (i == SYNC_SPIN_ROUNDS) {
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- mutex->count_os_yield++;
- if (timed_mutexes == 1 && timer_started==0) {
- ut_usectime(&sec, &ms);
- lstart_time= (ib_longlong)sec * 1000000 + ms;
- timer_started = 1;
- }
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
- os_thread_yield();
- }
-
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr,
- "Thread %lu spin wait mutex at %p"
- " cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
- mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
-#endif
-
- mutex_spin_round_count += i;
-
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- mutex->count_spin_rounds += i;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-
- if (mutex_test_and_set(mutex) == 0) {
- /* Succeeded! */
-
- ut_d(mutex->thread_id = os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
-
- goto finish_timing;
- }
-
- /* We may end up with a situation where lock_word is 0 but the OS
- fast mutex is still reserved. On FreeBSD the OS does not seem to
- schedule a thread which is constantly calling pthread_mutex_trylock
- (in mutex_test_and_set implementation). Then we could end up
- spinning here indefinitely. The following 'i++' stops this infinite
- spin. */
-
- i++;
-
- if (i < SYNC_SPIN_ROUNDS) {
- goto spin_loop;
- }
-
- sync_array_reserve_cell(sync_primary_wait_array, mutex,
- SYNC_MUTEX, file_name, line, &index);
-
- /* The memory order of the array reservation and the change in the
- waiters field is important: when we suspend a thread, we first
- reserve the cell and then set waiters field to 1. When threads are
- released in mutex_exit, the waiters field is first set to zero and
- then the event is set to the signaled state. */
-
- mutex_set_waiters(mutex, 1);
-
- /* Try to reserve still a few times */
- for (i = 0; i < 4; i++) {
- if (mutex_test_and_set(mutex) == 0) {
- /* Succeeded! Free the reserved wait cell */
-
- sync_array_free_cell(sync_primary_wait_array, index);
-
- ut_d(mutex->thread_id = os_thread_get_curr_id());
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
-
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
- " mutex at %p\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()),
- (void*) mutex);
-#endif
-
- goto finish_timing;
-
- /* Note that in this case we leave the waiters field
- set to 1. We cannot reset it to zero, as we do not
- know if there are other waiters. */
- }
- }
-
- /* Now we know that there has been some thread holding the mutex
- after the change in the wait array and the waiters field was made.
- Now there is no risk of infinite wait on the event. */
-
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr,
- "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
- mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
-#endif
-
- mutex_os_wait_count++;
-
-#ifndef UNIV_HOTBACKUP
- mutex->count_os_wait++;
-# ifdef UNIV_DEBUG
- /* !!!!! Sometimes os_wait can be called without os_thread_yield */
-
- if (timed_mutexes == 1 && timer_started==0) {
- ut_usectime(&sec, &ms);
- lstart_time= (ib_longlong)sec * 1000000 + ms;
- timer_started = 1;
- }
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
- sync_array_wait_event(sync_primary_wait_array, index);
- goto mutex_loop;
-
-finish_timing:
-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
- if (timed_mutexes == 1 && timer_started==1) {
- ut_usectime(&sec, &ms);
- lfinish_time= (ib_longlong)sec * 1000000 + ms;
-
- ltime_diff= (ulint) (lfinish_time - lstart_time);
- mutex->lspent_time += ltime_diff;
-
- if (mutex->lmax_spent_time < ltime_diff) {
- mutex->lmax_spent_time= ltime_diff;
- }
- }
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
- return;
-}
-
-/**********************************************************************
-Releases the threads waiting in the primary wait array for this mutex:
-resets the waiters field of the mutex to 0, sets the event of the mutex
-and reports the signal to sync_primary_wait_array. */
-
-void
-mutex_signal_object(
-/*================*/
- mutex_t* mutex) /* in: mutex */
-{
- mutex_set_waiters(mutex, 0);
-
- /* The memory order of resetting the waiters field and
- signaling the object is important. See LEMMA 1 above. */
- os_event_set(mutex->event);
- sync_array_object_signalled(sync_primary_wait_array);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/**********************************************************************
-Sets the debug information for a reserved mutex: registers the mutex with
-sync_thread_add_level() at the level stored in the mutex, and stores the
-file name and the line of the request in the mutex. */
-
-void
-mutex_set_debug_info(
-/*=================*/
- mutex_t* mutex, /* in: mutex */
- const char* file_name, /* in: file where requested */
- ulint line) /* in: line where requested */
-{
- ut_ad(mutex);
- ut_ad(file_name);
-
- sync_thread_add_level(mutex, mutex->level);
-
- mutex->file_name = file_name;
- mutex->line = line;
-}
-
-/**********************************************************************
-Gets the debug information for a reserved mutex. The three values are
-copied from the fields of the mutex as they are; the function takes no
-latch and does not check that the mutex is reserved. */
-
-void
-mutex_get_debug_info(
-/*=================*/
- mutex_t* mutex, /* in: mutex */
- const char** file_name, /* out: file where requested */
- ulint* line, /* out: line where requested */
- os_thread_id_t* thread_id) /* out: id of the thread which owns
- the mutex */
-{
- ut_ad(mutex);
-
- *file_name = mutex->file_name;
- *line = mutex->line;
- *thread_id = mutex->thread_id;
-}
-
-/**********************************************************************
-Prints debug info of currently reserved mutexes. Walks mutex_list while
-holding mutex_list_mutex and prints, for every mutex whose lock word is
-not zero, the address of the mutex, the thread which reserved it and the
-file and line of the request. Finally prints the total number of mutexes
-in the list. */
-static
-void
-mutex_list_print_info(
-/*==================*/
-	FILE*	file)		/* in: file where to print */
-{
-	mutex_t*	mutex;
-	const char*	file_name;
-	ulint		line;
-	os_thread_id_t	thread_id;
-	ulint		count		= 0;
-
-	fputs("----------\n"
-	      "MUTEX INFO\n"
-	      "----------\n", file);
-
-	mutex_enter(&mutex_list_mutex);
-
-	mutex = UT_LIST_GET_FIRST(mutex_list);
-
-	while (mutex != NULL) {
-		count++;
-
-		if (mutex_get_lock_word(mutex) != 0) {
-			mutex_get_debug_info(mutex, &file_name, &line,
-					     &thread_id);
-			/* The thread id and the line are ulint values:
-			cast them and print them as unsigned long, as is
-			done for the ulint values printed elsewhere in
-			this file */
-			fprintf(file,
-				"Locked mutex: addr %p thread %lu"
-				" file %s line %lu\n",
-				(void*) mutex,
-				(ulong) os_thread_pf(thread_id),
-				file_name, (ulong) line);
-		}
-
-		mutex = UT_LIST_GET_NEXT(list, mutex);
-	}
-
-	fprintf(file, "Total number of mutexes %lu\n", (ulong) count);
-
-	mutex_exit(&mutex_list_mutex);
-}
-
-/**********************************************************************
-Counts currently reserved mutexes. Works only in the debug version. */
-
-ulint
-mutex_n_reserved(void)
-/*==================*/
-{
- mutex_t* mutex;
- ulint count = 0;
-
- mutex_enter(&mutex_list_mutex);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
-
- while (mutex != NULL) {
- if (mutex_get_lock_word(mutex) != 0) {
-
- count++;
- }
-
- mutex = UT_LIST_GET_NEXT(list, mutex);
- }
-
- mutex_exit(&mutex_list_mutex);
-
- ut_a(count >= 1);
-
- return(count - 1); /* Subtract one, because this function itself
- was holding one mutex (mutex_list_mutex) */
-}
-
-/**********************************************************************
-Returns TRUE if no mutex or rw-lock is currently locked. Works only in
-the debug version. */
-
-ibool
-sync_all_freed(void)
-/*================*/
-{
- return(mutex_n_reserved() + rw_lock_n_locked() == 0);
-}
-
-/**********************************************************************
-Gets the value in the nth slot in the thread level arrays. */
-static
-sync_thread_t*
-sync_thread_level_arrays_get_nth(
-/*=============================*/
- /* out: pointer to thread slot */
- ulint n) /* in: slot number */
-{
- ut_ad(n < OS_THREAD_MAX_N);
-
- return(sync_thread_level_arrays + n);
-}
-
-/**********************************************************************
-Looks for the thread slot for the calling thread. */
-static
-sync_thread_t*
-sync_thread_level_arrays_find_slot(void)
-/*====================================*/
- /* out: pointer to thread slot, NULL if not found */
-
-{
- sync_thread_t* slot;
- os_thread_id_t id;
- ulint i;
-
- id = os_thread_get_curr_id();
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = sync_thread_level_arrays_get_nth(i);
-
- if (slot->levels && os_thread_eq(slot->id, id)) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/**********************************************************************
-Looks for an unused thread slot. */
-static
-sync_thread_t*
-sync_thread_level_arrays_find_free(void)
-/*====================================*/
- /* out: pointer to thread slot */
-
-{
- sync_thread_t* slot;
- ulint i;
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = sync_thread_level_arrays_get_nth(i);
-
- if (slot->levels == NULL) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/**********************************************************************
-Gets the value in the nth slot in the thread level array. */
-static
-sync_level_t*
-sync_thread_levels_get_nth(
-/*=======================*/
- /* out: pointer to level slot */
- sync_level_t* arr, /* in: pointer to level array for an OS
- thread */
- ulint n) /* in: slot number */
-{
- ut_ad(n < SYNC_THREAD_N_LEVELS);
-
- return(arr + n);
-}
-
-/**********************************************************************
-Checks if all the level values stored in the level array are greater than
-the given limit. */
-static
-ibool
-sync_thread_levels_g(
-/*=================*/
- /* out: TRUE if all greater */
- sync_level_t* arr, /* in: pointer to level array for an OS
- thread */
- ulint limit) /* in: level limit */
-{
- sync_level_t* slot;
- rw_lock_t* lock;
- mutex_t* mutex;
- ulint i;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(arr, i);
-
- if (slot->latch != NULL) {
- if (slot->level <= limit) {
-
- lock = slot->latch;
- mutex = slot->latch;
-
- fprintf(stderr,
- "InnoDB: sync levels should be"
- " > %lu but a level is %lu\n",
- (ulong) limit, (ulong) slot->level);
-
- if (mutex->magic_n == MUTEX_MAGIC_N) {
- fprintf(stderr,
- "Mutex created at %s %lu\n",
- mutex->cfile_name,
- (ulong) mutex->cline);
-
- if (mutex_get_lock_word(mutex) != 0) {
- const char* file_name;
- ulint line;
- os_thread_id_t thread_id;
-
- mutex_get_debug_info(
- mutex, &file_name,
- &line, &thread_id);
-
- fprintf(stderr,
- "InnoDB: Locked mutex:"
- " addr %p thread %ld"
- " file %s line %ld\n",
- (void*) mutex,
- os_thread_pf(
- thread_id),
- file_name,
- (ulong) line);
- } else {
- fputs("Not locked\n", stderr);
- }
- } else {
- rw_lock_print(lock);
- }
-
- return(FALSE);
- }
- }
- }
-
- return(TRUE);
-}
-
-/**********************************************************************
-Checks if the level value is stored in the level array. */
-static
-ibool
-sync_thread_levels_contain(
-/*=======================*/
- /* out: TRUE if stored */
- sync_level_t* arr, /* in: pointer to level array for an OS
- thread */
- ulint level) /* in: level */
-{
- sync_level_t* slot;
- ulint i;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(arr, i);
-
- if (slot->latch != NULL) {
- if (slot->level == level) {
-
- return(TRUE);
- }
- }
- }
-
- return(FALSE);
-}
-
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
-ibool
-sync_thread_levels_empty_gen(
-/*=========================*/
- /* out: TRUE if empty except the
- exceptions specified below */
- ibool dict_mutex_allowed) /* in: TRUE if dictionary mutex is
- allowed to be owned by the thread,
- also purge_is_running mutex is
- allowed */
-{
- sync_level_t* arr;
- sync_thread_t* thread_slot;
- sync_level_t* slot;
- ulint i;
-
- if (!sync_order_checks_on) {
-
- return(TRUE);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- mutex_exit(&sync_thread_mutex);
-
- return(TRUE);
- }
-
- arr = thread_slot->levels;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(arr, i);
-
- if (slot->latch != NULL
- && (!dict_mutex_allowed
- || (slot->level != SYNC_DICT
- && slot->level != SYNC_DICT_OPERATION))) {
-
- mutex_exit(&sync_thread_mutex);
- ut_error;
-
- return(FALSE);
- }
- }
-
- mutex_exit(&sync_thread_mutex);
-
- return(TRUE);
-}
-
-/**********************************************************************
-Checks that the level array for the current thread is empty. */
-
-ibool
-sync_thread_levels_empty(void)
-/*==========================*/
- /* out: TRUE if empty */
-{
- return(sync_thread_levels_empty_gen(FALSE));
-}
-
-/**********************************************************************
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-
-void
-sync_thread_add_level(
-/*==================*/
- void* latch, /* in: pointer to a mutex or an rw-lock */
- ulint level) /* in: level in the latching order; if
- SYNC_LEVEL_VARYING, nothing is done */
-{
- sync_level_t* array;
- sync_level_t* slot;
- sync_thread_t* thread_slot;
- ulint i;
-
- if (!sync_order_checks_on) {
-
- return;
- }
-
- if ((latch == (void*)&sync_thread_mutex)
- || (latch == (void*)&mutex_list_mutex)
- || (latch == (void*)&rw_lock_debug_mutex)
- || (latch == (void*)&rw_lock_list_mutex)) {
-
- return;
- }
-
- if (level == SYNC_LEVEL_VARYING) {
-
- return;
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
- /* We have to allocate the level array for a new thread */
- array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);
-
- thread_slot = sync_thread_level_arrays_find_free();
-
- thread_slot->id = os_thread_get_curr_id();
- thread_slot->levels = array;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(array, i);
-
- slot->latch = NULL;
- }
- }
-
- array = thread_slot->levels;
-
- /* NOTE that there is a problem with _NODE and _LEAF levels: if the
- B-tree height changes, then a leaf can change to an internal node
- or the other way around. We do not know at present if this can cause
- unnecessary assertion failures below. */
-
- switch (level) {
- case SYNC_NO_ORDER_CHECK:
- case SYNC_EXTERN_STORAGE:
- case SYNC_TREE_NODE_FROM_HASH:
- /* Do no order checking */
- break;
- case SYNC_MEM_POOL:
- ut_a(sync_thread_levels_g(array, SYNC_MEM_POOL));
- break;
- case SYNC_MEM_HASH:
- ut_a(sync_thread_levels_g(array, SYNC_MEM_HASH));
- break;
- case SYNC_RECV:
- ut_a(sync_thread_levels_g(array, SYNC_RECV));
- break;
- case SYNC_WORK_QUEUE:
- ut_a(sync_thread_levels_g(array, SYNC_WORK_QUEUE));
- break;
- case SYNC_LOG:
- ut_a(sync_thread_levels_g(array, SYNC_LOG));
- break;
- case SYNC_THR_LOCAL:
- ut_a(sync_thread_levels_g(array, SYNC_THR_LOCAL));
- break;
- case SYNC_ANY_LATCH:
- ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
- break;
- case SYNC_TRX_SYS_HEADER:
- ut_a(sync_thread_levels_g(array, SYNC_TRX_SYS_HEADER));
- break;
- case SYNC_DOUBLEWRITE:
- ut_a(sync_thread_levels_g(array, SYNC_DOUBLEWRITE));
- break;
- case SYNC_BUF_BLOCK:
- ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
- && sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
- || sync_thread_levels_g(array, SYNC_BUF_BLOCK));
- break;
- case SYNC_BUF_POOL:
- ut_a(sync_thread_levels_g(array, SYNC_BUF_POOL));
- break;
- case SYNC_SEARCH_SYS:
- ut_a(sync_thread_levels_g(array, SYNC_SEARCH_SYS));
- break;
- case SYNC_TRX_LOCK_HEAP:
- ut_a(sync_thread_levels_g(array, SYNC_TRX_LOCK_HEAP));
- break;
- case SYNC_REC_LOCK:
- ut_a((sync_thread_levels_contain(array, SYNC_KERNEL)
- && sync_thread_levels_g(array, SYNC_REC_LOCK - 1))
- || sync_thread_levels_g(array, SYNC_REC_LOCK));
- break;
- case SYNC_KERNEL:
- ut_a(sync_thread_levels_g(array, SYNC_KERNEL));
- break;
- case SYNC_IBUF_BITMAP:
- ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX)
- && sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1))
- || sync_thread_levels_g(array, SYNC_IBUF_BITMAP));
- break;
- case SYNC_IBUF_BITMAP_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP_MUTEX));
- break;
- case SYNC_FSP_PAGE:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP));
- break;
- case SYNC_FSP:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP)
- || sync_thread_levels_g(array, SYNC_FSP));
- break;
- case SYNC_TRX_UNDO_PAGE:
- ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
- || sync_thread_levels_contain(array, SYNC_RSEG)
- || sync_thread_levels_contain(array, SYNC_PURGE_SYS)
- || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE));
- break;
- case SYNC_RSEG_HEADER:
- ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
- break;
- case SYNC_RSEG_HEADER_NEW:
- ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
- && sync_thread_levels_contain(array, SYNC_FSP_PAGE));
- break;
- case SYNC_RSEG:
- ut_a(sync_thread_levels_g(array, SYNC_RSEG));
- break;
- case SYNC_TRX_UNDO:
- ut_a(sync_thread_levels_g(array, SYNC_TRX_UNDO));
- break;
- case SYNC_PURGE_LATCH:
- ut_a(sync_thread_levels_g(array, SYNC_PURGE_LATCH));
- break;
- case SYNC_PURGE_SYS:
- ut_a(sync_thread_levels_g(array, SYNC_PURGE_SYS));
- break;
- case SYNC_TREE_NODE:
- ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
- || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
- || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
- break;
- case SYNC_TREE_NODE_NEW:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
- || sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
- break;
- case SYNC_INDEX_TREE:
- ut_a((sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
- && sync_thread_levels_contain(array, SYNC_FSP)
- && sync_thread_levels_g(array, SYNC_FSP_PAGE - 1))
- || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
- break;
- case SYNC_IBUF_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1));
- break;
- case SYNC_IBUF_PESS_INSERT_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
- && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
- break;
- case SYNC_IBUF_HEADER:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
- && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
- && !sync_thread_levels_contain(
- array, SYNC_IBUF_PESS_INSERT_MUTEX));
- break;
- case SYNC_DICT_AUTOINC_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX));
- break;
- case SYNC_DICT_OPERATION:
- ut_a(sync_thread_levels_g(array, SYNC_DICT_OPERATION));
- break;
- case SYNC_DICT_HEADER:
- ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER));
- break;
- case SYNC_DICT:
-#ifdef UNIV_DEBUG
- ut_a(buf_debug_prints
- || sync_thread_levels_g(array, SYNC_DICT));
-#else /* UNIV_DEBUG */
- ut_a(sync_thread_levels_g(array, SYNC_DICT));
-#endif /* UNIV_DEBUG */
- break;
- default:
- ut_error;
- }
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(array, i);
-
- if (slot->latch == NULL) {
- slot->latch = latch;
- slot->level = level;
-
- break;
- }
- }
-
- ut_a(i < SYNC_THREAD_N_LEVELS);
-
- mutex_exit(&sync_thread_mutex);
-}
-
-/**********************************************************************
-Removes a latch from the thread level array if it is found there. */
-
-ibool
-sync_thread_reset_level(
-/*====================*/
- /* out: TRUE if found from the array; it is an error
- if the latch is not found */
- void* latch) /* in: pointer to a mutex or an rw-lock */
-{
- sync_level_t* array;
- sync_level_t* slot;
- sync_thread_t* thread_slot;
- ulint i;
-
- if (!sync_order_checks_on) {
-
- return(FALSE);
- }
-
- if ((latch == (void*)&sync_thread_mutex)
- || (latch == (void*)&mutex_list_mutex)
- || (latch == (void*)&rw_lock_debug_mutex)
- || (latch == (void*)&rw_lock_list_mutex)) {
-
- return(FALSE);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- ut_error;
-
- mutex_exit(&sync_thread_mutex);
- return(FALSE);
- }
-
- array = thread_slot->levels;
-
- for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
-
- slot = sync_thread_levels_get_nth(array, i);
-
- if (slot->latch == latch) {
- slot->latch = NULL;
-
- mutex_exit(&sync_thread_mutex);
-
- return(TRUE);
- }
- }
-
- ut_error;
-
- mutex_exit(&sync_thread_mutex);
-
- return(FALSE);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/**********************************************************************
-Initializes the synchronization data structures. */
-
-void
-sync_init(void)
-/*===========*/
-{
-#ifdef UNIV_SYNC_DEBUG
- sync_thread_t* thread_slot;
- ulint i;
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_a(sync_initialized == FALSE);
-
- sync_initialized = TRUE;
-
- /* Create the primary system wait array which is protected by an OS
- mutex */
-
- sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
- SYNC_ARRAY_OS_MUTEX);
-#ifdef UNIV_SYNC_DEBUG
- /* Create the thread latch level array where the latch levels
- are stored for each OS thread */
-
- sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
- * sizeof(sync_thread_t));
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- thread_slot = sync_thread_level_arrays_get_nth(i);
- thread_slot->levels = NULL;
- }
-#endif /* UNIV_SYNC_DEBUG */
- /* Init the mutex list and create the mutex to protect it. */
-
- UT_LIST_INIT(mutex_list);
- mutex_create(&mutex_list_mutex, SYNC_NO_ORDER_CHECK);
-#ifdef UNIV_SYNC_DEBUG
- mutex_create(&sync_thread_mutex, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Init the rw-lock list and create the mutex to protect it. */
-
- UT_LIST_INIT(rw_lock_list);
- mutex_create(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK);
-
-#ifdef UNIV_SYNC_DEBUG
- mutex_create(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK);
-
- rw_lock_debug_event = os_event_create(NULL);
- rw_lock_debug_waiters = FALSE;
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/**********************************************************************
-Frees the resources in InnoDB's own synchronization data structures. Use
-os_sync_free() after calling this. */
-
-void
-sync_close(void)
-/*===========*/
-{
- mutex_t* mutex;
-
- sync_array_free(sync_primary_wait_array);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
-
- while (mutex) {
- mutex_free(mutex);
- mutex = UT_LIST_GET_FIRST(mutex_list);
- }
-
- mutex_free(&mutex_list_mutex);
-#ifdef UNIV_SYNC_DEBUG
- mutex_free(&sync_thread_mutex);
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/***********************************************************************
-Prints wait info of the sync system. */
-
-void
-sync_print_wait_info(
-/*=================*/
- FILE* file) /* in: file where to print */
-{
-#ifdef UNIV_SYNC_DEBUG
- fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
- mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
-#endif
-
- fprintf(file,
- "Mutex spin waits %llu, rounds %llu, OS waits %llu\n"
- "RW-shared spins %llu, OS waits %llu;"
- " RW-excl spins %llu, OS waits %llu\n",
- mutex_spin_wait_count,
- mutex_spin_round_count,
- mutex_os_wait_count,
- rw_s_spin_wait_count,
- rw_s_os_wait_count,
- rw_x_spin_wait_count,
- rw_x_os_wait_count);
-
- fprintf(file,
- "Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
- "%.2f RW-excl\n",
- (double) mutex_spin_round_count /
- (mutex_spin_wait_count ? mutex_spin_wait_count : 1),
- (double) rw_s_spin_round_count /
- (rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
- (double) rw_x_spin_round_count /
- (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
-}
-
-/***********************************************************************
-Prints info of the sync system. */
-
-void
-sync_print(
-/*=======*/
- FILE* file) /* in: file where to print */
-{
-#ifdef UNIV_SYNC_DEBUG
- mutex_list_print_info(file);
-
- rw_lock_list_print_info(file);
-#endif /* UNIV_SYNC_DEBUG */
-
- sync_array_print_info(file, sync_primary_wait_array);
-
- sync_print_wait_info(file);
-}
diff --git a/storage/innobase/thr/thr0loc.c b/storage/innobase/thr/thr0loc.c
deleted file mode 100644
index b803bd53101..00000000000
--- a/storage/innobase/thr/thr0loc.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/******************************************************
-The thread local storage
-
-(c) 1995 Innobase Oy
-
-Created 10/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "thr0loc.h"
-#ifdef UNIV_NONINL
-#include "thr0loc.ic"
-#endif
-
-#include "sync0sync.h"
-#include "hash0hash.h"
-#include "mem0mem.h"
-#include "srv0srv.h"
-
-/*
- IMPLEMENTATION OF THREAD LOCAL STORAGE
- ======================================
-
-The threads sometimes need private data which depends on the thread id.
-This is implemented as a hash table, where the hash value is calculated
-from the thread id, to prepare for a large number of threads. The hash table
-is protected by a mutex. If you need modify the program and put new data to
-the thread local storage, just add it to struct thr_local_struct in the
-header file. */
-
-/* Mutex protecting the local storage hash table */
-mutex_t thr_local_mutex;
-
-/* The hash table. The module is not yet initialized when it is NULL. */
-hash_table_t* thr_local_hash = NULL;
-
-/* The private data for each thread should be put to
-the structure below and the accessor functions written
-for the field. */
-typedef struct thr_local_struct thr_local_t;
-
-struct thr_local_struct{
- os_thread_id_t id; /* id of the thread which owns this struct */
- os_thread_t handle; /* operating system handle to the thread */
- ulint slot_no;/* the index of the slot in the thread table
- for this thread */
- ibool in_ibuf;/* TRUE if the the thread is doing an ibuf
- operation */
- hash_node_t hash; /* hash chain node */
- ulint magic_n;
-};
-
-#define THR_LOCAL_MAGIC_N 1231234
-
-/***********************************************************************
-Returns the local storage struct for a thread. */
-static
-thr_local_t*
-thr_local_get(
-/*==========*/
- /* out: local storage */
- os_thread_id_t id) /* in: thread id of the thread */
-{
- thr_local_t* local;
-
-try_again:
- ut_ad(thr_local_hash);
- ut_ad(mutex_own(&thr_local_mutex));
-
- /* Look for the local struct in the hash table */
-
- local = NULL;
-
- HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id),
- local, os_thread_eq(local->id, id));
- if (local == NULL) {
- mutex_exit(&thr_local_mutex);
-
- thr_local_create();
-
- mutex_enter(&thr_local_mutex);
-
- goto try_again;
- }
-
- ut_ad(local->magic_n == THR_LOCAL_MAGIC_N);
-
- return(local);
-}
-
-/***********************************************************************
-Gets the slot number in the thread table of a thread. */
-
-ulint
-thr_local_get_slot_no(
-/*==================*/
- /* out: slot number */
- os_thread_id_t id) /* in: thread id of the thread */
-{
- ulint slot_no;
- thr_local_t* local;
-
- mutex_enter(&thr_local_mutex);
-
- local = thr_local_get(id);
-
- slot_no = local->slot_no;
-
- mutex_exit(&thr_local_mutex);
-
- return(slot_no);
-}
-
-/***********************************************************************
-Sets the slot number in the thread table of a thread. */
-
-void
-thr_local_set_slot_no(
-/*==================*/
- os_thread_id_t id, /* in: thread id of the thread */
- ulint slot_no)/* in: slot number */
-{
- thr_local_t* local;
-
- mutex_enter(&thr_local_mutex);
-
- local = thr_local_get(id);
-
- local->slot_no = slot_no;
-
- mutex_exit(&thr_local_mutex);
-}
-
-/***********************************************************************
-Returns pointer to the 'in_ibuf' field within the current thread local
-storage. */
-
-ibool*
-thr_local_get_in_ibuf_field(void)
-/*=============================*/
- /* out: pointer to the in_ibuf field */
-{
- thr_local_t* local;
-
- mutex_enter(&thr_local_mutex);
-
- local = thr_local_get(os_thread_get_curr_id());
-
- mutex_exit(&thr_local_mutex);
-
- return(&(local->in_ibuf));
-}
-
-/***********************************************************************
-Creates a local storage struct for the calling new thread. */
-
-void
-thr_local_create(void)
-/*==================*/
-{
- thr_local_t* local;
-
- if (thr_local_hash == NULL) {
- thr_local_init();
- }
-
- local = mem_alloc(sizeof(thr_local_t));
-
- local->id = os_thread_get_curr_id();
- local->handle = os_thread_get_curr();
- local->magic_n = THR_LOCAL_MAGIC_N;
-
- local->in_ibuf = FALSE;
-
- mutex_enter(&thr_local_mutex);
-
- HASH_INSERT(thr_local_t, hash, thr_local_hash,
- os_thread_pf(os_thread_get_curr_id()),
- local);
-
- mutex_exit(&thr_local_mutex);
-}
-
-/***********************************************************************
-Frees the local storage struct for the specified thread. */
-
-void
-thr_local_free(
-/*===========*/
- os_thread_id_t id) /* in: thread id */
-{
- thr_local_t* local;
-
- mutex_enter(&thr_local_mutex);
-
- /* Look for the local struct in the hash table */
-
- HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id),
- local, os_thread_eq(local->id, id));
- if (local == NULL) {
- mutex_exit(&thr_local_mutex);
-
- return;
- }
-
- HASH_DELETE(thr_local_t, hash, thr_local_hash,
- os_thread_pf(id), local);
-
- mutex_exit(&thr_local_mutex);
-
- ut_a(local->magic_n == THR_LOCAL_MAGIC_N);
-
- mem_free(local);
-}
-
-/********************************************************************
-Initializes the thread local storage module. */
-
-void
-thr_local_init(void)
-/*================*/
-{
-
- ut_a(thr_local_hash == NULL);
-
- thr_local_hash = hash_create(OS_THREAD_MAX_N + 100);
-
- mutex_create(&thr_local_mutex, SYNC_THR_LOCAL);
-}
diff --git a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
deleted file mode 100644
index f0e85ef1604..00000000000
--- a/storage/innobase/trx/trx0purge.c
+++ /dev/null
@@ -1,1148 +0,0 @@
-/******************************************************
-Purge old versions
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0purge.h"
-
-#ifdef UNIV_NONINL
-#include "trx0purge.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "read0read.h"
-#include "fut0fut.h"
-#include "que0que.h"
-#include "row0purge.h"
-#include "row0upd.h"
-#include "trx0rec.h"
-#include "srv0que.h"
-#include "os0thread.h"
-
-/* The global data structure coordinating a purge */
-trx_purge_t* purge_sys = NULL;
-
-/* A dummy undo record used as a return value when we have a whole undo log
-which needs no purge */
-trx_undo_rec_t trx_purge_dummy_rec;
-
-/*********************************************************************
-Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system. */
-
-ibool
-trx_purge_update_undo_must_exist(
-/*=============================*/
- /* out: TRUE if is sure that it is preserved, also
- if the function returns FALSE, it is possible that
- the undo log still exists in the system */
- dulint trx_id) /* in: transaction id */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!read_view_sees_trx_id(purge_sys->view, trx_id)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*=================== PURGE RECORD ARRAY =============================*/
-
-/***********************************************************************
-Stores info of an undo log record during a purge. */
-static
-trx_undo_inf_t*
-trx_purge_arr_store_info(
-/*=====================*/
- /* out: pointer to the storage cell */
- dulint trx_no, /* in: transaction number */
- dulint undo_no)/* in: undo number */
-{
- trx_undo_inf_t* cell;
- trx_undo_arr_t* arr;
- ulint i;
-
- arr = purge_sys->arr;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (!(cell->in_use)) {
- /* Not in use, we may store here */
- cell->undo_no = undo_no;
- cell->trx_no = trx_no;
- cell->in_use = TRUE;
-
- arr->n_used++;
-
- return(cell);
- }
- }
-}
-
-/***********************************************************************
-Removes info of an undo log record during a purge. */
-UNIV_INLINE
-void
-trx_purge_arr_remove_info(
-/*======================*/
- trx_undo_inf_t* cell) /* in: pointer to the storage cell */
-{
- trx_undo_arr_t* arr;
-
- arr = purge_sys->arr;
-
- cell->in_use = FALSE;
-
- ut_ad(arr->n_used > 0);
-
- arr->n_used--;
-}
-
-/***********************************************************************
-Gets the biggest pair of a trx number and an undo number in a purge array. */
-static
-void
-trx_purge_arr_get_biggest(
-/*======================*/
- trx_undo_arr_t* arr, /* in: purge array */
- dulint* trx_no, /* out: transaction number: ut_dulint_zero
- if array is empty */
- dulint* undo_no)/* out: undo number */
-{
- trx_undo_inf_t* cell;
- dulint pair_trx_no;
- dulint pair_undo_no;
- int trx_cmp;
- ulint n_used;
- ulint i;
- ulint n;
-
- n = 0;
- n_used = arr->n_used;
- pair_trx_no = ut_dulint_zero;
- pair_undo_no = ut_dulint_zero;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (cell->in_use) {
- n++;
- trx_cmp = ut_dulint_cmp(cell->trx_no, pair_trx_no);
-
- if ((trx_cmp > 0)
- || ((trx_cmp == 0)
- && (ut_dulint_cmp(cell->undo_no,
- pair_undo_no) >= 0))) {
-
- pair_trx_no = cell->trx_no;
- pair_undo_no = cell->undo_no;
- }
- }
-
- if (n == n_used) {
- *trx_no = pair_trx_no;
- *undo_no = pair_undo_no;
-
- return;
- }
- }
-}
-
-/********************************************************************
-Builds a purge 'query' graph. The actual purge is performed by executing
-this query graph. */
-static
-que_t*
-trx_purge_graph_build(void)
-/*=======================*/
- /* out, own: the query graph */
-{
- mem_heap_t* heap;
- que_fork_t* fork;
- que_thr_t* thr;
- /* que_thr_t* thr2; */
-
- heap = mem_heap_create(512);
- fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap);
- fork->trx = purge_sys->trx;
-
- thr = que_thr_create(fork, heap);
-
- thr->child = row_purge_node_create(thr, heap);
-
- /* thr2 = que_thr_create(fork, fork, heap);
-
- thr2->child = row_purge_node_create(fork, thr2, heap); */
-
- return(fork);
-}
-
-/************************************************************************
-Creates the global purge system control structure and inits the history
-mutex. */
-
-void
-trx_purge_sys_create(void)
-/*======================*/
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- purge_sys = mem_alloc(sizeof(trx_purge_t));
-
- purge_sys->state = TRX_STOP_PURGE;
-
- purge_sys->n_pages_handled = 0;
-
- purge_sys->purge_trx_no = ut_dulint_zero;
- purge_sys->purge_undo_no = ut_dulint_zero;
- purge_sys->next_stored = FALSE;
-
- rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH);
-
- mutex_create(&purge_sys->mutex, SYNC_PURGE_SYS);
-
- purge_sys->heap = mem_heap_create(256);
-
- purge_sys->arr = trx_undo_arr_create();
-
- purge_sys->sess = sess_open();
-
- purge_sys->trx = purge_sys->sess->trx;
-
- purge_sys->trx->is_purge = 1;
-
- ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED));
-
- purge_sys->query = trx_purge_graph_build();
-
- purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero,
- purge_sys->heap);
-}
-
-/*================ UNDO LOG HISTORY LIST =============================*/
-
-/************************************************************************
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
-
-void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /* in: transaction */
- page_t* undo_page, /* in: update undo log header page,
- x-latched */
- mtr_t* mtr) /* in: mtr */
-{
- trx_undo_t* undo;
- trx_rseg_t* rseg;
- trx_rsegf_t* rseg_header;
- trx_usegf_t* seg_header;
- trx_ulogf_t* undo_header;
- trx_upagef_t* page_header;
- ulint hist_size;
-
- undo = trx->update_undo;
-
- ut_ad(undo);
-
- rseg = undo->rseg;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
-
- undo_header = undo_page + undo->hdr_offset;
- seg_header = undo_page + TRX_UNDO_SEG_HDR;
- page_header = undo_page + TRX_UNDO_PAGE_HDR;
-
- if (undo->state != TRX_UNDO_CACHED) {
- /* The undo log segment will not be reused */
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- ut_error;
- }
-
- trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
-
- hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr);
- ut_ad(undo->size == flst_get_len(
- seg_header + TRX_UNDO_PAGE_LIST, mtr));
-
- mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- hist_size + undo->size, MLOG_4BYTES, mtr);
- }
-
- /* Add the log as the first in the history list */
- flst_add_first(rseg_header + TRX_RSEG_HISTORY,
- undo_header + TRX_UNDO_HISTORY_NODE, mtr);
- mutex_enter(&kernel_mutex);
- trx_sys->rseg_history_len++;
- mutex_exit(&kernel_mutex);
-
- /* Write the trx number to the undo log header */
- mlog_write_dulint(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
- /* Write information about delete markings to the undo log header */
-
- if (!undo->del_marks) {
- mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, mtr);
- }
-
- if (rseg->last_page_no == FIL_NULL) {
-
- rseg->last_page_no = undo->hdr_page_no;
- rseg->last_offset = undo->hdr_offset;
- rseg->last_trx_no = trx->no;
- rseg->last_del_marks = undo->del_marks;
- }
-}
-
-/**************************************************************************
-Frees an undo log segment which is in the history list. Cuts the end of the
-history list at the youngest undo log in this segment. */
-static
-void
-trx_purge_free_segment(
-/*===================*/
- trx_rseg_t* rseg, /* in: rollback segment */
- fil_addr_t hdr_addr, /* in: the file address of log_hdr */
- ulint n_removed_logs) /* in: count of how many undo logs we
- will cut off from the end of the
- history list */
-{
- page_t* undo_page;
- trx_rsegf_t* rseg_hdr;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- ibool freed;
- ulint seg_size;
- ulint hist_size;
- ibool marked = FALSE;
- mtr_t mtr;
-
- /* fputs("Freeing an update undo log segment\n", stderr); */
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
-loop:
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
-
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
-
- undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- log_hdr = undo_page + hdr_addr.boffset;
-
- /* Mark the last undo log totally purged, so that if the system
- crashes, the tail of the undo log will not get accessed again. The
- list of pages in the undo log tail gets inconsistent during the
- freeing of the segment, and therefore purge should not try to access
- them again. */
-
- if (!marked) {
- mlog_write_ulint(log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, &mtr);
- marked = TRUE;
- }
-
- freed = fseg_free_step_not_header(seg_hdr + TRX_UNDO_FSEG_HEADER,
- &mtr);
- if (!freed) {
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- goto loop;
- }
-
- /* The page list may now be inconsistent, but the length field
- stored in the list base node tells us how big it was before we
- started the freeing. */
-
- seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr);
-
- /* We may free the undo log segment header page; it must be freed
- within the same mtr as the undo log header is removed from the
- history list: otherwise, in case of a database crash, the segment
- could become inaccessible garbage in the file space. */
-
- flst_cut_end(rseg_hdr + TRX_RSEG_HISTORY,
- log_hdr + TRX_UNDO_HISTORY_NODE, n_removed_logs, &mtr);
-
- mutex_enter(&kernel_mutex);
- ut_ad(trx_sys->rseg_history_len >= n_removed_logs);
- trx_sys->rseg_history_len -= n_removed_logs;
- mutex_exit(&kernel_mutex);
-
- freed = FALSE;
-
- while (!freed) {
- /* Here we assume that a file segment with just the header
- page can be freed in a few steps, so that the buffer pool
- is not flooded with bufferfixed pages: see the note in
- fsp0fsp.c. */
-
- freed = fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER,
- &mtr);
- }
-
- hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, &mtr);
- ut_ad(hist_size >= seg_size);
-
- mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
- hist_size - seg_size, MLOG_4BYTES, &mtr);
-
- ut_ad(rseg->curr_size >= seg_size);
-
- rseg->curr_size -= seg_size;
-
- mutex_exit(&(rseg->mutex));
-
- mtr_commit(&mtr);
-}
-
-/************************************************************************
-Removes unnecessary history data from a rollback segment. */
-static
-void
-trx_purge_truncate_rseg_history(
-/*============================*/
- trx_rseg_t* rseg, /* in: rollback segment */
- dulint limit_trx_no, /* in: remove update undo logs whose
- trx number is < limit_trx_no */
- dulint limit_undo_no) /* in: if transaction number is equal
- to limit_trx_no, truncate undo records
- with undo number < limit_undo_no */
-{
- fil_addr_t hdr_addr;
- fil_addr_t prev_hdr_addr;
- trx_rsegf_t* rseg_hdr;
- page_t* undo_page;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- int cmp;
- ulint n_removed_logs = 0;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
-
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
-
- hdr_addr = trx_purge_get_log_from_hist(
- flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
-loop:
- if (hdr_addr.page == FIL_NULL) {
-
- mutex_exit(&(rseg->mutex));
-
- mtr_commit(&mtr);
-
- return;
- }
-
- undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
-
- log_hdr = undo_page + hdr_addr.boffset;
-
- cmp = ut_dulint_cmp(mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO),
- limit_trx_no);
- if (cmp == 0) {
- trx_undo_truncate_start(rseg, rseg->space, hdr_addr.page,
- hdr_addr.boffset, limit_undo_no);
- }
-
- if (cmp >= 0) {
- mutex_enter(&kernel_mutex);
- ut_a(trx_sys->rseg_history_len >= n_removed_logs);
- trx_sys->rseg_history_len -= n_removed_logs;
- mutex_exit(&kernel_mutex);
-
- flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY,
- log_hdr + TRX_UNDO_HISTORY_NODE,
- n_removed_logs, &mtr);
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- return;
- }
-
- prev_hdr_addr = trx_purge_get_log_from_hist(
- flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
- n_removed_logs++;
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE)
- && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) {
-
- /* We can free the whole log segment */
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- trx_purge_free_segment(rseg, hdr_addr, n_removed_logs);
-
- n_removed_logs = 0;
- } else {
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
- }
-
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
-
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
-
- hdr_addr = prev_hdr_addr;
-
- goto loop;
-}
-
-/************************************************************************
-Removes unnecessary history data from rollback segments. NOTE that when this
-function is called, the caller must not have any latches on undo log pages! */
-static
-void
-trx_purge_truncate_history(void)
-/*============================*/
-{
- trx_rseg_t* rseg;
- dulint limit_trx_no;
- dulint limit_undo_no;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no,
- &limit_undo_no);
-
- if (ut_dulint_cmp(limit_trx_no, ut_dulint_zero) == 0) {
-
- limit_trx_no = purge_sys->purge_trx_no;
- limit_undo_no = purge_sys->purge_undo_no;
- }
-
- /* We play safe and set the truncate limit at most to the purge view
- low_limit number, though this is not necessary */
-
- if (ut_dulint_cmp(limit_trx_no, purge_sys->view->low_limit_no) >= 0) {
- limit_trx_no = purge_sys->view->low_limit_no;
- limit_undo_no = ut_dulint_zero;
- }
-
- ut_ad((ut_dulint_cmp(limit_trx_no,
- purge_sys->view->low_limit_no) <= 0));
-
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- while (rseg) {
- trx_purge_truncate_rseg_history(rseg, limit_trx_no,
- limit_undo_no);
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
-}
-
-/************************************************************************
-Does a truncate if the purge array is empty. NOTE that when this function is
-called, the caller must not have any latches on undo log pages! */
-UNIV_INLINE
-ibool
-trx_purge_truncate_if_arr_empty(void)
-/*=================================*/
- /* out: TRUE if array empty */
-{
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- if (purge_sys->arr->n_used == 0) {
-
- trx_purge_truncate_history();
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***************************************************************************
-Updates the last not yet purged history log info in rseg when we have purged
-a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
-static
-void
-trx_purge_rseg_get_next_history_log(
-/*================================*/
- trx_rseg_t* rseg) /* in: rollback segment */
-{
- page_t* undo_page;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- fil_addr_t prev_log_addr;
- dulint trx_no;
- ibool del_marks;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- mutex_enter(&(rseg->mutex));
-
- ut_a(rseg->last_page_no != FIL_NULL);
-
- purge_sys->purge_trx_no = ut_dulint_add(rseg->last_trx_no, 1);
- purge_sys->purge_undo_no = ut_dulint_zero;
- purge_sys->next_stored = FALSE;
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(rseg->space,
- rseg->last_page_no, &mtr);
- log_hdr = undo_page + rseg->last_offset;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- /* Increase the purge page count by one for every handled log */
-
- purge_sys->n_pages_handled++;
-
- prev_log_addr = trx_purge_get_log_from_hist(
- flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
- if (prev_log_addr.page == FIL_NULL) {
- /* No logs left in the history list */
-
- rseg->last_page_no = FIL_NULL;
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- mutex_enter(&kernel_mutex);
-
- /* Add debug code to track history list corruption reported
- on the MySQL mailing list on Nov 9, 2004. The fut0lst.c
- file-based list was corrupt. The prev node pointer was
- FIL_NULL, even though the list length was over 8 million nodes!
- We assume that purge truncates the history list in moderate
- size pieces, and if we here reach the head of the list, the
- list cannot be longer than 20 000 undo logs now. */
-
- if (trx_sys->rseg_history_len > 20000) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: purge reached the"
- " head of the history list,\n"
- "InnoDB: but its length is still"
- " reported as %lu! Make a detailed bug\n"
- "InnoDB: report, and submit it"
- " to http://bugs.mysql.com\n",
- (ulong) trx_sys->rseg_history_len);
- }
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- /* Read the trx number and del marks from the previous log header */
- mtr_start(&mtr);
-
- log_hdr = trx_undo_page_get_s_latched(rseg->space,
- prev_log_addr.page, &mtr)
- + prev_log_addr.boffset;
-
- trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
-
- del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);
-
- mtr_commit(&mtr);
-
- mutex_enter(&(rseg->mutex));
-
- rseg->last_page_no = prev_log_addr.page;
- rseg->last_offset = prev_log_addr.boffset;
- rseg->last_trx_no = trx_no;
- rseg->last_del_marks = del_marks;
-
- mutex_exit(&(rseg->mutex));
-}
-
-/***************************************************************************
-Chooses the next undo log to purge and updates the info in purge_sys. This
-function is used to initialize purge_sys when the next record to purge is
-not known, and also to update the purge system info on the next record when
-purge has handled the whole undo log for a transaction. */
-static
-void
-trx_purge_choose_next_log(void)
-/*===========================*/
-{
- trx_undo_rec_t* rec;
- trx_rseg_t* rseg;
- trx_rseg_t* min_rseg;
- dulint min_trx_no;
- ulint space = 0; /* remove warning (??? bug ???) */
- ulint page_no = 0; /* remove warning (??? bug ???) */
- ulint offset = 0; /* remove warning (??? bug ???) */
- mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
- ut_ad(purge_sys->next_stored == FALSE);
-
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- min_trx_no = ut_dulint_max;
-
- min_rseg = NULL;
-
- while (rseg) {
- mutex_enter(&(rseg->mutex));
-
- if (rseg->last_page_no != FIL_NULL) {
-
- if ((min_rseg == NULL)
- || (ut_dulint_cmp(min_trx_no,
- rseg->last_trx_no) > 0)) {
-
- min_rseg = rseg;
- min_trx_no = rseg->last_trx_no;
- space = rseg->space;
- ut_a(space == 0); /* We assume in purge of
- externally stored fields
- that space id == 0 */
- page_no = rseg->last_page_no;
- offset = rseg->last_offset;
- }
- }
-
- mutex_exit(&(rseg->mutex));
-
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
-
- if (min_rseg == NULL) {
-
- return;
- }
-
- mtr_start(&mtr);
-
- if (!min_rseg->last_del_marks) {
- /* No need to purge this log */
-
- rec = &trx_purge_dummy_rec;
- } else {
- rec = trx_undo_get_first_rec(space, page_no, offset,
- RW_S_LATCH, &mtr);
- if (rec == NULL) {
- /* Undo log empty */
-
- rec = &trx_purge_dummy_rec;
- }
- }
-
- purge_sys->next_stored = TRUE;
- purge_sys->rseg = min_rseg;
-
- purge_sys->hdr_page_no = page_no;
- purge_sys->hdr_offset = offset;
-
- purge_sys->purge_trx_no = min_trx_no;
-
- if (rec == &trx_purge_dummy_rec) {
-
- purge_sys->purge_undo_no = ut_dulint_zero;
- purge_sys->page_no = page_no;
- purge_sys->offset = 0;
- } else {
- purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec);
-
- purge_sys->page_no = buf_frame_get_page_no(rec);
- purge_sys->offset = rec - buf_frame_align(rec);
- }
-
- mtr_commit(&mtr);
-}
-
-/***************************************************************************
-Gets the next record to purge and updates the info in the purge system. */
-static
-trx_undo_rec_t*
-trx_purge_get_next_rec(
-/*===================*/
- /* out: copy of an undo log record or
- pointer to the dummy undo log record */
- mem_heap_t* heap) /* in: memory heap where copied */
-{
- trx_undo_rec_t* rec;
- trx_undo_rec_t* rec_copy;
- trx_undo_rec_t* rec2;
- trx_undo_rec_t* next_rec;
- page_t* undo_page;
- page_t* page;
- ulint offset;
- ulint page_no;
- ulint space;
- ulint type;
- ulint cmpl_info;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
- ut_ad(purge_sys->next_stored);
-
- space = purge_sys->rseg->space;
- page_no = purge_sys->page_no;
- offset = purge_sys->offset;
-
- if (offset == 0) {
- /* It is the dummy undo log record, which means that there is
- no need to purge this undo log */
-
- trx_purge_rseg_get_next_history_log(purge_sys->rseg);
-
- /* Look for the next undo log and record to purge */
-
- trx_purge_choose_next_log();
-
- return(&trx_purge_dummy_rec);
- }
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
- rec = undo_page + offset;
-
- rec2 = rec;
-
- for (;;) {
- /* Try first to find the next record which requires a purge
- operation from the same page of the same undo log */
-
- next_rec = trx_undo_page_get_next_rec(rec2,
- purge_sys->hdr_page_no,
- purge_sys->hdr_offset);
- if (next_rec == NULL) {
- rec2 = trx_undo_get_next_rec(
- rec2, purge_sys->hdr_page_no,
- purge_sys->hdr_offset, &mtr);
- break;
- }
-
- rec2 = next_rec;
-
- type = trx_undo_rec_get_type(rec2);
-
- if (type == TRX_UNDO_DEL_MARK_REC) {
-
- break;
- }
-
- cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
-
- if (trx_undo_rec_get_extern_storage(rec2)) {
- break;
- }
-
- if ((type == TRX_UNDO_UPD_EXIST_REC)
- && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- break;
- }
- }
-
- if (rec2 == NULL) {
- mtr_commit(&mtr);
-
- trx_purge_rseg_get_next_history_log(purge_sys->rseg);
-
- /* Look for the next undo log and record to purge */
-
- trx_purge_choose_next_log();
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
-
- rec = undo_page + offset;
- } else {
- page = buf_frame_align(rec2);
-
- purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2);
- purge_sys->page_no = buf_frame_get_page_no(page);
- purge_sys->offset = rec2 - page;
-
- if (undo_page != page) {
- /* We advance to a new page of the undo log: */
- purge_sys->n_pages_handled++;
- }
- }
-
- rec_copy = trx_undo_rec_copy(rec, heap);
-
- mtr_commit(&mtr);
-
- return(rec_copy);
-}
-
-/************************************************************************
-Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function. */
-
-trx_undo_rec_t*
-trx_purge_fetch_next_rec(
-/*=====================*/
- /* out: copy of an undo log record or
- pointer to the dummy undo log record
- &trx_purge_dummy_rec, if the whole undo log
- can skipped in purge; NULL if none left */
- dulint* roll_ptr,/* out: roll pointer to undo record */
- trx_undo_inf_t** cell, /* out: storage cell for the record in the
- purge array */
- mem_heap_t* heap) /* in: memory heap where copied */
-{
- trx_undo_rec_t* undo_rec;
-
- mutex_enter(&(purge_sys->mutex));
-
- if (purge_sys->state == TRX_STOP_PURGE) {
- trx_purge_truncate_if_arr_empty();
-
- mutex_exit(&(purge_sys->mutex));
-
- return(NULL);
- }
-
- if (!purge_sys->next_stored) {
- trx_purge_choose_next_log();
-
- if (!purge_sys->next_stored) {
- purge_sys->state = TRX_STOP_PURGE;
-
- trx_purge_truncate_if_arr_empty();
-
- if (srv_print_thread_releases) {
- fprintf(stderr,
- "Purge: No logs left in the"
- " history list; pages handled %lu\n",
- (ulong) purge_sys->n_pages_handled);
- }
-
- mutex_exit(&(purge_sys->mutex));
-
- return(NULL);
- }
- }
-
- if (purge_sys->n_pages_handled >= purge_sys->handle_limit) {
-
- purge_sys->state = TRX_STOP_PURGE;
-
- trx_purge_truncate_if_arr_empty();
-
- mutex_exit(&(purge_sys->mutex));
-
- return(NULL);
- }
-
- if (ut_dulint_cmp(purge_sys->purge_trx_no,
- purge_sys->view->low_limit_no) >= 0) {
- purge_sys->state = TRX_STOP_PURGE;
-
- trx_purge_truncate_if_arr_empty();
-
- mutex_exit(&(purge_sys->mutex));
-
- return(NULL);
- }
-
- /* fprintf(stderr, "Thread %lu purging trx %lu undo record %lu\n",
- os_thread_get_curr_id(),
- ut_dulint_get_low(purge_sys->purge_trx_no),
- ut_dulint_get_low(purge_sys->purge_undo_no)); */
-
- *roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id,
- purge_sys->page_no,
- purge_sys->offset);
-
- *cell = trx_purge_arr_store_info(purge_sys->purge_trx_no,
- purge_sys->purge_undo_no);
-
- ut_ad(ut_dulint_cmp(purge_sys->purge_trx_no,
- (purge_sys->view)->low_limit_no) < 0);
-
- /* The following call will advance the stored values of purge_trx_no
- and purge_undo_no, therefore we had to store them first */
-
- undo_rec = trx_purge_get_next_rec(heap);
-
- mutex_exit(&(purge_sys->mutex));
-
- return(undo_rec);
-}
-
-/***********************************************************************
-Releases a reserved purge undo record. */
-
-void
-trx_purge_rec_release(
-/*==================*/
- trx_undo_inf_t* cell) /* in: storage cell */
-{
- trx_undo_arr_t* arr;
-
- mutex_enter(&(purge_sys->mutex));
-
- arr = purge_sys->arr;
-
- trx_purge_arr_remove_info(cell);
-
- mutex_exit(&(purge_sys->mutex));
-}
-
-/***********************************************************************
-This function runs a purge batch. */
-
-ulint
-trx_purge(void)
-/*===========*/
- /* out: number of undo log pages handled in
- the batch */
-{
- que_thr_t* thr;
- /* que_thr_t* thr2; */
- ulint old_pages_handled;
-
- mutex_enter(&(purge_sys->mutex));
-
- if (purge_sys->trx->n_active_thrs > 0) {
-
- mutex_exit(&(purge_sys->mutex));
-
- /* Should not happen */
-
- ut_error;
-
- return(0);
- }
-
- rw_lock_x_lock(&(purge_sys->latch));
-
- mutex_enter(&kernel_mutex);
-
- /* Close and free the old purge view */
-
- read_view_close(purge_sys->view);
- purge_sys->view = NULL;
- mem_heap_empty(purge_sys->heap);
-
- /* Determine how much data manipulation language (DML) statements
- need to be delayed in order to reduce the lagging of the purge
- thread. */
- srv_dml_needed_delay = 0; /* in microseconds; default: no delay */
-
- /* If we cannot advance the 'purge view' because of an old
- 'consistent read view', then the DML statements cannot be delayed.
- Also, srv_max_purge_lag <= 0 means 'infinity'. */
- if (srv_max_purge_lag > 0
- && !UT_LIST_GET_LAST(trx_sys->view_list)) {
- float ratio = (float) trx_sys->rseg_history_len
- / srv_max_purge_lag;
- if (ratio > ULINT_MAX / 10000) {
- /* Avoid overflow: maximum delay is 4295 seconds */
- srv_dml_needed_delay = ULINT_MAX;
- } else if (ratio > 1) {
- /* If the history list length exceeds the
- innodb_max_purge_lag, the
- data manipulation statements are delayed
- by at least 5000 microseconds. */
- srv_dml_needed_delay = (ulint) ((ratio - .5) * 10000);
- }
- }
-
- purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero,
- purge_sys->heap);
- mutex_exit(&kernel_mutex);
-
- rw_lock_x_unlock(&(purge_sys->latch));
-
- purge_sys->state = TRX_PURGE_ON;
-
- /* Handle at most 20 undo log pages in one purge batch */
-
- purge_sys->handle_limit = purge_sys->n_pages_handled + 20;
-
- old_pages_handled = purge_sys->n_pages_handled;
-
- mutex_exit(&(purge_sys->mutex));
-
- mutex_enter(&kernel_mutex);
-
- thr = que_fork_start_command(purge_sys->query);
-
- ut_ad(thr);
-
- /* thr2 = que_fork_start_command(purge_sys->query);
-
- ut_ad(thr2); */
-
-
- mutex_exit(&kernel_mutex);
-
- /* srv_que_task_enqueue(thr2); */
-
- if (srv_print_thread_releases) {
-
- fputs("Starting purge\n", stderr);
- }
-
- que_run_threads(thr);
-
- if (srv_print_thread_releases) {
-
- fprintf(stderr,
- "Purge ends; pages handled %lu\n",
- (ulong) purge_sys->n_pages_handled);
- }
-
- return(purge_sys->n_pages_handled - old_pages_handled);
-}
-
-/**********************************************************************
-Prints information of the purge system to stderr. */
-
-void
-trx_purge_sys_print(void)
-/*=====================*/
-{
- fprintf(stderr, "InnoDB: Purge system view:\n");
- read_view_print(purge_sys->view);
-
- fprintf(stderr, "InnoDB: Purge trx n:o %lu %lu, undo n_o %lu %lu\n",
- (ulong) ut_dulint_get_high(purge_sys->purge_trx_no),
- (ulong) ut_dulint_get_low(purge_sys->purge_trx_no),
- (ulong) ut_dulint_get_high(purge_sys->purge_undo_no),
- (ulong) ut_dulint_get_low(purge_sys->purge_undo_no));
- fprintf(stderr,
- "InnoDB: Purge next stored %lu, page_no %lu, offset %lu,\n"
- "InnoDB: Purge hdr_page_no %lu, hdr_offset %lu\n",
- (ulong) purge_sys->next_stored,
- (ulong) purge_sys->page_no,
- (ulong) purge_sys->offset,
- (ulong) purge_sys->hdr_page_no,
- (ulong) purge_sys->hdr_offset);
-}
diff --git a/storage/innobase/trx/trx0rec.c b/storage/innobase/trx/trx0rec.c
deleted file mode 100644
index 50f8b011463..00000000000
--- a/storage/innobase/trx/trx0rec.c
+++ /dev/null
@@ -1,1434 +0,0 @@
-/******************************************************
-Transaction undo log record
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0rec.h"
-
-#ifdef UNIV_NONINL
-#include "trx0rec.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0undo.h"
-#include "dict0dict.h"
-#include "ut0mem.h"
-#include "row0upd.h"
-#include "que0que.h"
-#include "trx0purge.h"
-#include "row0row.h"
-
-/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
-
-/**************************************************************************
-Writes the mtr log entry of the inserted undo log record on the undo log
-page. */
-UNIV_INLINE
-void
-trx_undof_page_add_undo_rec_log(
-/*============================*/
- page_t* undo_page, /* in: undo log page */
- ulint old_free, /* in: start offset of the inserted entry */
- ulint new_free, /* in: end offset of the entry */
- mtr_t* mtr) /* in: mtr */
-{
- byte* log_ptr;
- const byte* log_end;
- ulint len;
-
- log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);
-
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN];
- log_ptr = mlog_write_initial_log_record_fast(
- undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
- len = new_free - old_free - 4;
-
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
-
- if (log_ptr + len <= log_end) {
- memcpy(log_ptr, undo_page + old_free + 2, len);
- mlog_close(mtr, log_ptr + len);
- } else {
- mlog_close(mtr, log_ptr);
- mlog_catenate_string(mtr, undo_page + old_free + 2, len);
- }
-}
-
-/***************************************************************
-Parses a redo log record of adding an undo log record. */
-
-byte*
-trx_undo_parse_add_undo_rec(
-/*========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page) /* in: page or NULL */
-{
- ulint len;
- byte* rec;
- ulint first_free;
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- len = mach_read_from_2(ptr);
- ptr += 2;
-
- if (end_ptr < ptr + len) {
-
- return(NULL);
- }
-
- if (page == NULL) {
-
- return(ptr + len);
- }
-
- first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- rec = page + first_free;
-
- mach_write_to_2(rec, first_free + 4 + len);
- mach_write_to_2(rec + 2 + len, first_free);
-
- mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- first_free + 4 + len);
- ut_memcpy(rec + 2, ptr, len);
-
- return(ptr + len);
-}
-
-/**************************************************************************
-Calculates the free space left for extending an undo log record. */
-UNIV_INLINE
-ulint
-trx_undo_left(
-/*==========*/
- /* out: bytes left */
- page_t* page, /* in: undo log page */
- byte* ptr) /* in: pointer to page */
-{
- /* The '- 10' is a safety margin, in case we have some small
- calculation error below */
-
- return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
-}
-
-/**************************************************************************
-Reports in the undo log of an insert of a clustered index record. */
-static
-ulint
-trx_undo_page_report_insert(
-/*========================*/
- /* out: offset of the inserted entry
- on the page if succeed, 0 if fail */
- page_t* undo_page, /* in: undo log page */
- trx_t* trx, /* in: transaction */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* clust_entry, /* in: index entry which will be
- inserted to the clustered index */
- mtr_t* mtr) /* in: mtr */
-{
- ulint first_free;
- byte* ptr;
- ulint len;
- dfield_t* field;
- ulint flen;
- ulint i;
-
- ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
-
- ut_ad(first_free <= UNIV_PAGE_SIZE);
-
- if (trx_undo_left(undo_page, ptr) < 30) {
-
- /* NOTE: the value 30 must be big enough such that the general
- fields written below fit on the undo log page */
-
- return(0);
- }
-
- /* Reserve 2 bytes for the pointer to the next undo log record */
- ptr += 2;
-
- /* Store first some general parameters to the undo log */
- mach_write_to_1(ptr, TRX_UNDO_INSERT_REC);
- ptr++;
-
- len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
- ptr += len;
-
- len = mach_dulint_write_much_compressed(ptr, (index->table)->id);
- ptr += len;
- /*----------------------------------------*/
- /* Store then the fields required to uniquely determine the record
- to be inserted in the clustered index */
-
- for (i = 0; i < dict_index_get_n_unique(index); i++) {
-
- field = dtuple_get_nth_field(clust_entry, i);
-
- flen = dfield_get_len(field);
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- len = mach_write_compressed(ptr, flen);
- ptr += len;
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, dfield_get_data(field), flen);
- ptr += flen;
- }
- }
-
- if (trx_undo_left(undo_page, ptr) < 2) {
-
- return(0);
- }
-
- /*----------------------------------------*/
- /* Write pointers to the previous and the next undo log records */
-
- if (trx_undo_left(undo_page, ptr) < 2) {
-
- return(0);
- }
-
- mach_write_to_2(ptr, first_free);
- ptr += 2;
-
- mach_write_to_2(undo_page + first_free, ptr - undo_page);
-
- mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- ptr - undo_page);
-
- /* Write the log entry to the REDO log of this change in the UNDO
- log */
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
- ptr - undo_page, mtr);
- return(first_free);
-}
-
-/**************************************************************************
-Reads from an undo log record the general parameters. */
-
-byte*
-trx_undo_rec_get_pars(
-/*==================*/
- /* out: remaining part of undo log
- record after reading these values */
- trx_undo_rec_t* undo_rec, /* in: undo log record */
- ulint* type, /* out: undo record type:
- TRX_UNDO_INSERT_REC, ... */
- ulint* cmpl_info, /* out: compiler info, relevant only
- for update type records */
- ibool* updated_extern, /* out: TRUE if we updated an
- externally stored fild */
- dulint* undo_no, /* out: undo log record number */
- dulint* table_id) /* out: table id */
-{
- byte* ptr;
- ulint len;
- ulint type_cmpl;
-
- ptr = undo_rec + 2;
-
- type_cmpl = mach_read_from_1(ptr);
- ptr++;
-
- if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
- *updated_extern = TRUE;
- type_cmpl -= TRX_UNDO_UPD_EXTERN;
- } else {
- *updated_extern = FALSE;
- }
-
- *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
- *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
-
- *undo_no = mach_dulint_read_much_compressed(ptr);
- len = mach_dulint_get_much_compressed_size(*undo_no);
- ptr += len;
-
- *table_id = mach_dulint_read_much_compressed(ptr);
- len = mach_dulint_get_much_compressed_size(*table_id);
- ptr += len;
-
- return(ptr);
-}
-
-/**************************************************************************
-Reads from an undo log record a stored column value. */
-static
-byte*
-trx_undo_rec_get_col_val(
-/*=====================*/
- /* out: remaining part of undo log record after
- reading these values */
- byte* ptr, /* in: pointer to remaining part of undo log record */
- byte** field, /* out: pointer to stored field */
- ulint* len) /* out: length of the field, or UNIV_SQL_NULL */
-{
- *len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*len);
-
- *field = ptr;
-
- if (*len != UNIV_SQL_NULL) {
- if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
- ptr += (*len - UNIV_EXTERN_STORAGE_FIELD);
- } else {
- ptr += *len;
- }
- }
-
- return(ptr);
-}
-
-/***********************************************************************
-Builds a row reference from an undo log record. */
-
-byte*
-trx_undo_rec_get_row_ref(
-/*=====================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part of a copy of an undo log
- record, at the start of the row reference;
- NOTE that this copy of the undo log record must
- be preserved as long as the row reference is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /* in: clustered index */
- dtuple_t** ref, /* out, own: row reference */
- mem_heap_t* heap) /* in: memory heap from which the memory
- needed is allocated */
-{
- dfield_t* dfield;
- byte* field;
- ulint len;
- ulint ref_len;
- ulint i;
-
- ut_ad(index && ptr && ref && heap);
- ut_a(index->type & DICT_CLUSTERED);
-
- ref_len = dict_index_get_n_unique(index);
-
- *ref = dtuple_create(heap, ref_len);
-
- dict_index_copy_types(*ref, index, ref_len);
-
- for (i = 0; i < ref_len; i++) {
- dfield = dtuple_get_nth_field(*ref, i);
-
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
-
- dfield_set_data(dfield, field, len);
- }
-
- return(ptr);
-}
-
-/***********************************************************************
-Skips a row reference from an undo log record. */
-
-byte*
-trx_undo_rec_skip_row_ref(
-/*======================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part in update undo log
- record, at the start of the row reference */
- dict_index_t* index) /* in: clustered index */
-{
- byte* field;
- ulint len;
- ulint ref_len;
- ulint i;
-
- ut_ad(index && ptr);
- ut_a(index->type & DICT_CLUSTERED);
-
- ref_len = dict_index_get_n_unique(index);
-
- for (i = 0; i < ref_len; i++) {
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
- }
-
- return(ptr);
-}
-
-/**************************************************************************
-Reports in the undo log of an update or delete marking of a clustered index
-record. */
-static
-ulint
-trx_undo_page_report_modify(
-/*========================*/
- /* out: byte offset of the inserted
- undo log entry on the page if succeed,
- 0 if fail */
- page_t* undo_page, /* in: undo log page */
- trx_t* trx, /* in: transaction */
- dict_index_t* index, /* in: clustered index where update or
- delete marking is done */
- rec_t* rec, /* in: clustered index record which
- has NOT yet been modified */
- const ulint* offsets, /* in: rec_get_offsets(rec, index) */
- upd_t* update, /* in: update vector which tells the
- columns to be updated; in the case of
- a delete, this should be set to NULL */
- ulint cmpl_info, /* in: compiler info on secondary
- index updates */
- mtr_t* mtr) /* in: mtr */
-{
- dict_table_t* table;
- upd_field_t* upd_field;
- ulint first_free;
- byte* ptr;
- ulint len;
- byte* field;
- ulint flen;
- ulint pos;
- dulint roll_ptr;
- dulint trx_id;
- ulint bits;
- ulint col_no;
- byte* old_ptr;
- ulint type_cmpl;
- byte* type_cmpl_ptr;
- ulint i;
-
- ut_a(index->type & DICT_CLUSTERED);
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
- table = index->table;
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
-
- ut_ad(first_free <= UNIV_PAGE_SIZE);
-
- if (trx_undo_left(undo_page, ptr) < 50) {
-
- /* NOTE: the value 50 must be big enough so that the general
- fields written below fit on the undo log page */
-
- return(0);
- }
-
- /* Reserve 2 bytes for the pointer to the next undo log record */
- ptr += 2;
-
- /* Store first some general parameters to the undo log */
-
- if (update) {
- if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
- type_cmpl = TRX_UNDO_UPD_DEL_REC;
- } else {
- type_cmpl = TRX_UNDO_UPD_EXIST_REC;
- }
- } else {
- type_cmpl = TRX_UNDO_DEL_MARK_REC;
- }
-
- type_cmpl = type_cmpl | (cmpl_info * TRX_UNDO_CMPL_INFO_MULT);
-
- mach_write_to_1(ptr, type_cmpl);
-
- type_cmpl_ptr = ptr;
-
- ptr++;
- len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
- ptr += len;
-
- len = mach_dulint_write_much_compressed(ptr, table->id);
- ptr += len;
-
- /*----------------------------------------*/
- /* Store the state of the info bits */
-
- bits = rec_get_info_bits(rec, dict_table_is_comp(table));
- mach_write_to_1(ptr, bits);
- ptr += 1;
-
- /* Store the values of the system columns */
- field = rec_get_nth_field(rec, offsets,
- dict_index_get_sys_col_pos(
- index, DATA_TRX_ID), &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- trx_id = trx_read_trx_id(field);
- field = rec_get_nth_field(rec, offsets,
- dict_index_get_sys_col_pos(
- index, DATA_ROLL_PTR), &len);
- ut_ad(len == DATA_ROLL_PTR_LEN);
- roll_ptr = trx_read_roll_ptr(field);
-
- len = mach_dulint_write_compressed(ptr, trx_id);
- ptr += len;
-
- len = mach_dulint_write_compressed(ptr, roll_ptr);
- ptr += len;
-
- /*----------------------------------------*/
- /* Store then the fields required to uniquely determine the
- record which will be modified in the clustered index */
-
- for (i = 0; i < dict_index_get_n_unique(index); i++) {
-
- field = rec_get_nth_field(rec, offsets, i, &flen);
-
- if (trx_undo_left(undo_page, ptr) < 4) {
-
- return(0);
- }
-
- len = mach_write_compressed(ptr, flen);
- ptr += len;
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, field, flen);
- ptr += flen;
- }
- }
-
- /*----------------------------------------*/
- /* Save to the undo log the old values of the columns to be updated. */
-
- if (update) {
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- len = mach_write_compressed(ptr, upd_get_n_fields(update));
- ptr += len;
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- upd_field = upd_get_nth_field(update, i);
- pos = upd_field->field_no;
-
- /* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- len = mach_write_compressed(ptr, pos);
- ptr += len;
-
- /* Save the old value of field */
- field = rec_get_nth_field(rec, offsets, pos, &flen);
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- if (rec_offs_nth_extern(offsets, pos)) {
- /* If a field has external storage, we add
- to flen the flag */
-
- len = mach_write_compressed(
- ptr,
- UNIV_EXTERN_STORAGE_FIELD + flen);
-
- /* Notify purge that it eventually has to
- free the old externally stored field */
-
- trx->update_undo->del_marks = TRUE;
-
- *type_cmpl_ptr = *type_cmpl_ptr
- | TRX_UNDO_UPD_EXTERN;
- } else {
- len = mach_write_compressed(ptr, flen);
- }
-
- ptr += len;
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, field, flen);
- ptr += flen;
- }
- }
- }
-
- /*----------------------------------------*/
- /* In the case of a delete marking, and also in the case of an update
- where any ordering field of any index changes, store the values of all
- columns which occur as ordering fields in any index. This info is used
- in the purge of old versions where we use it to build and search the
- delete marked index records, to look if we can remove them from the
- index tree. Note that starting from 4.0.14 also externally stored
- fields can be ordering in some index. But we always store at least
- 384 first bytes locally to the clustered index record, which means
- we can construct the column prefix fields in the index from the
- stored data. */
-
- if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
-
- trx->update_undo->del_marks = TRUE;
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- old_ptr = ptr;
-
- /* Reserve 2 bytes to write the number of bytes the stored
- fields take in this undo record */
-
- ptr += 2;
-
- for (col_no = 0; col_no < dict_table_get_n_cols(table);
- col_no++) {
-
- const dict_col_t* col
- = dict_table_get_nth_col(table, col_no);
-
- if (col->ord_part > 0) {
-
- pos = dict_index_get_nth_col_pos(index,
- col_no);
-
- /* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- len = mach_write_compressed(ptr, pos);
- ptr += len;
-
- /* Save the old value of field */
- field = rec_get_nth_field(rec, offsets, pos,
- &flen);
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- len = mach_write_compressed(ptr, flen);
- ptr += len;
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr)
- < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, field, flen);
- ptr += flen;
- }
- }
- }
-
- mach_write_to_2(old_ptr, ptr - old_ptr);
- }
-
- /*----------------------------------------*/
- /* Write pointers to the previous and the next undo log records */
- if (trx_undo_left(undo_page, ptr) < 2) {
-
- return(0);
- }
-
- mach_write_to_2(ptr, first_free);
- ptr += 2;
- mach_write_to_2(undo_page + first_free, ptr - undo_page);
-
- mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- ptr - undo_page);
-
- /* Write to the REDO log about this change in the UNDO log */
-
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
- ptr - undo_page, mtr);
- return(first_free);
-}
-
-/**************************************************************************
-Reads from an undo log update record the system field values of the old
-version. */
-
-byte*
-trx_undo_update_rec_get_sys_cols(
-/*=============================*/
- /* out: remaining part of undo log
- record after reading these values */
- byte* ptr, /* in: remaining part of undo log
- record after reading general
- parameters */
- dulint* trx_id, /* out: trx id */
- dulint* roll_ptr, /* out: roll ptr */
- ulint* info_bits) /* out: info bits state */
-{
- ulint len;
-
- /* Read the state of the info bits */
- *info_bits = mach_read_from_1(ptr);
- ptr += 1;
-
- /* Read the values of the system columns */
-
- *trx_id = mach_dulint_read_compressed(ptr);
- len = mach_dulint_get_compressed_size(*trx_id);
- ptr += len;
-
- *roll_ptr = mach_dulint_read_compressed(ptr);
- len = mach_dulint_get_compressed_size(*roll_ptr);
- ptr += len;
-
- return(ptr);
-}
-
-/**************************************************************************
-Reads from an update undo log record the number of updated fields. */
-UNIV_INLINE
-byte*
-trx_undo_update_rec_get_n_upd_fields(
-/*=================================*/
- /* out: remaining part of undo log record after
- reading this value */
- byte* ptr, /* in: pointer to remaining part of undo log record */
- ulint* n) /* out: number of fields */
-{
- *n = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*n);
-
- return(ptr);
-}
-
-/**************************************************************************
-Reads from an update undo log record a stored field number. */
-UNIV_INLINE
-byte*
-trx_undo_update_rec_get_field_no(
-/*=============================*/
- /* out: remaining part of undo log record after
- reading this value */
- byte* ptr, /* in: pointer to remaining part of undo log record */
- ulint* field_no)/* out: field number */
-{
- *field_no = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*field_no);
-
- return(ptr);
-}
-
-/***********************************************************************
-Builds an update vector based on a remaining part of an undo log record. */
-
-byte*
-trx_undo_update_rec_get_update(
-/*===========================*/
- /* out: remaining part of the record,
- NULL if an error detected, which means that
- the record is corrupted */
- byte* ptr, /* in: remaining part in update undo log
- record, after reading the row reference
- NOTE that this copy of the undo log record must
- be preserved as long as the update vector is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /* in: clustered index */
- ulint type, /* in: TRX_UNDO_UPD_EXIST_REC,
- TRX_UNDO_UPD_DEL_REC, or
- TRX_UNDO_DEL_MARK_REC; in the last case,
- only trx id and roll ptr fields are added to
- the update vector */
- dulint trx_id, /* in: transaction id from this undo record */
- dulint roll_ptr,/* in: roll pointer from this undo record */
- ulint info_bits,/* in: info bits from this undo record */
- trx_t* trx, /* in: transaction */
- mem_heap_t* heap, /* in: memory heap from which the memory
- needed is allocated */
- upd_t** upd) /* out, own: update vector */
-{
- upd_field_t* upd_field;
- upd_t* update;
- ulint n_fields;
- byte* buf;
- byte* field;
- ulint len;
- ulint field_no;
- ulint i;
-
- ut_a(index->type & DICT_CLUSTERED);
-
- if (type != TRX_UNDO_DEL_MARK_REC) {
- ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
- } else {
- n_fields = 0;
- }
-
- update = upd_create(n_fields + 2, heap);
-
- update->info_bits = info_bits;
-
- /* Store first trx id and roll ptr to update vector */
-
- upd_field = upd_get_nth_field(update, n_fields);
- buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
- trx_write_trx_id(buf, trx_id);
-
- upd_field_set_field_no(upd_field,
- dict_index_get_sys_col_pos(index, DATA_TRX_ID),
- index, trx);
- dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
-
- upd_field = upd_get_nth_field(update, n_fields + 1);
- buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
- trx_write_roll_ptr(buf, roll_ptr);
-
- upd_field_set_field_no(
- upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
- index, trx);
- dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
-
- /* Store then the updated ordinary columns to the update vector */
-
- for (i = 0; i < n_fields; i++) {
-
- ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
-
- if (field_no >= dict_index_get_n_fields(index)) {
- fprintf(stderr,
- "InnoDB: Error: trying to access"
- " update undo rec field %lu in ",
- (ulong) field_no);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, "\n"
- "InnoDB: but index has only %lu fields\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Run also CHECK TABLE ",
- (ulong) dict_index_get_n_fields(index));
- ut_print_name(stderr, trx, TRUE, index->table_name);
- fprintf(stderr, "\n"
- "InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
- (ulong) n_fields, (ulong) i, ptr);
- return(NULL);
- }
-
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
-
- upd_field = upd_get_nth_field(update, i);
-
- upd_field_set_field_no(upd_field, field_no, index, trx);
-
- if (len != UNIV_SQL_NULL && len >= UNIV_EXTERN_STORAGE_FIELD) {
-
- upd_field->extern_storage = TRUE;
-
- len -= UNIV_EXTERN_STORAGE_FIELD;
- }
-
- dfield_set_data(&(upd_field->new_val), field, len);
- }
-
- *upd = update;
-
- return(ptr);
-}
-
-/***********************************************************************
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table. */
-
-byte*
-trx_undo_rec_get_partial_row(
-/*=========================*/
- /* out: pointer to remaining part of undo
- record */
- byte* ptr, /* in: remaining part in update undo log
- record of a suitable type, at the start of
- the stored index columns;
- NOTE that this copy of the undo log record must
- be preserved as long as the partial row is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /* in: clustered index */
- dtuple_t** row, /* out, own: partial row */
- mem_heap_t* heap) /* in: memory heap from which the memory
- needed is allocated */
-{
- dfield_t* dfield;
- byte* field;
- ulint len;
- ulint field_no;
- ulint col_no;
- ulint row_len;
- ulint total_len;
- byte* start_ptr;
- ulint i;
-
- ut_ad(index && ptr && row && heap);
-
- row_len = dict_table_get_n_cols(index->table);
-
- *row = dtuple_create(heap, row_len);
-
- dict_table_copy_types(*row, index->table);
-
- start_ptr = ptr;
-
- total_len = mach_read_from_2(ptr);
- ptr += 2;
-
- for (i = 0;; i++) {
-
- if (ptr == start_ptr + total_len) {
-
- break;
- }
-
- ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
-
- col_no = dict_index_get_nth_col_no(index, field_no);
-
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
-
- dfield = dtuple_get_nth_field(*row, col_no);
-
- dfield_set_data(dfield, field, len);
- }
-
- return(ptr);
-}
-
-/***************************************************************************
-Erases the unused undo log page end. */
-static
-void
-trx_undo_erase_page_end(
-/*====================*/
- page_t* undo_page, /* in: undo page whose end to erase */
- mtr_t* mtr) /* in: mtr */
-{
- ulint first_free;
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- memset(undo_page + first_free, 0xff,
- (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
-
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
-}
-
-/***************************************************************
-Parses a redo log record of erasing of an undo page end. */
-
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)), /* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- if (page == NULL) {
-
- return(ptr);
- }
-
- trx_undo_erase_page_end(page, mtr);
-
- return(ptr);
-}
-
-/***************************************************************************
-Writes information to an undo log about an insert, update, or a delete marking
-of a clustered index record. This information is used in a rollback of the
-transaction and in consistent reads that must look to the history of this
-transaction. */
-
-ulint
-trx_undo_report_row_operation(
-/*==========================*/
- /* out: DB_SUCCESS or error code */
- ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is
- set, does nothing */
- ulint op_type, /* in: TRX_UNDO_INSERT_OP or
- TRX_UNDO_MODIFY_OP */
- que_thr_t* thr, /* in: query thread */
- dict_index_t* index, /* in: clustered index */
- dtuple_t* clust_entry, /* in: in the case of an insert,
- index entry to insert into the
- clustered index, otherwise NULL */
- upd_t* update, /* in: in the case of an update,
- the update vector, otherwise NULL */
- ulint cmpl_info, /* in: compiler info on secondary
- index updates */
- rec_t* rec, /* in: in case of an update or delete
- marking, the record in the clustered
- index, otherwise NULL */
- dulint* roll_ptr) /* out: rollback pointer to the
- inserted undo log record,
- ut_dulint_zero if BTR_NO_UNDO_LOG
- flag was specified */
-{
- trx_t* trx;
- trx_undo_t* undo;
- page_t* undo_page;
- ulint offset;
- ulint page_no;
- ibool is_insert;
- trx_rseg_t* rseg;
- mtr_t mtr;
- ulint err = DB_SUCCESS;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- *offsets_ = (sizeof offsets_) / sizeof *offsets_;
-
- ut_a(index->type & DICT_CLUSTERED);
-
- if (flags & BTR_NO_UNDO_LOG_FLAG) {
-
- *roll_ptr = ut_dulint_zero;
-
- return(err);
- }
-
- ut_ad(thr);
- ut_ad((op_type != TRX_UNDO_INSERT_OP)
- || (clust_entry && !update && !rec));
-
- trx = thr_get_trx(thr);
- rseg = trx->rseg;
-
- mutex_enter(&(trx->undo_mutex));
-
- /* If the undo log is not assigned yet, assign one */
-
- if (op_type == TRX_UNDO_INSERT_OP) {
-
- if (trx->insert_undo == NULL) {
-
- err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
- }
-
- undo = trx->insert_undo;
- is_insert = TRUE;
- } else {
- ut_ad(op_type == TRX_UNDO_MODIFY_OP);
-
- if (trx->update_undo == NULL) {
-
- err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
-
- }
-
- undo = trx->update_undo;
- is_insert = FALSE;
- }
-
- if (err != DB_SUCCESS) {
- /* Did not succeed: return the error encountered */
- mutex_exit(&(trx->undo_mutex));
-
- return(err);
- }
-
- page_no = undo->last_page_no;
-
- mtr_start(&mtr);
-
- for (;;) {
- undo_page = buf_page_get_gen(undo->space, page_no,
- RW_X_LATCH, undo->guess_page,
- BUF_GET,
- __FILE__, __LINE__,
- &mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
- if (op_type == TRX_UNDO_INSERT_OP) {
- offset = trx_undo_page_report_insert(
- undo_page, trx, index, clust_entry, &mtr);
- } else {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- offset = trx_undo_page_report_modify(
- undo_page, trx, index, rec, offsets, update,
- cmpl_info, &mtr);
- }
-
- if (offset == 0) {
- /* The record did not fit on the page. We erase the
- end segment of the undo log page and write a log
- record of it: this is to ensure that in the debug
- version the replicate page constructed using the log
- records stays identical to the original page */
-
- trx_undo_erase_page_end(undo_page, &mtr);
- }
-
- mtr_commit(&mtr);
-
- if (offset != 0) {
- /* Success */
-
- break;
- }
-
- ut_ad(page_no == undo->last_page_no);
-
- /* We have to extend the undo log by one page */
-
- mtr_start(&mtr);
-
- /* When we add a page to an undo log, this is analogous to
- a pessimistic insert in a B-tree, and we must reserve the
- counterpart of the tree latch, which is the rseg mutex. */
-
- mutex_enter(&(rseg->mutex));
-
- page_no = trx_undo_add_page(trx, undo, &mtr);
-
- mutex_exit(&(rseg->mutex));
-
- if (page_no == FIL_NULL) {
- /* Did not succeed: out of space */
-
- mutex_exit(&(trx->undo_mutex));
- mtr_commit(&mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(DB_OUT_OF_FILE_SPACE);
- }
- }
-
- undo->empty = FALSE;
- undo->top_page_no = page_no;
- undo->top_offset = offset;
- undo->top_undo_no = trx->undo_no;
- undo->guess_page = undo_page;
-
- UT_DULINT_INC(trx->undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-
- *roll_ptr = trx_undo_build_roll_ptr(is_insert, rseg->id, page_no,
- offset);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
-
-/**********************************************************************
-Copies an undo record to heap. This function can be called if we know that
-the undo log record exists. */
-
-trx_undo_rec_t*
-trx_undo_get_undo_rec_low(
-/*======================*/
- /* out, own: copy of the record */
- dulint roll_ptr, /* in: roll pointer to record */
- mem_heap_t* heap) /* in: memory heap where copied */
-{
- trx_undo_rec_t* undo_rec;
- ulint rseg_id;
- ulint page_no;
- ulint offset;
- page_t* undo_page;
- trx_rseg_t* rseg;
- ibool is_insert;
- mtr_t mtr;
-
- trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
- &offset);
- rseg = trx_rseg_get_on_id(rseg_id);
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(rseg->space, page_no, &mtr);
-
- undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
-
- mtr_commit(&mtr);
-
- return(undo_rec);
-}
-
-/**********************************************************************
-Copies an undo record to heap. */
-
-ulint
-trx_undo_get_undo_rec(
-/*==================*/
- /* out: DB_SUCCESS, or
- DB_MISSING_HISTORY if the undo log
- has been truncated and we cannot
- fetch the old version; NOTE: the
- caller must have latches on the
- clustered index page and purge_view */
- dulint roll_ptr, /* in: roll pointer to record */
- dulint trx_id, /* in: id of the trx that generated
- the roll pointer: it points to an
- undo log of this transaction */
- trx_undo_rec_t** undo_rec, /* out, own: copy of the record */
- mem_heap_t* heap) /* in: memory heap where copied */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!trx_purge_update_undo_must_exist(trx_id)) {
-
- /* It may be that the necessary undo log has already been
- deleted */
-
- return(DB_MISSING_HISTORY);
- }
-
- *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************************
-Build a previous version of a clustered index record. This function checks
-that the caller has a latch on the index page of the clustered index record
-and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked. */
-
-ulint
-trx_undo_prev_version_build(
-/*========================*/
- /* out: DB_SUCCESS, or DB_MISSING_HISTORY if
- the previous version is not >= purge_view,
- which means that it may have been removed,
- DB_ERROR if corrupted record */
- rec_t* index_rec,/* in: clustered index record in the
- index tree */
- mtr_t* index_mtr __attribute__((unused)),
- /* in: mtr which contains the latch to
- index_rec page and purge_view */
- rec_t* rec, /* in: version of a clustered index record */
- dict_index_t* index, /* in: clustered index */
- ulint* offsets,/* in: rec_get_offsets(rec, index) */
- mem_heap_t* heap, /* in: memory heap from which the memory
- needed is allocated */
- rec_t** old_vers)/* out, own: previous version, or NULL if
- rec is the first inserted version, or if
- history data has been deleted */
-{
- trx_undo_rec_t* undo_rec;
- dtuple_t* entry;
- dulint rec_trx_id;
- ulint type;
- dulint undo_no;
- dulint table_id;
- dulint trx_id;
- dulint roll_ptr;
- dulint old_roll_ptr;
- upd_t* update;
- byte* ptr;
- ulint info_bits;
- ulint cmpl_info;
- ibool dummy_extern;
- byte* buf;
- ulint err;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec),
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(index_mtr, buf_block_align(index_rec),
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (!(index->type & DICT_CLUSTERED)) {
- fprintf(stderr, "InnoDB: Error: trying to access"
- " update undo rec for non-clustered index %s\n"
- "InnoDB: Submit a detailed bug report to"
- " http://bugs.mysql.com\n"
- "InnoDB: index record ", index->name);
- rec_print(stderr, index_rec, index);
- fputs("\n"
- "InnoDB: record version ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- return(DB_ERROR);
- }
-
- roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
- old_roll_ptr = roll_ptr;
-
- *old_vers = NULL;
-
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
- /* The record rec is the first inserted version */
-
- return(DB_SUCCESS);
- }
-
- rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
- &dummy_extern, &undo_no, &table_id);
-
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits);
- ptr = trx_undo_rec_skip_row_ref(ptr, index);
-
- ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
- roll_ptr, info_bits,
- NULL, heap, &update);
-
- if (ut_dulint_cmp(table_id, index->table->id) != 0) {
- ptr = NULL;
-
- fprintf(stderr,
- "InnoDB: Error: trying to access update undo rec"
- " for table %s\n"
- "InnoDB: but the table id in the"
- " undo record is wrong\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Run also CHECK TABLE %s\n",
- index->table_name, index->table_name);
- }
-
- if (ptr == NULL) {
- /* The record was corrupted, return an error; these printfs
- should catch an elusive bug in row_vers_old_has_index_entry */
-
- fprintf(stderr,
- "InnoDB: table %s, index %s, n_uniq %lu\n"
- "InnoDB: undo rec address %p, type %lu cmpl_info %lu\n"
- "InnoDB: undo rec table id %lu %lu,"
- " index table id %lu %lu\n"
- "InnoDB: dump of 150 bytes in undo rec: ",
- index->table_name, index->name,
- (ulong) dict_index_get_n_unique(index),
- undo_rec, (ulong) type, (ulong) cmpl_info,
- (ulong) ut_dulint_get_high(table_id),
- (ulong) ut_dulint_get_low(table_id),
- (ulong) ut_dulint_get_high(index->table->id),
- (ulong) ut_dulint_get_low(index->table->id));
- ut_print_buf(stderr, undo_rec, 150);
- fputs("\n"
- "InnoDB: index record ", stderr);
- rec_print(stderr, index_rec, index);
- fputs("\n"
- "InnoDB: record version ", stderr);
- rec_print_new(stderr, rec, offsets);
- fprintf(stderr, "\n"
- "InnoDB: Record trx id %lu %lu, update rec"
- " trx id %lu %lu\n"
- "InnoDB: Roll ptr in rec %lu %lu, in update rec"
- " %lu %lu\n",
- (ulong) ut_dulint_get_high(rec_trx_id),
- (ulong) ut_dulint_get_low(rec_trx_id),
- (ulong) ut_dulint_get_high(trx_id),
- (ulong) ut_dulint_get_low(trx_id),
- (ulong) ut_dulint_get_high(old_roll_ptr),
- (ulong) ut_dulint_get_low(old_roll_ptr),
- (ulong) ut_dulint_get_high(roll_ptr),
- (ulong) ut_dulint_get_low(roll_ptr));
-
- trx_purge_sys_print();
- return(DB_ERROR);
- }
-
- if (row_upd_changes_field_size_or_external(index, offsets, update)) {
- ulint* ext_vect;
- ulint n_ext_vect;
-
- /* We have to set the appropriate extern storage bits in the
- old version of the record: the extern bits in rec for those
- fields that update does NOT update, as well as the the bits for
- those fields that update updates to become externally stored
- fields. Store the info to ext_vect: */
-
- ext_vect = mem_alloc(sizeof(ulint)
- * rec_offs_n_fields(offsets));
- n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets,
- update);
- entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec,
- heap);
- row_upd_index_replace_new_col_vals(entry, index, update, heap);
-
- buf = mem_heap_alloc(heap,
- rec_get_converted_size(index, entry));
-
- *old_vers = rec_convert_dtuple_to_rec(buf, index, entry);
-
- /* Now set the extern bits in the old version of the record */
- rec_set_field_extern_bits(*old_vers, index,
- ext_vect, n_ext_vect, NULL);
- mem_free(ext_vect);
- } else {
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
- *old_vers = rec_copy(buf, rec, offsets);
- rec_offs_make_valid(*old_vers, index, offsets);
- row_upd_rec_in_place(*old_vers, offsets, update);
- }
-
- return(DB_SUCCESS);
-}
diff --git a/storage/innobase/trx/trx0roll.c b/storage/innobase/trx/trx0roll.c
deleted file mode 100644
index 8934fe87c7e..00000000000
--- a/storage/innobase/trx/trx0roll.c
+++ /dev/null
@@ -1,1341 +0,0 @@
-/******************************************************
-Transaction rollback
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0roll.h"
-
-#ifdef UNIV_NONINL
-#include "trx0roll.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0undo.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "usr0sess.h"
-#include "srv0que.h"
-#include "srv0start.h"
-#include "row0undo.h"
-#include "row0mysql.h"
-#include "lock0lock.h"
-#include "pars0pars.h"
-
-/* This many pages must be undone before a truncate is tried within rollback */
-#define TRX_ROLL_TRUNC_THRESHOLD 1
-
-/* In crash recovery, the current trx to be rolled back */
-trx_t* trx_roll_crash_recv_trx = NULL;
-
-/* In crash recovery we set this to the undo n:o of the current trx to be
-rolled back. Then we can print how many % the rollback has progressed. */
-ib_longlong trx_roll_max_undo_no;
-
-/* Auxiliary variable which tells the previous progress % we printed */
-ulint trx_roll_progress_printed_pct;
-
-/***********************************************************************
-Rollback a transaction used in MySQL. */
-
-int
-trx_general_rollback_for_mysql(
-/*===========================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- ibool partial,/* in: TRUE if partial rollback requested */
- trx_savept_t* savept) /* in: pointer to savepoint undo number, if
- partial rollback requested */
-{
-#ifndef UNIV_HOTBACKUP
- mem_heap_t* heap;
- que_thr_t* thr;
- roll_node_t* roll_node;
-
- /* Tell Innobase server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- trx_start_if_not_started(trx);
-
- heap = mem_heap_create(512);
-
- roll_node = roll_node_create(heap);
-
- roll_node->partial = partial;
-
- if (partial) {
- roll_node->savept = *savept;
- }
-
- trx->error_state = DB_SUCCESS;
-
- thr = pars_complete_graph_for_exec(roll_node, trx, heap);
-
- ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
- que_run_threads(thr);
-
- mutex_enter(&kernel_mutex);
-
- while (trx->que_state != TRX_QUE_RUNNING) {
-
- mutex_exit(&kernel_mutex);
-
- os_thread_sleep(100000);
-
- mutex_enter(&kernel_mutex);
- }
-
- mutex_exit(&kernel_mutex);
-
- mem_heap_free(heap);
-
- ut_a(trx->error_state == DB_SUCCESS);
-
- /* Tell Innobase server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- return((int) trx->error_state);
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
- return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
-}
-
-/***********************************************************************
-Rollback a transaction used in MySQL. */
-
-int
-trx_rollback_for_mysql(
-/*===================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx) /* in: transaction handle */
-{
- int err;
-
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- return(DB_SUCCESS);
- }
-
- trx->op_info = "rollback";
-
- /* If we are doing the XA recovery of prepared transactions, then
- the transaction object does not have an InnoDB session object, and we
- set a dummy session that we use for all MySQL transactions. */
-
- mutex_enter(&kernel_mutex);
-
- if (trx->sess == NULL) {
- /* Open a dummy session */
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
- trx->sess = trx_dummy_sess;
- }
-
- mutex_exit(&kernel_mutex);
-
- err = trx_general_rollback_for_mysql(trx, FALSE, NULL);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/***********************************************************************
-Rollback the latest SQL statement for MySQL. */
-
-int
-trx_rollback_last_sql_stat_for_mysql(
-/*=================================*/
- /* out: error code or DB_SUCCESS */
- trx_t* trx) /* in: transaction handle */
-{
- int err;
-
- if (trx->conc_state == TRX_NOT_STARTED) {
-
- return(DB_SUCCESS);
- }
-
- trx->op_info = "rollback of SQL statement";
-
- err = trx_general_rollback_for_mysql(trx, TRUE,
- &(trx->last_sql_stat_start));
- /* The following call should not be needed, but we play safe: */
- trx_mark_sql_stat_end(trx);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/***********************************************************************
-Frees a single savepoint struct. */
-
-void
-trx_roll_savepoint_free(
-/*=====================*/
- trx_t* trx, /* in: transaction handle */
- trx_named_savept_t* savep) /* in: savepoint to free */
-{
- ut_a(savep != NULL);
- ut_a(UT_LIST_GET_LEN(trx->trx_savepoints) > 0);
-
- UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
- mem_free(savep->name);
- mem_free(savep);
-}
-
-/***********************************************************************
-Frees savepoint structs starting from savep, if savep == NULL then
-free all savepoints. */
-
-void
-trx_roll_savepoints_free(
-/*=====================*/
- trx_t* trx, /* in: transaction handle */
- trx_named_savept_t* savep) /* in: free all savepoints > this one;
- if this is NULL, free all savepoints
- of trx */
-{
- trx_named_savept_t* next_savep;
-
- if (savep == NULL) {
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
- } else {
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
-
- while (savep != NULL) {
- next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
-
- trx_roll_savepoint_free(trx, savep);
-
- savep = next_savep;
- }
-}
-
-/***********************************************************************
-Rolls back a transaction back to a named savepoint. Modifications after the
-savepoint are undone but InnoDB does NOT release the corresponding locks
-which are stored in memory. If a lock is 'implicit', that is, a new inserted
-row holds a lock where the lock information is carried by the trx id stored in
-the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted. */
-
-ulint
-trx_rollback_to_savepoint_for_mysql(
-/*================================*/
- /* out: if no savepoint
- of the name found then
- DB_NO_SAVEPOINT,
- otherwise DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name, /* in: savepoint name */
- ib_longlong* mysql_binlog_cache_pos) /* out: the MySQL binlog cache
- position corresponding to this
- savepoint; MySQL needs this
- information to remove the
- binlog entries of the queries
- executed after the savepoint */
-{
- trx_named_savept_t* savep;
- ulint err;
-
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
-
- while (savep != NULL) {
- if (0 == ut_strcmp(savep->name, savepoint_name)) {
- /* Found */
- break;
- }
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
-
- if (savep == NULL) {
-
- return(DB_NO_SAVEPOINT);
- }
-
- if (trx->conc_state == TRX_NOT_STARTED) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
- ut_print_name(stderr, trx, FALSE, savep->name);
- fputs(" though it is not started\n", stderr);
- return(DB_ERROR);
- }
-
- /* We can now free all savepoints strictly later than this one */
-
- trx_roll_savepoints_free(trx, savep);
-
- *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
-
- trx->op_info = "rollback to a savepoint";
-
- err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept));
-
- /* Store the current undo_no of the transaction so that we know where
- to roll back if we have to roll back the next SQL statement: */
-
- trx_mark_sql_stat_end(trx);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/***********************************************************************
-Creates a named savepoint. If the transaction is not yet started, starts it.
-If there is already a savepoint of the same name, this call erases that old
-savepoint and replaces it with a new. Savepoints are deleted in a transaction
-commit or rollback. */
-
-ulint
-trx_savepoint_for_mysql(
-/*====================*/
- /* out: always DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name, /* in: savepoint name */
- ib_longlong binlog_cache_pos) /* in: MySQL binlog cache
- position corresponding to this
- connection at the time of the
- savepoint */
-{
- trx_named_savept_t* savep;
-
- ut_a(trx);
- ut_a(savepoint_name);
-
- trx_start_if_not_started(trx);
-
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
-
- while (savep != NULL) {
- if (0 == ut_strcmp(savep->name, savepoint_name)) {
- /* Found */
- break;
- }
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
-
- if (savep) {
- /* There is a savepoint with the same name: free that */
-
- UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
-
- mem_free(savep->name);
- mem_free(savep);
- }
-
- /* Create a new savepoint and add it as the last in the list */
-
- savep = mem_alloc(sizeof(trx_named_savept_t));
-
- savep->name = mem_strdup(savepoint_name);
-
- savep->savept = trx_savept_take(trx);
-
- savep->mysql_binlog_cache_pos = binlog_cache_pos;
-
- UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************************
-Releases only the named savepoint. Savepoints which were set after this
-savepoint are left as is. */
-
-ulint
-trx_release_savepoint_for_mysql(
-/*============================*/
- /* out: if no savepoint
- of the name found then
- DB_NO_SAVEPOINT,
- otherwise DB_SUCCESS */
- trx_t* trx, /* in: transaction handle */
- const char* savepoint_name) /* in: savepoint name */
-{
- trx_named_savept_t* savep;
-
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
-
- /* Search for the savepoint by name and free if found. */
- while (savep != NULL) {
- if (0 == ut_strcmp(savep->name, savepoint_name)) {
- trx_roll_savepoint_free(trx, savep);
- return(DB_SUCCESS);
- }
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
-
- return(DB_NO_SAVEPOINT);
-}
-
-/***********************************************************************
-Returns a transaction savepoint taken at this point in time. */
-
-trx_savept_t
-trx_savept_take(
-/*============*/
- /* out: savepoint */
- trx_t* trx) /* in: transaction */
-{
- trx_savept_t savept;
-
- savept.least_undo_no = trx->undo_no;
-
- return(savept);
-}
-
-/***********************************************************************
-Rollback or clean up transactions which have no user session. If the
-transaction already was committed, then we clean up a possible insert
-undo log. If the transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread. */
-
-os_thread_ret_t
-trx_rollback_or_clean_all_without_sess(
-/*===================================*/
- /* out: a dummy parameter */
- void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
- os_thread_create */
-{
- mem_heap_t* heap;
- que_fork_t* fork;
- que_thr_t* thr;
- roll_node_t* roll_node;
- trx_t* trx;
- dict_table_t* table;
- ib_longlong rows_to_undo;
- const char* unit = "";
- int err;
-
- mutex_enter(&kernel_mutex);
-
- /* Open a dummy session */
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
- mutex_exit(&kernel_mutex);
-
- if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
-
- fprintf(stderr,
- "InnoDB: Starting in background the rollback"
- " of uncommitted transactions\n");
- } else {
- goto leave_function;
- }
-loop:
- heap = mem_heap_create(512);
-
- mutex_enter(&kernel_mutex);
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx) {
- if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) {
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- } else if (trx->conc_state == TRX_PREPARED) {
-
- trx->sess = trx_dummy_sess;
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- } else {
- break;
- }
- }
-
- mutex_exit(&kernel_mutex);
-
- if (trx == NULL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Rollback of non-prepared transactions"
- " completed\n");
-
- mem_heap_free(heap);
-
- goto leave_function;
- }
-
- trx->sess = trx_dummy_sess;
-
- if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
- fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n",
- (ulong) ut_dulint_get_high(trx->id),
- (ulong) ut_dulint_get_low(trx->id));
-
- trx_cleanup_at_db_startup(trx);
-
- mem_heap_free(heap);
-
- goto loop;
- }
-
- fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
- fork->trx = trx;
-
- thr = que_thr_create(fork, heap);
-
- roll_node = roll_node_create(heap);
-
- thr->child = roll_node;
- roll_node->common.parent = thr;
-
- mutex_enter(&kernel_mutex);
-
- trx->graph = fork;
-
- ut_a(thr == que_fork_start_command(fork));
-
- trx_roll_crash_recv_trx = trx;
- trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no);
- trx_roll_progress_printed_pct = 0;
- rows_to_undo = trx_roll_max_undo_no;
-
- if (rows_to_undo > 1000000000) {
- rows_to_undo = rows_to_undo / 1000000;
- unit = "M";
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Rolling back trx with id %lu %lu, %lu%s"
- " rows to undo\n",
- (ulong) ut_dulint_get_high(trx->id),
- (ulong) ut_dulint_get_low(trx->id),
- (ulong) rows_to_undo, unit);
- mutex_exit(&kernel_mutex);
-
- trx->mysql_thread_id = os_thread_get_curr_id();
-
- trx->mysql_process_no = os_proc_get_number();
-
- if (trx->dict_operation) {
- row_mysql_lock_data_dictionary(trx);
- }
-
- que_run_threads(thr);
-
- mutex_enter(&kernel_mutex);
-
- while (trx->que_state != TRX_QUE_RUNNING) {
-
- mutex_exit(&kernel_mutex);
-
- fprintf(stderr,
- "InnoDB: Waiting for rollback of trx id %lu to end\n",
- (ulong) ut_dulint_get_low(trx->id));
- os_thread_sleep(100000);
-
- mutex_enter(&kernel_mutex);
- }
-
- mutex_exit(&kernel_mutex);
-
- if (trx->dict_operation) {
- /* If the transaction was for a dictionary operation, we
- drop the relevant table, if it still exists */
-
- fprintf(stderr,
- "InnoDB: Dropping table with id %lu %lu"
- " in recovery if it exists\n",
- (ulong) ut_dulint_get_high(trx->table_id),
- (ulong) ut_dulint_get_low(trx->table_id));
-
- table = dict_table_get_on_id_low(trx->table_id);
-
- if (table) {
- fputs("InnoDB: Table found: dropping table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" in recovery\n", stderr);
-
- err = row_drop_table_for_mysql(table->name, trx, TRUE);
-
- ut_a(err == (int) DB_SUCCESS);
- }
- }
-
- if (trx->dict_operation) {
- row_mysql_unlock_data_dictionary(trx);
- }
-
- fprintf(stderr, "\nInnoDB: Rolling back of trx id %lu %lu completed\n",
- (ulong) ut_dulint_get_high(trx->id),
- (ulong) ut_dulint_get_low(trx->id));
- mem_heap_free(heap);
-
- trx_roll_crash_recv_trx = NULL;
-
- goto loop;
-
-leave_function:
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/***********************************************************************
-Creates an undo number array. */
-
-trx_undo_arr_t*
-trx_undo_arr_create(void)
-/*=====================*/
-{
- trx_undo_arr_t* arr;
- mem_heap_t* heap;
- ulint i;
-
- heap = mem_heap_create(1024);
-
- arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
-
- arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t)
- * UNIV_MAX_PARALLELISM);
- arr->n_cells = UNIV_MAX_PARALLELISM;
- arr->n_used = 0;
-
- arr->heap = heap;
-
- for (i = 0; i < UNIV_MAX_PARALLELISM; i++) {
-
- (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE;
- }
-
- return(arr);
-}
-
-/***********************************************************************
-Frees an undo number array. */
-
-void
-trx_undo_arr_free(
-/*==============*/
- trx_undo_arr_t* arr) /* in: undo number array */
-{
- ut_ad(arr->n_used == 0);
-
- mem_heap_free(arr->heap);
-}
-
-/***********************************************************************
-Stores info of an undo log record to the array if it is not stored yet. */
-static
-ibool
-trx_undo_arr_store_info(
-/*====================*/
- /* out: FALSE if the record already existed in the
- array */
- trx_t* trx, /* in: transaction */
- dulint undo_no)/* in: undo number */
-{
- trx_undo_inf_t* cell;
- trx_undo_inf_t* stored_here;
- trx_undo_arr_t* arr;
- ulint n_used;
- ulint n;
- ulint i;
-
- n = 0;
- arr = trx->undo_no_arr;
- n_used = arr->n_used;
- stored_here = NULL;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (!cell->in_use) {
- if (!stored_here) {
- /* Not in use, we may store here */
- cell->undo_no = undo_no;
- cell->in_use = TRUE;
-
- arr->n_used++;
-
- stored_here = cell;
- }
- } else {
- n++;
-
- if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
-
- if (stored_here) {
- stored_here->in_use = FALSE;
- ut_ad(arr->n_used > 0);
- arr->n_used--;
- }
-
- ut_ad(arr->n_used == n_used);
-
- return(FALSE);
- }
- }
-
- if (n == n_used && stored_here) {
-
- ut_ad(arr->n_used == 1 + n_used);
-
- return(TRUE);
- }
- }
-}
-
-/***********************************************************************
-Removes an undo number from the array. */
-static
-void
-trx_undo_arr_remove_info(
-/*=====================*/
- trx_undo_arr_t* arr, /* in: undo number array */
- dulint undo_no)/* in: undo number */
-{
- trx_undo_inf_t* cell;
- ulint n_used;
- ulint n;
- ulint i;
-
- n_used = arr->n_used;
- n = 0;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (cell->in_use
- && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
-
- cell->in_use = FALSE;
-
- ut_ad(arr->n_used > 0);
-
- arr->n_used--;
-
- return;
- }
- }
-}
-
-/***********************************************************************
-Gets the biggest undo number in an array. */
-static
-dulint
-trx_undo_arr_get_biggest(
-/*=====================*/
- /* out: biggest value, ut_dulint_zero if
- the array is empty */
- trx_undo_arr_t* arr) /* in: undo number array */
-{
- trx_undo_inf_t* cell;
- ulint n_used;
- dulint biggest;
- ulint n;
- ulint i;
-
- n = 0;
- n_used = arr->n_used;
- biggest = ut_dulint_zero;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (cell->in_use) {
- n++;
- if (ut_dulint_cmp(cell->undo_no, biggest) > 0) {
-
- biggest = cell->undo_no;
- }
- }
-
- if (n == n_used) {
- return(biggest);
- }
- }
-}
-
-/***************************************************************************
-Tries truncate the undo logs. */
-
-void
-trx_roll_try_truncate(
-/*==================*/
- trx_t* trx) /* in: transaction */
-{
- trx_undo_arr_t* arr;
- dulint limit;
- dulint biggest;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&((trx->rseg)->mutex)));
-
- trx->pages_undone = 0;
-
- arr = trx->undo_no_arr;
-
- limit = trx->undo_no;
-
- if (arr->n_used > 0) {
- biggest = trx_undo_arr_get_biggest(arr);
-
- if (ut_dulint_cmp(biggest, limit) >= 0) {
-
- limit = ut_dulint_add(biggest, 1);
- }
- }
-
- if (trx->insert_undo) {
- trx_undo_truncate_end(trx, trx->insert_undo, limit);
- }
-
- if (trx->update_undo) {
- trx_undo_truncate_end(trx, trx->update_undo, limit);
- }
-}
-
-/***************************************************************************
-Pops the topmost undo log record in a single undo log and updates the info
-about the topmost record in the undo log memory struct. */
-static
-trx_undo_rec_t*
-trx_roll_pop_top_rec(
-/*=================*/
- /* out: undo log record, the page s-latched */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* undo_page;
- ulint offset;
- trx_undo_rec_t* prev_rec;
- page_t* prev_rec_page;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
-
- undo_page = trx_undo_page_get_s_latched(undo->space,
- undo->top_page_no, mtr);
- offset = undo->top_offset;
-
- /* fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n",
- os_thread_get_curr_id(), ut_dulint_get_low(trx->id),
- ut_dulint_get_low(undo->top_undo_no)); */
-
- prev_rec = trx_undo_get_prev_rec(undo_page + offset,
- undo->hdr_page_no, undo->hdr_offset,
- mtr);
- if (prev_rec == NULL) {
-
- undo->empty = TRUE;
- } else {
- prev_rec_page = buf_frame_align(prev_rec);
-
- if (prev_rec_page != undo_page) {
-
- trx->pages_undone++;
- }
-
- undo->top_page_no = buf_frame_get_page_no(prev_rec_page);
- undo->top_offset = prev_rec - prev_rec_page;
- undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
- }
-
- return(undo_page + offset);
-}
-
-/************************************************************************
-Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release. */
-
-trx_undo_rec_t*
-trx_roll_pop_top_rec_of_trx(
-/*========================*/
- /* out: undo log record copied to heap, NULL
- if none left, or if the undo number of the
- top record would be less than the limit */
- trx_t* trx, /* in: transaction */
- dulint limit, /* in: least undo number we need */
- dulint* roll_ptr,/* out: roll pointer to undo record */
- mem_heap_t* heap) /* in: memory heap where copied */
-{
- trx_undo_t* undo;
- trx_undo_t* ins_undo;
- trx_undo_t* upd_undo;
- trx_undo_rec_t* undo_rec;
- trx_undo_rec_t* undo_rec_copy;
- dulint undo_no;
- ibool is_insert;
- trx_rseg_t* rseg;
- ulint progress_pct;
- mtr_t mtr;
-
- rseg = trx->rseg;
-try_again:
- mutex_enter(&(trx->undo_mutex));
-
- if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
- mutex_enter(&(rseg->mutex));
-
- trx_roll_try_truncate(trx);
-
- mutex_exit(&(rseg->mutex));
- }
-
- ins_undo = trx->insert_undo;
- upd_undo = trx->update_undo;
-
- if (!ins_undo || ins_undo->empty) {
- undo = upd_undo;
- } else if (!upd_undo || upd_undo->empty) {
- undo = ins_undo;
- } else if (ut_dulint_cmp(upd_undo->top_undo_no,
- ins_undo->top_undo_no) > 0) {
- undo = upd_undo;
- } else {
- undo = ins_undo;
- }
-
- if (!undo || undo->empty
- || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) {
-
- if ((trx->undo_no_arr)->n_used == 0) {
- /* Rollback is ending */
-
- mutex_enter(&(rseg->mutex));
-
- trx_roll_try_truncate(trx);
-
- mutex_exit(&(rseg->mutex));
- }
-
- mutex_exit(&(trx->undo_mutex));
-
- return(NULL);
- }
-
- if (undo == ins_undo) {
- is_insert = TRUE;
- } else {
- is_insert = FALSE;
- }
-
- *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
- undo->top_page_no,
- undo->top_offset);
- mtr_start(&mtr);
-
- undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
-
- undo_no = trx_undo_rec_get_undo_no(undo_rec);
-
- ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0);
-
- /* We print rollback progress info if we are in a crash recovery
- and the transaction has at least 1000 row operations to undo. */
-
- if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
-
- progress_pct = 100 - (ulint)
- ((ut_conv_dulint_to_longlong(undo_no) * 100)
- / trx_roll_max_undo_no);
- if (progress_pct != trx_roll_progress_printed_pct) {
- if (trx_roll_progress_printed_pct == 0) {
- fprintf(stderr,
- "\nInnoDB: Progress in percents:"
- " %lu", (ulong) progress_pct);
- } else {
- fprintf(stderr,
- " %lu", (ulong) progress_pct);
- }
- fflush(stderr);
- trx_roll_progress_printed_pct = progress_pct;
- }
- }
-
- trx->undo_no = undo_no;
-
- if (!trx_undo_arr_store_info(trx, undo_no)) {
- /* A query thread is already processing this undo log record */
-
- mutex_exit(&(trx->undo_mutex));
-
- mtr_commit(&mtr);
-
- goto try_again;
- }
-
- undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
-
- mutex_exit(&(trx->undo_mutex));
-
- mtr_commit(&mtr);
-
- return(undo_rec_copy);
-}
-
-/************************************************************************
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above. */
-
-ibool
-trx_undo_rec_reserve(
-/*=================*/
- /* out: TRUE if succeeded */
- trx_t* trx, /* in: transaction */
- dulint undo_no)/* in: undo number of the record */
-{
- ibool ret;
-
- mutex_enter(&(trx->undo_mutex));
-
- ret = trx_undo_arr_store_info(trx, undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-
- return(ret);
-}
-
-/***********************************************************************
-Releases a reserved undo record. */
-
-void
-trx_undo_rec_release(
-/*=================*/
- trx_t* trx, /* in: transaction */
- dulint undo_no)/* in: undo number */
-{
- trx_undo_arr_t* arr;
-
- mutex_enter(&(trx->undo_mutex));
-
- arr = trx->undo_no_arr;
-
- trx_undo_arr_remove_info(arr, undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-}
-
-/*************************************************************************
-Starts a rollback operation. */
-
-void
-trx_rollback(
-/*=========*/
- trx_t* trx, /* in: transaction */
- trx_sig_t* sig, /* in: signal starting the rollback */
- que_thr_t** next_thr)/* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the passed value is
- NULL, the parameter is ignored */
-{
- que_t* roll_graph;
- que_thr_t* thr;
- /* que_thr_t* thr2; */
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
-
- /* Initialize the rollback field in the transaction */
-
- if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
-
- trx->roll_limit = ut_dulint_zero;
-
- } else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
-
- trx->roll_limit = (sig->savept).least_undo_no;
-
- } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
-
- trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
- } else {
- ut_error;
- }
-
- ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);
-
- trx->pages_undone = 0;
-
- if (trx->undo_no_arr == NULL) {
- trx->undo_no_arr = trx_undo_arr_create();
- }
-
- /* Build a 'query' graph which will perform the undo operations */
-
- roll_graph = trx_roll_graph_build(trx);
-
- trx->graph = roll_graph;
- trx->que_state = TRX_QUE_ROLLING_BACK;
-
- thr = que_fork_start_command(roll_graph);
-
- ut_ad(thr);
-
- /* thr2 = que_fork_start_command(roll_graph);
-
- ut_ad(thr2); */
-
- if (next_thr && (*next_thr == NULL)) {
- *next_thr = thr;
- /* srv_que_task_enqueue_low(thr2); */
- } else {
- srv_que_task_enqueue_low(thr);
- /* srv_que_task_enqueue_low(thr2); */
- }
-}
-
-/********************************************************************
-Builds an undo 'query' graph for a transaction. The actual rollback is
-performed by executing this query graph like a query subprocedure call.
-The reply about the completion of the rollback will be sent by this
-graph. */
-
-que_t*
-trx_roll_graph_build(
-/*=================*/
- /* out, own: the query graph */
- trx_t* trx) /* in: trx handle */
-{
- mem_heap_t* heap;
- que_fork_t* fork;
- que_thr_t* thr;
- /* que_thr_t* thr2; */
-
- ut_ad(mutex_own(&kernel_mutex));
-
- heap = mem_heap_create(512);
- fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
- fork->trx = trx;
-
- thr = que_thr_create(fork, heap);
- /* thr2 = que_thr_create(fork, heap); */
-
- thr->child = row_undo_node_create(trx, thr, heap);
- /* thr2->child = row_undo_node_create(trx, thr2, heap); */
-
- return(fork);
-}
-
-/*************************************************************************
-Finishes error processing after the necessary partial rollback has been
-done. */
-static
-void
-trx_finish_error_processing(
-/*========================*/
- trx_t* trx) /* in: transaction */
-{
- trx_sig_t* sig;
- trx_sig_t* next_sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- while (sig != NULL) {
- next_sig = UT_LIST_GET_NEXT(signals, sig);
-
- if (sig->type == TRX_SIG_ERROR_OCCURRED) {
-
- trx_sig_remove(trx, sig);
- }
-
- sig = next_sig;
- }
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/*************************************************************************
-Finishes a partial rollback operation. */
-static
-void
-trx_finish_partial_rollback_off_kernel(
-/*===================================*/
- trx_t* trx, /* in: transaction */
- que_thr_t** next_thr)/* in/out: next query thread to run;
- if the value which is passed in is a pointer
- to a NULL pointer, then the calling function
- can start running a new query thread; if this
- parameter is NULL, it is ignored */
-{
- trx_sig_t* sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- /* Remove the signal from the signal queue and send reply message
- to it */
-
- trx_sig_reply(sig, next_thr);
- trx_sig_remove(trx, sig);
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/********************************************************************
-Finishes a transaction rollback. */
-
-void
-trx_finish_rollback_off_kernel(
-/*===========================*/
- que_t* graph, /* in: undo graph which can now be freed */
- trx_t* trx, /* in: transaction */
- que_thr_t** next_thr)/* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if this parameter is
- NULL, it is ignored */
-{
- trx_sig_t* sig;
- trx_sig_t* next_sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
-
- /* Free the memory reserved by the undo graph */
- que_graph_free(graph);
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
-
- trx_finish_partial_rollback_off_kernel(trx, next_thr);
-
- return;
-
- } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
-
- trx_finish_error_processing(trx);
-
- return;
- }
-
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Trx %lu rollback finished\n",
- (ulong) ut_dulint_get_low(trx->id));
- }
-#endif /* UNIV_DEBUG */
-
- trx_commit_off_kernel(trx);
-
- /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
- send reply messages to them */
-
- trx->que_state = TRX_QUE_RUNNING;
-
- while (sig != NULL) {
- next_sig = UT_LIST_GET_NEXT(signals, sig);
-
- if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
-
- trx_sig_reply(sig, next_thr);
-
- trx_sig_remove(trx, sig);
- }
-
- sig = next_sig;
- }
-}
-
-/*************************************************************************
-Creates a rollback command node struct. */
-
-roll_node_t*
-roll_node_create(
-/*=============*/
- /* out, own: rollback node struct */
- mem_heap_t* heap) /* in: mem heap where created */
-{
- roll_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(roll_node_t));
- node->common.type = QUE_NODE_ROLLBACK;
- node->state = ROLL_NODE_SEND;
-
- node->partial = FALSE;
-
- return(node);
-}
-
-/***************************************************************
-Performs an execution step for a rollback command node in a query graph. */
-
-que_thr_t*
-trx_rollback_step(
-/*==============*/
- /* out: query thread to run next, or NULL */
- que_thr_t* thr) /* in: query thread */
-{
- roll_node_t* node;
- ulint sig_no;
- trx_savept_t* savept;
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = ROLL_NODE_SEND;
- }
-
- if (node->state == ROLL_NODE_SEND) {
- mutex_enter(&kernel_mutex);
-
- node->state = ROLL_NODE_WAIT;
-
- if (node->partial) {
- sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
- savept = &(node->savept);
- } else {
- sig_no = TRX_SIG_TOTAL_ROLLBACK;
- savept = NULL;
- }
-
- /* Send a rollback signal to the transaction */
-
- trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr,
- savept, NULL);
-
- thr->state = QUE_THR_SIG_REPLY_WAIT;
-
- mutex_exit(&kernel_mutex);
-
- return(NULL);
- }
-
- ut_ad(node->state == ROLL_NODE_WAIT);
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
diff --git a/storage/innobase/trx/trx0rseg.c b/storage/innobase/trx/trx0rseg.c
deleted file mode 100644
index 020f217c90b..00000000000
--- a/storage/innobase/trx/trx0rseg.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/******************************************************
-Rollback segment
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0rseg.h"
-
-#ifdef UNIV_NONINL
-#include "trx0rseg.ic"
-#endif
-
-#include "trx0undo.h"
-#include "fut0lst.h"
-#include "srv0srv.h"
-#include "trx0purge.h"
-
-/**********************************************************************
-Looks for a rollback segment, based on the rollback segment id. */
-
-trx_rseg_t*
-trx_rseg_get_on_id(
-/*===============*/
- /* out: rollback segment */
- ulint id) /* in: rollback segment id */
-{
- trx_rseg_t* rseg;
-
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
- ut_ad(rseg);
-
- while (rseg->id != id) {
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- ut_ad(rseg);
- }
-
- return(rseg);
-}
-
-/********************************************************************
-Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database. */
-
-ulint
-trx_rseg_header_create(
-/*===================*/
- /* out: page number of the created segment,
- FIL_NULL if fail */
- ulint space, /* in: space id */
- ulint max_size, /* in: max size in pages */
- ulint* slot_no, /* out: rseg id == slot number in trx sys */
- mtr_t* mtr) /* in: mtr */
-{
- ulint page_no;
- trx_rsegf_t* rsegf;
- trx_sysf_t* sys_header;
- ulint i;
- page_t* page;
-
- ut_ad(mtr);
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
- MTR_MEMO_X_LOCK));
- sys_header = trx_sysf_get(mtr);
-
- *slot_no = trx_sysf_rseg_find_free(mtr);
-
- if (*slot_no == ULINT_UNDEFINED) {
-
- return(FIL_NULL);
- }
-
- /* Allocate a new file segment for the rollback segment */
- page = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
-
- if (page == NULL) {
- /* No space left */
-
- return(FIL_NULL);
- }
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_RSEG_HEADER_NEW);
-#endif /* UNIV_SYNC_DEBUG */
-
- page_no = buf_frame_get_page_no(page);
-
- /* Get the rollback segment file page */
- rsegf = trx_rsegf_get_new(space, page_no, mtr);
-
- /* Initialize max size field */
- mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size,
- MLOG_4BYTES, mtr);
-
- /* Initialize the history list */
-
- mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr);
- flst_init(rsegf + TRX_RSEG_HISTORY, mtr);
-
- /* Reset the undo log slots */
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
-
- trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr);
- }
-
- /* Add the rollback segment info to the free slot in the trx system
- header */
-
- trx_sysf_rseg_set_space(sys_header, *slot_no, space, mtr);
- trx_sysf_rseg_set_page_no(sys_header, *slot_no, page_no, mtr);
-
- return(page_no);
-}
-
-/***************************************************************************
-Creates and initializes a rollback segment object. The values for the
-fields are read from the header. The object is inserted to the rseg
-list of the trx system object and a pointer is inserted in the rseg
-array in the trx system object. */
-static
-trx_rseg_t*
-trx_rseg_mem_create(
-/*================*/
- /* out, own: rollback segment object */
- ulint id, /* in: rollback segment id */
- ulint space, /* in: space where the segment placed */
- ulint page_no, /* in: page number of the segment header */
- mtr_t* mtr) /* in: mtr */
-{
- trx_rsegf_t* rseg_header;
- trx_rseg_t* rseg;
- trx_ulogf_t* undo_log_hdr;
- fil_addr_t node_addr;
- ulint sum_of_undo_sizes;
- ulint len;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- rseg = mem_alloc(sizeof(trx_rseg_t));
-
- rseg->id = id;
- rseg->space = space;
- rseg->page_no = page_no;
-
- mutex_create(&rseg->mutex, SYNC_RSEG);
-
- UT_LIST_ADD_LAST(rseg_list, trx_sys->rseg_list, rseg);
-
- trx_sys_set_nth_rseg(trx_sys, id, rseg);
-
- rseg_header = trx_rsegf_get_new(space, page_no, mtr);
-
- rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE,
- MLOG_4BYTES, mtr);
-
- /* Initialize the undo log lists according to the rseg header */
-
- sum_of_undo_sizes = trx_undo_lists_init(rseg);
-
- rseg->curr_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr)
- + 1 + sum_of_undo_sizes;
-
- len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr);
- if (len > 0) {
- trx_sys->rseg_history_len += len;
-
- node_addr = trx_purge_get_log_from_hist(
- flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr));
- rseg->last_page_no = node_addr.page;
- rseg->last_offset = node_addr.boffset;
-
- undo_log_hdr = trx_undo_page_get(rseg->space, node_addr.page,
- mtr) + node_addr.boffset;
-
- rseg->last_trx_no = mtr_read_dulint(
- undo_log_hdr + TRX_UNDO_TRX_NO, mtr);
- rseg->last_del_marks = mtr_read_ulint(
- undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
- } else {
- rseg->last_page_no = FIL_NULL;
- }
-
- return(rseg);
-}
-
-/*************************************************************************
-Creates the memory copies for rollback segments and initializes the
-rseg list and array in trx_sys at a database startup. */
-
-void
-trx_rseg_list_and_array_init(
-/*=========================*/
- trx_sysf_t* sys_header, /* in: trx system header */
- mtr_t* mtr) /* in: mtr */
-{
- ulint i;
- ulint page_no;
- ulint space;
-
- UT_LIST_INIT(trx_sys->rseg_list);
-
- trx_sys->rseg_history_len = 0;
-
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
-
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- trx_sys_set_nth_rseg(trx_sys, i, NULL);
- } else {
- space = trx_sysf_rseg_get_space(sys_header, i, mtr);
-
- trx_rseg_mem_create(i, space, page_no, mtr);
- }
- }
-}
-
-/********************************************************************
-Creates a new rollback segment to the database. */
-
-trx_rseg_t*
-trx_rseg_create(
-/*============*/
- /* out: the created segment object, NULL if
- fail */
- ulint space, /* in: space id */
- ulint max_size, /* in: max size in pages */
- ulint* id, /* out: rseg id */
- mtr_t* mtr) /* in: mtr */
-{
- ulint page_no;
- trx_rseg_t* rseg;
-
- mtr_x_lock(fil_space_get_latch(space), mtr);
- mutex_enter(&kernel_mutex);
-
- page_no = trx_rseg_header_create(space, max_size, id, mtr);
-
- if (page_no == FIL_NULL) {
-
- mutex_exit(&kernel_mutex);
- return(NULL);
- }
-
- rseg = trx_rseg_mem_create(*id, space, page_no, mtr);
-
- mutex_exit(&kernel_mutex);
-
- return(rseg);
-}
diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
deleted file mode 100644
index 40348dd4199..00000000000
--- a/storage/innobase/trx/trx0sys.c
+++ /dev/null
@@ -1,997 +0,0 @@
-/******************************************************
-Transaction system
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0sys.h"
-
-#ifdef UNIV_NONINL
-#include "trx0sys.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mtr0mtr.h"
-#include "trx0trx.h"
-#include "trx0rseg.h"
-#include "trx0undo.h"
-#include "srv0srv.h"
-#include "trx0purge.h"
-#include "log0log.h"
-#include "os0file.h"
-
-/* The transaction system */
-trx_sys_t* trx_sys = NULL;
-trx_doublewrite_t* trx_doublewrite = NULL;
-
-/* The following is set to TRUE when we are upgrading from the old format data
-files to the new >= 4.1.x format multiple tablespaces format data files */
-
-ibool trx_doublewrite_must_reset_space_ids = FALSE;
-
-/* The following is TRUE when we are using the database in the new format,
-i.e., we have successfully upgraded, or have created a new database
-installation */
-
-ibool trx_sys_multiple_tablespace_format = FALSE;
-
-/* In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. We have successfully got the updates to InnoDB
-up to this position. If .._pos is -1, it means no crash recovery was needed,
-or there was no master log position info inside InnoDB. */
-
-char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-ib_longlong trx_sys_mysql_master_log_pos = -1;
-
-/* If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. If .._pos is -1, it means there was no binlog position info inside
-InnoDB. */
-
-char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-ib_longlong trx_sys_mysql_bin_log_pos = -1;
-
-
-/********************************************************************
-Determines if a page number is located inside the doublewrite buffer. */
-
-ibool
-trx_doublewrite_page_inside(
-/*========================*/
- /* out: TRUE if the location is inside
- the two blocks of the doublewrite buffer */
- ulint page_no) /* in: page number */
-{
- if (trx_doublewrite == NULL) {
-
- return(FALSE);
- }
-
- if (page_no >= trx_doublewrite->block1
- && page_no < trx_doublewrite->block1
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- return(TRUE);
- }
-
- if (page_no >= trx_doublewrite->block2
- && page_no < trx_doublewrite->block2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/********************************************************************
-Creates or initialializes the doublewrite buffer at a database start. */
-static
-void
-trx_doublewrite_init(
-/*=================*/
- byte* doublewrite) /* in: pointer to the doublewrite buf
- header on trx sys page */
-{
- trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
-
- /* Since we now start to use the doublewrite buffer, no need to call
- fsync() after every write to a data file */
-#ifdef UNIV_DO_FLUSH
- os_do_not_call_flush_at_each_write = TRUE;
-#endif /* UNIV_DO_FLUSH */
-
- mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
-
- trx_doublewrite->first_free = 0;
-
- trx_doublewrite->block1 = mach_read_from_4(
- doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
- trx_doublewrite->block2 = mach_read_from_4(
- doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
- trx_doublewrite->write_buf_unaligned = ut_malloc(
- (1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
-
- trx_doublewrite->write_buf = ut_align(
- trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
- trx_doublewrite->buf_block_arr = mem_alloc(
- 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
-}
-
-/********************************************************************
-Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
-multiple tablespace format. */
-
-void
-trx_sys_mark_upgraded_to_multiple_tablespaces(void)
-/*===============================================*/
-{
- page_t* page;
- byte* doublewrite;
- mtr_t mtr;
-
- /* We upgraded to 4.1.x and reset the space id fields in the
- doublewrite buffer. Let us mark to the trx_sys header that the upgrade
- has been done. */
-
- mtr_start(&mtr);
-
- page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- doublewrite = page + TRX_SYS_DOUBLEWRITE;
-
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
- TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
- MLOG_4BYTES, &mtr);
- mtr_commit(&mtr);
-
- /* Flush the modified pages to disk and make a checkpoint */
- log_make_checkpoint_at(ut_dulint_max, TRUE);
-
- trx_sys_multiple_tablespace_format = TRUE;
-}
-
-/********************************************************************
-Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-
-void
-trx_sys_create_doublewrite_buf(void)
-/*================================*/
-{
- page_t* page;
- page_t* page2;
- page_t* new_page;
- byte* doublewrite;
- byte* fseg_header;
- ulint page_no;
- ulint prev_page_no;
- ulint i;
- mtr_t mtr;
-
- if (trx_doublewrite) {
- /* Already inited */
-
- return;
- }
-
-start_again:
- mtr_start(&mtr);
-
- page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- doublewrite = page + TRX_SYS_DOUBLEWRITE;
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
- == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
- /* The doublewrite buffer has already been created:
- just read in some numbers */
-
- trx_doublewrite_init(doublewrite);
-
- mtr_commit(&mtr);
- } else {
- fprintf(stderr,
- "InnoDB: Doublewrite buffer not found:"
- " creating new\n");
-
- if (buf_pool_get_curr_size()
- < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- + FSP_EXTENT_SIZE / 2 + 100)
- * UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite buffer:"
- " you must\n"
- "InnoDB: increase your buffer pool size.\n"
- "InnoDB: Cannot continue operation.\n");
-
- exit(1);
- }
-
- page2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
- TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
-
- /* fseg_create acquires a second latch on the page,
- therefore we must declare it: */
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page2, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- if (page2 == NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite buffer:"
- " you must\n"
- "InnoDB: increase your tablespace size.\n"
- "InnoDB: Cannot continue operation.\n");
-
- /* We exit without committing the mtr to prevent
- its modifications to the database getting to disk */
-
- exit(1);
- }
-
- fseg_header = page + TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_FSEG;
- prev_page_no = 0;
-
- for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- + FSP_EXTENT_SIZE / 2; i++) {
- page_no = fseg_alloc_free_page(fseg_header,
- prev_page_no + 1,
- FSP_UP, &mtr);
- if (page_no == FIL_NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite"
- " buffer: you must\n"
- "InnoDB: increase your"
- " tablespace size.\n"
- "InnoDB: Cannot continue operation.\n"
- );
-
- exit(1);
- }
-
- /* We read the allocated pages to the buffer pool;
- when they are written to disk in a flush, the space
- id and page number fields are also written to the
- pages. When we at database startup read pages
- from the doublewrite buffer, we know that if the
- space id and page number in them are the same as
- the page position in the tablespace, then the page
- has not been written to in doublewrite. */
-
- new_page = buf_page_get(TRX_SYS_SPACE, page_no,
- RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(new_page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Make a dummy change to the page to ensure it will
- be written to disk in a flush */
-
- mlog_write_ulint(new_page + FIL_PAGE_DATA,
- TRX_SYS_DOUBLEWRITE_MAGIC_N,
- MLOG_4BYTES, &mtr);
-
- if (i == FSP_EXTENT_SIZE / 2) {
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_BLOCK1,
- page_no, MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_REPEAT
- + TRX_SYS_DOUBLEWRITE_BLOCK1,
- page_no, MLOG_4BYTES, &mtr);
- } else if (i == FSP_EXTENT_SIZE / 2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_BLOCK2,
- page_no, MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_REPEAT
- + TRX_SYS_DOUBLEWRITE_BLOCK2,
- page_no, MLOG_4BYTES, &mtr);
- } else if (i > FSP_EXTENT_SIZE / 2) {
- ut_a(page_no == prev_page_no + 1);
- }
-
- prev_page_no = page_no;
- }
-
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
- TRX_SYS_DOUBLEWRITE_MAGIC_N,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
- + TRX_SYS_DOUBLEWRITE_REPEAT,
- TRX_SYS_DOUBLEWRITE_MAGIC_N,
- MLOG_4BYTES, &mtr);
-
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
- TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
- MLOG_4BYTES, &mtr);
- mtr_commit(&mtr);
-
- /* Flush the modified pages to disk and make a checkpoint */
- log_make_checkpoint_at(ut_dulint_max, TRUE);
-
- fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
-
- trx_sys_multiple_tablespace_format = TRUE;
-
- goto start_again;
- }
-}
-
-/********************************************************************
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function uses a possible doublewrite buffer to restore
-half-written pages in the data files. */
-
-void
-trx_sys_doublewrite_init_or_restore_pages(
-/*======================================*/
- ibool restore_corrupt_pages)
-{
- byte* buf;
- byte* read_buf;
- byte* unaligned_read_buf;
- ulint block1;
- ulint block2;
- ulint source_page_no;
- byte* page;
- byte* doublewrite;
- ulint space_id;
- ulint page_no;
- ulint i;
-
- /* We do the file i/o past the buffer pool */
-
- unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
- read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
-
- /* Read the trx sys header to check if we are using the doublewrite
- buffer */
-
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0,
- UNIV_PAGE_SIZE, read_buf, NULL);
- doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
- == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
- /* The doublewrite buffer has been created */
-
- trx_doublewrite_init(doublewrite);
-
- block1 = trx_doublewrite->block1;
- block2 = trx_doublewrite->block2;
-
- buf = trx_doublewrite->write_buf;
- } else {
- goto leave_func;
- }
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
- != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
-
- /* We are upgrading from a version < 4.1.x to a version where
- multiple tablespaces are supported. We must reset the space id
- field in the pages in the doublewrite buffer because starting
- from this version the space id is stored to
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
-
- trx_doublewrite_must_reset_space_ids = TRUE;
-
- fprintf(stderr,
- "InnoDB: Resetting space id's in the"
- " doublewrite buffer\n");
- } else {
- trx_sys_multiple_tablespace_format = TRUE;
- }
-
- /* Read the pages from the doublewrite buffer to memory */
-
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0,
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- buf, NULL);
- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block2, 0,
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- NULL);
- /* Check if any of these pages is half-written in data files, in the
- intended position */
-
- page = buf;
-
- for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
-
- page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
-
- if (trx_doublewrite_must_reset_space_ids) {
-
- space_id = 0;
- mach_write_to_4(page
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
- /* We do not need to calculate new checksums for the
- pages because the field .._SPACE_ID does not affect
- them. Write the page back to where we read it from. */
-
- if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- source_page_no = block1 + i;
- } else {
- source_page_no = block2
- + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
- }
-
- fil_io(OS_FILE_WRITE, TRUE, 0, source_page_no, 0,
- UNIV_PAGE_SIZE, page, NULL);
- /* printf("Resetting space id in page %lu\n",
- source_page_no); */
- } else {
- space_id = mach_read_from_4(
- page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- }
-
- if (!restore_corrupt_pages) {
- /* The database was shut down gracefully: no need to
- restore pages */
-
- } else if (!fil_tablespace_exists_in_mem(space_id)) {
- /* Maybe we have dropped the single-table tablespace
- and this page once belonged to it: do nothing */
-
- } else if (!fil_check_adress_in_tablespace(space_id,
- page_no)) {
- fprintf(stderr,
- "InnoDB: Warning: a page in the"
- " doublewrite buffer is not within space\n"
- "InnoDB: bounds; space id %lu"
- " page number %lu, page %lu in"
- " doublewrite buf.\n",
- (ulong) space_id, (ulong) page_no, (ulong) i);
-
- } else if (space_id == TRX_SYS_SPACE
- && ((page_no >= block1
- && page_no
- < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
- || (page_no >= block2
- && page_no
- < (block2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
-
- /* It is an unwritten doublewrite buffer page:
- do nothing */
- } else {
- /* Read in the actual page from the data files */
-
- fil_io(OS_FILE_READ, TRUE, space_id, page_no, 0,
- UNIV_PAGE_SIZE, read_buf, NULL);
- /* Check if the page is corrupt */
-
- if (buf_page_is_corrupted(read_buf)) {
-
- fprintf(stderr,
- "InnoDB: Warning: database page"
- " corruption or a failed\n"
- "InnoDB: file read of page %lu.\n",
- (ulong) page_no);
- fprintf(stderr,
- "InnoDB: Trying to recover it from"
- " the doublewrite buffer.\n");
-
- if (buf_page_is_corrupted(page)) {
- fprintf(stderr,
- "InnoDB: Dump of the page:\n");
- buf_page_print(read_buf);
- fprintf(stderr,
- "InnoDB: Dump of"
- " corresponding page"
- " in doublewrite buffer:\n");
- buf_page_print(page);
-
- fprintf(stderr,
- "InnoDB: Also the page in the"
- " doublewrite buffer"
- " is corrupt.\n"
- "InnoDB: Cannot continue"
- " operation.\n"
- "InnoDB: You can try to"
- " recover the database"
- " with the my.cnf\n"
- "InnoDB: option:\n"
- "InnoDB: set-variable="
- "innodb_force_recovery=6\n");
- exit(1);
- }
-
- /* Write the good page from the
- doublewrite buffer to the intended
- position */
-
- fil_io(OS_FILE_WRITE, TRUE, space_id,
- page_no, 0,
- UNIV_PAGE_SIZE, page, NULL);
- fprintf(stderr,
- "InnoDB: Recovered the page from"
- " the doublewrite buffer.\n");
- }
- }
-
- page += UNIV_PAGE_SIZE;
- }
-
- fil_flush_file_spaces(FIL_TABLESPACE);
-
-leave_func:
- ut_free(unaligned_read_buf);
-}
-
-/********************************************************************
-Checks that trx is in the trx list. */
-
-ibool
-trx_in_trx_list(
-/*============*/
- /* out: TRUE if is in */
- trx_t* in_trx) /* in: trx */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx != NULL) {
-
- if (trx == in_trx) {
-
- return(TRUE);
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************
-Writes the value of max_trx_id to the file based trx system header. */
-
-void
-trx_sys_flush_max_trx_id(void)
-/*==========================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
- trx_sys->max_trx_id, &mtr);
- mtr_commit(&mtr);
-}
-
-/*********************************************************************
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
- const char* file_name,/* in: MySQL log file name */
- ib_longlong offset, /* in: position in that log file */
- ulint field, /* in: offset of the MySQL log info field in
- the trx sys header */
- mtr_t* mtr) /* in: mtr */
-{
- trx_sysf_t* sys_header;
-
- if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
-
- /* We cannot fit the name to the 512 bytes we have reserved */
-
- return;
- }
-
- sys_header = trx_sysf_get(mtr);
-
- if (mach_read_from_4(sys_header + field
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
- TRX_SYS_MYSQL_LOG_MAGIC_N,
- MLOG_4BYTES, mtr);
- }
-
- if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
- file_name)) {
-
- mlog_write_string(sys_header + field
- + TRX_SYS_MYSQL_LOG_NAME,
- (byte*) file_name, 1 + ut_strlen(file_name),
- mtr);
- }
-
- if (mach_read_from_4(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
- || (offset >> 32) > 0) {
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
- (ulint)(offset >> 32),
- MLOG_4BYTES, mtr);
- }
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
- (ulint)(offset & 0xFFFFFFFFUL),
- MLOG_4BYTES, mtr);
-}
-
-#ifdef UNIV_HOTBACKUP
-/*********************************************************************
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- byte* page) /* in: buffer containing the trx system header page,
- i.e., page number TRX_SYS_PAGE_NO in the tablespace */
-{
- trx_sysf_t* sys_header;
-
- sys_header = page + TRX_SYS;
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- == TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- fprintf(stderr,
- "ibbackup: Last MySQL binlog file position %lu %lu,"
- " file name %s\n",
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME);
- }
-}
-#endif /* UNIV_HOTBACKUP */
-
-/*********************************************************************
-Stores the MySQL binlog offset info in the trx system header if
-the magic number shows it valid, and print the info to stderr */
-
-void
-trx_sys_print_mysql_binlog_offset(void)
-/*===================================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
- ulint trx_sys_mysql_bin_log_pos_high;
- ulint trx_sys_mysql_bin_log_pos_low;
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
- trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
-
- trx_sys_mysql_bin_log_pos
- = (((ib_longlong)trx_sys_mysql_bin_log_pos_high) << 32)
- + (ib_longlong)trx_sys_mysql_bin_log_pos_low;
-
- ut_memcpy(trx_sys_mysql_bin_log_name,
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
-
- fprintf(stderr,
- "InnoDB: Last MySQL binlog file position %lu %lu,"
- " file name %s\n",
- trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
- trx_sys_mysql_bin_log_name);
-
- mtr_commit(&mtr);
-}
-
-/*********************************************************************
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid. */
-
-void
-trx_sys_print_mysql_master_log_pos(void)
-/*====================================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- fprintf(stderr,
- "InnoDB: In a MySQL replication slave the last"
- " master binlog file\n"
- "InnoDB: position %lu %lu, file name %s\n",
- (ulong) mach_read_from_4(sys_header
- + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
- (ulong) mach_read_from_4(sys_header
- + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME);
- /* Copy the master log position info to global variables we can
- use in ha_innobase.cc to initialize glob_mi to right values */
-
- ut_memcpy(trx_sys_mysql_master_log_name,
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME,
- TRX_SYS_MYSQL_LOG_NAME_LEN);
-
- trx_sys_mysql_master_log_pos
- = (((ib_longlong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
- + ((ib_longlong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
- mtr_commit(&mtr);
-}
-
-/********************************************************************
-Looks for a free slot for a rollback segment in the trx system file copy. */
-
-ulint
-trx_sysf_rseg_find_free(
-/*====================*/
- /* out: slot index or ULINT_UNDEFINED if not found */
- mtr_t* mtr) /* in: mtr */
-{
- trx_sysf_t* sys_header;
- ulint page_no;
- ulint i;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- sys_header = trx_sysf_get(mtr);
-
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
-
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/*********************************************************************
-Creates the file page for the transaction system. This function is called only
-at the database creation, before trx_sys_init. */
-static
-void
-trx_sysf_create(
-/*============*/
- mtr_t* mtr) /* in: mtr */
-{
- trx_sysf_t* sys_header;
- ulint slot_no;
- page_t* page;
- ulint page_no;
- ulint i;
-
- ut_ad(mtr);
-
- /* Note that below we first reserve the file space x-latch, and
- then enter the kernel: we must do it in this order to conform
- to the latching order rules. */
-
- mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE), mtr);
- mutex_enter(&kernel_mutex);
-
- /* Create the trx sys file block in a new allocated file segment */
- page = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
- mtr);
- ut_a(buf_frame_get_page_no(page) == TRX_SYS_PAGE_NO);
-
-#ifdef UNIV_SYNC_DEBUG
- buf_page_dbg_add_level(page, SYNC_TRX_SYS_HEADER);
-#endif /* UNIV_SYNC_DEBUG */
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
- MLOG_2BYTES, mtr);
-
- /* Reset the doublewrite buffer magic number to zero so that we
- know that the doublewrite buffer has not yet been created (this
- suppresses a Valgrind warning) */
-
- mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
-
- sys_header = trx_sysf_get(mtr);
-
- /* Start counting transaction ids from number 1 up */
- mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
- ut_dulint_create(0, 1), mtr);
-
- /* Reset the rollback segment slots */
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
-
- trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
- trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
- }
-
- /* The remaining area (up to the page trailer) is uninitialized.
- Silence Valgrind warnings about it. */
- UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
- + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE),
- (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
- - (TRX_SYS_RSEGS
- + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE))
- + page - sys_header);
-
- /* Create the first rollback segment in the SYSTEM tablespace */
- page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no,
- mtr);
- ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
- ut_a(page_no != FIL_NULL);
-
- mutex_exit(&kernel_mutex);
-}
-
-/*********************************************************************
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started. */
-
-void
-trx_sys_init_at_db_start(void)
-/*==========================*/
-{
- trx_sysf_t* sys_header;
- ib_longlong rows_to_undo = 0;
- const char* unit = "";
- trx_t* trx;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- ut_ad(trx_sys == NULL);
-
- mutex_enter(&kernel_mutex);
-
- trx_sys = mem_alloc(sizeof(trx_sys_t));
-
- sys_header = trx_sysf_get(&mtr);
-
- trx_rseg_list_and_array_init(sys_header, &mtr);
-
- trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
- trx_sys_get_new_trx_id will evaluate to TRUE when the function
- is first time called, and the value for trx id will be written
- to the disk-based header! Thus trx id values will not overlap when
- the database is repeatedly started! */
-
- trx_sys->max_trx_id = ut_dulint_add(
- ut_dulint_align_up(mtr_read_dulint(
- sys_header
- + TRX_SYS_TRX_ID_STORE, &mtr),
- TRX_SYS_TRX_ID_WRITE_MARGIN),
- 2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
-
- UT_LIST_INIT(trx_sys->mysql_trx_list);
- trx_lists_init_at_db_start();
-
- if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- for (;;) {
-
- if ( trx->conc_state != TRX_PREPARED) {
- rows_to_undo += ut_conv_dulint_to_longlong(
- trx->undo_no);
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
-
- if (!trx) {
- break;
- }
- }
-
- if (rows_to_undo > 1000000000) {
- unit = "M";
- rows_to_undo = rows_to_undo / 1000000;
- }
-
- fprintf(stderr,
- "InnoDB: %lu transaction(s) which must be"
- " rolled back or cleaned up\n"
- "InnoDB: in total %lu%s row operations to undo\n",
- (ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
- (ulong) rows_to_undo, unit);
-
- fprintf(stderr, "InnoDB: Trx id counter is %lu %lu\n",
- (ulong) ut_dulint_get_high(trx_sys->max_trx_id),
- (ulong) ut_dulint_get_low(trx_sys->max_trx_id));
- }
-
- UT_LIST_INIT(trx_sys->view_list);
-
- trx_purge_sys_create();
-
- mutex_exit(&kernel_mutex);
-
- mtr_commit(&mtr);
-}
-
-/*********************************************************************
-Creates and initializes the transaction system at the database creation. */
-
-void
-trx_sys_create(void)
-/*================*/
-{
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- trx_sysf_create(&mtr);
-
- mtr_commit(&mtr);
-
- trx_sys_init_at_db_start();
-}
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
deleted file mode 100644
index 8ada38845c5..00000000000
--- a/storage/innobase/trx/trx0trx.c
+++ /dev/null
@@ -1,2086 +0,0 @@
-/******************************************************
-The transaction
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0trx.h"
-
-#ifdef UNIV_NONINL
-#include "trx0trx.ic"
-#endif
-
-#include "trx0undo.h"
-#include "trx0rseg.h"
-#include "log0log.h"
-#include "que0que.h"
-#include "lock0lock.h"
-#include "trx0roll.h"
-#include "usr0sess.h"
-#include "read0read.h"
-#include "srv0srv.h"
-#include "thr0loc.h"
-#include "btr0sea.h"
-#include "os0proc.h"
-#include "trx0xa.h"
-#include "ha_prototypes.h"
-
-/* Copy of the prototype for innobase_mysql_print_thd: this
-copy MUST be equal to the one in mysql/sql/ha_innodb.cc ! */
-
-void innobase_mysql_print_thd(
- FILE* f,
- void* thd,
- ulint max_query_len);
-
-/* Dummy session used currently in MySQL interface */
-sess_t* trx_dummy_sess = NULL;
-
-/* Number of transactions currently allocated for MySQL: protected by
-the kernel mutex */
-ulint trx_n_mysql_transactions = 0;
-
-/*****************************************************************
-Starts the transaction if it is not yet started. */
-
-void
-trx_start_if_not_started_noninline(
-/*===============================*/
- trx_t* trx) /* in: transaction */
-{
- trx_start_if_not_started(trx);
-}
-
-/*****************************************************************
-Set detailed error message for the transaction. */
-
-void
-trx_set_detailed_error(
-/*===================*/
- trx_t* trx, /* in: transaction struct */
- const char* msg) /* in: detailed error message */
-{
- ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
-}
-
-/*****************************************************************
-Set detailed error message for the transaction from a file. Note that the
-file is rewinded before reading from it. */
-
-void
-trx_set_detailed_error_from_file(
-/*=============================*/
- trx_t* trx, /* in: transaction struct */
- FILE* file) /* in: file to read message from */
-{
- os_file_read_string(file, trx->detailed_error,
- sizeof(trx->detailed_error));
-}
-
-/********************************************************************
-Retrieves the error_info field from a trx. */
-
-void*
-trx_get_error_info(
-/*===============*/
- /* out: the error info */
- trx_t* trx) /* in: trx object */
-{
- return(trx->error_info);
-}
-
-/********************************************************************
-Creates and initializes a transaction object. */
-
-trx_t*
-trx_create(
-/*=======*/
- /* out, own: the transaction */
- sess_t* sess) /* in: session or NULL */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx = mem_alloc(sizeof(trx_t));
-
- trx->magic_n = TRX_MAGIC_N;
-
- trx->op_info = "";
-
- trx->is_purge = 0;
- trx->conc_state = TRX_NOT_STARTED;
- trx->start_time = time(NULL);
-
- trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
- trx->id = ut_dulint_zero;
- trx->no = ut_dulint_max;
-
- trx->support_xa = TRUE;
-
- trx->check_foreigns = TRUE;
- trx->check_unique_secondary = TRUE;
-
- trx->flush_log_later = FALSE;
- trx->must_flush_log_later = FALSE;
-
- trx->dict_operation = FALSE;
-
- trx->mysql_thd = NULL;
- trx->mysql_query_str = NULL;
- trx->active_trans = 0;
- trx->duplicates = 0;
-
- trx->n_mysql_tables_in_use = 0;
- trx->mysql_n_tables_locked = 0;
-
- trx->mysql_log_file_name = NULL;
- trx->mysql_log_offset = 0;
-
- mutex_create(&trx->undo_mutex, SYNC_TRX_UNDO);
-
- trx->rseg = NULL;
-
- trx->undo_no = ut_dulint_zero;
- trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
- trx->insert_undo = NULL;
- trx->update_undo = NULL;
- trx->undo_no_arr = NULL;
-
- trx->error_state = DB_SUCCESS;
- trx->detailed_error[0] = '\0';
-
- trx->sess = sess;
- trx->que_state = TRX_QUE_RUNNING;
- trx->n_active_thrs = 0;
-
- trx->handling_signals = FALSE;
-
- UT_LIST_INIT(trx->signals);
- UT_LIST_INIT(trx->reply_signals);
-
- trx->graph = NULL;
-
- trx->wait_lock = NULL;
- trx->was_chosen_as_deadlock_victim = FALSE;
- UT_LIST_INIT(trx->wait_thrs);
-
- trx->lock_heap = mem_heap_create_in_buffer(256);
- UT_LIST_INIT(trx->trx_locks);
-
- UT_LIST_INIT(trx->trx_savepoints);
-
- trx->dict_operation_lock_mode = 0;
- trx->has_search_latch = FALSE;
- trx->search_latch_timeout = BTR_SEA_TIMEOUT;
-
- trx->declared_to_be_inside_innodb = FALSE;
- trx->n_tickets_to_enter_innodb = 0;
-
- trx->auto_inc_lock = NULL;
-
- trx->global_read_view_heap = mem_heap_create(256);
- trx->global_read_view = NULL;
- trx->read_view = NULL;
-
- /* Set X/Open XA transaction identification to NULL */
- memset(&trx->xid, 0, sizeof(trx->xid));
- trx->xid.formatID = -1;
-
- trx->n_autoinc_rows = 0;
-
- return(trx);
-}
-
-/************************************************************************
-Creates a transaction object for MySQL. */
-
-trx_t*
-trx_allocate_for_mysql(void)
-/*========================*/
- /* out, own: transaction object */
-{
- trx_t* trx;
-
- mutex_enter(&kernel_mutex);
-
- /* Open a dummy session */
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
- trx = trx_create(trx_dummy_sess);
-
- trx_n_mysql_transactions++;
-
- UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-
- mutex_exit(&kernel_mutex);
-
- trx->mysql_thread_id = os_thread_get_curr_id();
-
- trx->mysql_process_no = os_proc_get_number();
-
- return(trx);
-}
-
-/************************************************************************
-Creates a transaction object for background operations by the master thread. */
-
-trx_t*
-trx_allocate_for_background(void)
-/*=============================*/
- /* out, own: transaction object */
-{
- trx_t* trx;
-
- mutex_enter(&kernel_mutex);
-
- /* Open a dummy session */
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
- trx = trx_create(trx_dummy_sess);
-
- mutex_exit(&kernel_mutex);
-
- return(trx);
-}
-
-/************************************************************************
-Releases the search latch if trx has reserved it. */
-
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx) /* in: transaction */
-{
- if (trx->has_search_latch) {
- rw_lock_s_unlock(&btr_search_latch);
-
- trx->has_search_latch = FALSE;
- }
-}
-
-/************************************************************************
-Frees a transaction object. */
-
-void
-trx_free(
-/*=====*/
- trx_t* trx) /* in, own: trx object */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- if (trx->declared_to_be_inside_innodb) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: Freeing a trx which is declared"
- " to be processing\n"
- "InnoDB: inside InnoDB.\n", stderr);
- trx_print(stderr, trx, 600);
- putc('\n', stderr);
-
- /* This is an error but not a fatal error. We must keep
- the counters like srv_conc_n_threads accurate. */
- srv_conc_force_exit_innodb(trx);
- }
-
- if (trx->n_mysql_tables_in_use != 0
- || trx->mysql_n_tables_locked != 0) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: MySQL is freeing a thd\n"
- "InnoDB: though trx->n_mysql_tables_in_use is %lu\n"
- "InnoDB: and trx->mysql_n_tables_locked is %lu.\n",
- (ulong)trx->n_mysql_tables_in_use,
- (ulong)trx->mysql_n_tables_locked);
-
- trx_print(stderr, trx, 600);
-
- ut_print_buf(stderr, trx, sizeof(trx_t));
- }
-
- ut_a(trx->magic_n == TRX_MAGIC_N);
-
- trx->magic_n = 11112222;
-
- ut_a(trx->conc_state == TRX_NOT_STARTED);
-
- mutex_free(&(trx->undo_mutex));
-
- ut_a(trx->insert_undo == NULL);
- ut_a(trx->update_undo == NULL);
-
- if (trx->undo_no_arr) {
- trx_undo_arr_free(trx->undo_no_arr);
- }
-
- ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
- ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
-
- ut_a(trx->wait_lock == NULL);
- ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
-
- ut_a(!trx->has_search_latch);
- ut_a(!trx->auto_inc_lock);
-
- ut_a(trx->dict_operation_lock_mode == 0);
-
- if (trx->lock_heap) {
- mem_heap_free(trx->lock_heap);
- }
-
- ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
-
- if (trx->global_read_view_heap) {
- mem_heap_free(trx->global_read_view_heap);
- }
-
- trx->global_read_view = NULL;
-
- ut_a(trx->read_view == NULL);
-
- mem_free(trx);
-}
-
-/************************************************************************
-Frees a transaction object for MySQL. */
-
-void
-trx_free_for_mysql(
-/*===============*/
- trx_t* trx) /* in, own: trx object */
-{
- mutex_enter(&kernel_mutex);
-
- UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-
- trx_free(trx);
-
- ut_a(trx_n_mysql_transactions > 0);
-
- trx_n_mysql_transactions--;
-
- mutex_exit(&kernel_mutex);
-}
-
-/************************************************************************
-Frees a transaction object of a background operation of the master thread. */
-
-void
-trx_free_for_background(
-/*====================*/
- trx_t* trx) /* in, own: trx object */
-{
- mutex_enter(&kernel_mutex);
-
- trx_free(trx);
-
- mutex_exit(&kernel_mutex);
-}
-
-/********************************************************************
-Inserts the trx handle in the trx system trx list in the right position.
-The list is sorted on the trx id so that the biggest id is at the list
-start. This function is used at the database startup to insert incomplete
-transactions to the list. */
-static
-void
-trx_list_insert_ordered(
-/*====================*/
- trx_t* trx) /* in: trx handle */
-{
- trx_t* trx2;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx2 != NULL) {
- if (ut_dulint_cmp(trx->id, trx2->id) >= 0) {
-
- ut_ad(ut_dulint_cmp(trx->id, trx2->id) == 1);
- break;
- }
- trx2 = UT_LIST_GET_NEXT(trx_list, trx2);
- }
-
- if (trx2 != NULL) {
- trx2 = UT_LIST_GET_PREV(trx_list, trx2);
-
- if (trx2 == NULL) {
- UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
- } else {
- UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list,
- trx2, trx);
- }
- } else {
- UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx);
- }
-}
-
-/********************************************************************
-Creates trx objects for transactions and initializes the trx list of
-trx_sys at database start. Rollback segment and undo log lists must
-already exist when this function is called, because the lists of
-transactions to be rolled back or cleaned up are built based on the
-undo log lists. */
-
-void
-trx_lists_init_at_db_start(void)
-/*============================*/
-{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
- trx_t* trx;
-
- UT_LIST_INIT(trx_sys->trx_list);
-
- /* Look from the rollback segments if there exist undo logs for
- transactions */
-
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- while (rseg != NULL) {
- undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
-
- while (undo != NULL) {
-
- trx = trx_create(NULL);
-
- trx->id = undo->trx_id;
- trx->xid = undo->xid;
- trx->insert_undo = undo;
- trx->rseg = rseg;
-
- if (undo->state != TRX_UNDO_ACTIVE) {
-
- /* Prepared transactions are left in
- the prepared state waiting for a
- commit or abort decision from MySQL */
-
- if (undo->state == TRX_UNDO_PREPARED) {
-
- fprintf(stderr,
- "InnoDB: Transaction %lu %lu"
- " was in the"
- " XA prepared state.\n",
- ut_dulint_get_high(trx->id),
- ut_dulint_get_low(trx->id));
-
- if (srv_force_recovery == 0) {
-
- trx->conc_state = TRX_PREPARED;
- } else {
- fprintf(stderr,
- "InnoDB: Since"
- " innodb_force_recovery"
- " > 0, we will"
- " rollback it"
- " anyway.\n");
-
- trx->conc_state = TRX_ACTIVE;
- }
- } else {
- trx->conc_state
- = TRX_COMMITTED_IN_MEMORY;
- }
-
- /* We give a dummy value for the trx no;
- this should have no relevance since purge
- is not interested in committed transaction
- numbers, unless they are in the history
- list, in which case it looks the number
- from the disk based undo log structure */
-
- trx->no = trx->id;
- } else {
- trx->conc_state = TRX_ACTIVE;
-
- /* A running transaction always has the number
- field inited to ut_dulint_max */
-
- trx->no = ut_dulint_max;
- }
-
- if (undo->dict_operation) {
- trx->dict_operation = undo->dict_operation;
- trx->table_id = undo->table_id;
- }
-
- if (!undo->empty) {
- trx->undo_no = ut_dulint_add(undo->top_undo_no,
- 1);
- }
-
- trx_list_insert_ordered(trx);
-
- undo = UT_LIST_GET_NEXT(undo_list, undo);
- }
-
- undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
-
- while (undo != NULL) {
- trx = trx_get_on_id(undo->trx_id);
-
- if (NULL == trx) {
- trx = trx_create(NULL);
-
- trx->id = undo->trx_id;
- trx->xid = undo->xid;
-
- if (undo->state != TRX_UNDO_ACTIVE) {
-
- /* Prepared transactions are left in
- the prepared state waiting for a
- commit or abort decision from MySQL */
-
- if (undo->state == TRX_UNDO_PREPARED) {
- fprintf(stderr,
- "InnoDB: Transaction"
- " %lu %lu was in the"
- " XA prepared state.\n",
- ut_dulint_get_high(
- trx->id),
- ut_dulint_get_low(
- trx->id));
-
- if (srv_force_recovery == 0) {
-
- trx->conc_state
- = TRX_PREPARED;
- } else {
- fprintf(stderr,
- "InnoDB: Since"
- " innodb_force_recovery"
- " > 0, we will"
- " rollback it"
- " anyway.\n");
-
- trx->conc_state
- = TRX_ACTIVE;
- }
- } else {
- trx->conc_state
- = TRX_COMMITTED_IN_MEMORY;
- }
-
- /* We give a dummy value for the trx
- number */
-
- trx->no = trx->id;
- } else {
- trx->conc_state = TRX_ACTIVE;
-
- /* A running transaction always has
- the number field inited to
- ut_dulint_max */
-
- trx->no = ut_dulint_max;
- }
-
- trx->rseg = rseg;
- trx_list_insert_ordered(trx);
-
- if (undo->dict_operation) {
- trx->dict_operation
- = undo->dict_operation;
- trx->table_id = undo->table_id;
- }
- }
-
- trx->update_undo = undo;
-
- if ((!undo->empty)
- && (ut_dulint_cmp(undo->top_undo_no,
- trx->undo_no) >= 0)) {
-
- trx->undo_no = ut_dulint_add(undo->top_undo_no,
- 1);
- }
-
- undo = UT_LIST_GET_NEXT(undo_list, undo);
- }
-
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
-}
-
-/**********************************************************************
-Assigns a rollback segment to a transaction in a round-robin fashion.
-Skips the SYSTEM rollback segment if another is available. */
-UNIV_INLINE
-ulint
-trx_assign_rseg(void)
-/*=================*/
- /* out: assigned rollback segment id */
-{
- trx_rseg_t* rseg = trx_sys->latest_rseg;
-
- ut_ad(mutex_own(&kernel_mutex));
-loop:
- /* Get next rseg in a round-robin fashion */
-
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
-
- if (rseg == NULL) {
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
- }
-
- /* If it is the SYSTEM rollback segment, and there exist others, skip
- it */
-
- if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
- && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
- goto loop;
- }
-
- trx_sys->latest_rseg = rseg;
-
- return(rseg->id);
-}
-
-/********************************************************************
-Starts a new transaction. */
-
-ibool
-trx_start_low(
-/*==========*/
- /* out: TRUE */
- trx_t* trx, /* in: transaction */
- ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-{
- trx_rseg_t* rseg;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->rseg == NULL);
-
- if (trx->is_purge) {
- trx->id = ut_dulint_zero;
- trx->conc_state = TRX_ACTIVE;
- trx->start_time = time(NULL);
-
- return(TRUE);
- }
-
- ut_ad(trx->conc_state != TRX_ACTIVE);
-
- if (rseg_id == ULINT_UNDEFINED) {
-
- rseg_id = trx_assign_rseg();
- }
-
- rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
-
- trx->id = trx_sys_get_new_trx_id();
-
- /* The initial value for trx->no: ut_dulint_max is used in
- read_view_open_now: */
-
- trx->no = ut_dulint_max;
-
- trx->rseg = rseg;
-
- trx->conc_state = TRX_ACTIVE;
- trx->start_time = time(NULL);
-
- UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
-
- return(TRUE);
-}
-
-/********************************************************************
-Starts a new transaction. */
-
-ibool
-trx_start(
-/*======*/
- /* out: TRUE */
- trx_t* trx, /* in: transaction */
- ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-{
- ibool ret;
-
- mutex_enter(&kernel_mutex);
-
- ret = trx_start_low(trx, rseg_id);
-
- mutex_exit(&kernel_mutex);
-
- return(ret);
-}
-
-/********************************************************************
-Commits a transaction. */
-
-void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx) /* in: transaction */
-{
- page_t* update_hdr_page;
- dulint lsn;
- trx_rseg_t* rseg;
- trx_undo_t* undo;
- ibool must_flush_log = FALSE;
- mtr_t mtr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx->must_flush_log_later = FALSE;
-
- rseg = trx->rseg;
-
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
-
- mutex_exit(&kernel_mutex);
-
- mtr_start(&mtr);
-
- must_flush_log = TRUE;
-
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to some other state: these modifications to the file data
- structure define the transaction as committed in the file
- based world, at the serialization point of the log sequence
- number lsn obtained below. */
-
- mutex_enter(&(rseg->mutex));
-
- if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(
- rseg, trx, trx->insert_undo, &mtr);
- }
-
- undo = trx->update_undo;
-
- if (undo) {
- mutex_enter(&kernel_mutex);
- trx->no = trx_sys_get_new_trx_no();
-
- mutex_exit(&kernel_mutex);
-
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction commit for this transaction. */
-
- update_hdr_page = trx_undo_set_state_at_finish(
- rseg, trx, undo, &mtr);
-
- /* We have to do the cleanup for the update log while
- holding the rseg mutex because update log headers
- have to be put to the history list in the order of
- the trx number. */
-
- trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
- }
-
- mutex_exit(&(rseg->mutex));
-
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name
- && trx->mysql_log_file_name[0] != '\0') {
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO, &mtr);
- trx->mysql_log_file_name = NULL;
- }
-
- /* The following call commits the mini-transaction, making the
- whole transaction committed in the file-based world, at this
- log sequence number. The transaction becomes 'durable' when
- we write the log to disk, but in the logical sense the commit
- in the file-based data structures (undo logs etc.) happens
- here.
-
- NOTE that transaction numbers, which are assigned only to
- transactions with an update undo log, do not necessarily come
- in exactly the same order as commit lsn's, if the transactions
- have different rollback segments. To get exactly the same
- order we should hold the kernel mutex up to this point,
- adding to to the contention of the kernel mutex. However, if
- a transaction T2 is able to see modifications made by
- a transaction T1, T2 will always get a bigger transaction
- number and a bigger commit lsn than T1. */
-
- /*--------------*/
- mtr_commit(&mtr);
- /*--------------*/
- lsn = mtr.end_lsn;
-
- mutex_enter(&kernel_mutex);
- }
-
- ut_ad(trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED);
- ut_ad(mutex_own(&kernel_mutex));
-
- /* The following assignment makes the transaction committed in memory
- and makes its changes to data visible to other transactions.
- NOTE that there is a small discrepancy from the strict formal
- visibility rules here: a human user of the database can see
- modifications made by another transaction T even before the necessary
- log segment has been flushed to the disk. If the database happens to
- crash before the flush, the user has seen modifications from T which
- will never be a committed transaction. However, any transaction T2
- which sees the modifications of the committing transaction T, and
- which also itself makes modifications to the database, will get an lsn
- larger than the committing transaction T. In the case where the log
- flush fails, and T never gets committed, also T2 will never get
- committed. */
-
- /*--------------------------------------*/
- trx->conc_state = TRX_COMMITTED_IN_MEMORY;
- /*--------------------------------------*/
-
- lock_release_off_kernel(trx);
-
- if (trx->global_read_view) {
- read_view_close(trx->global_read_view);
- mem_heap_empty(trx->global_read_view_heap);
- trx->global_read_view = NULL;
- }
-
- trx->read_view = NULL;
-
- if (must_flush_log) {
-
- mutex_exit(&kernel_mutex);
-
- if (trx->insert_undo != NULL) {
-
- trx_undo_insert_cleanup(trx);
- }
-
- /* NOTE that we could possibly make a group commit more
- efficient here: call os_thread_yield here to allow also other
- trxs to come to commit! */
-
- /*-------------------------------------*/
-
- /* Depending on the my.cnf options, we may now write the log
- buffer to the log files, making the transaction durable if
- the OS does not crash. We may also flush the log files to
- disk, making the transaction durable also at an OS crash or a
- power outage.
-
- The idea in InnoDB's group commit is that a group of
- transactions gather behind a trx doing a physical disk write
- to log files, and when that physical write has been completed,
- one of those transactions does a write which commits the whole
- group. Note that this group commit will only bring benefit if
- there are > 2 users in the database. Then at least 2 users can
- gather behind one doing the physical log write to disk.
-
- If we are calling trx_commit() under MySQL's binlog mutex, we
- will delay possible log write and flush to a separate function
- trx_commit_complete_for_mysql(), which is only called when the
- thread has released the binlog mutex. This is to make the
- group commit algorithm to work. Otherwise, the MySQL binlog
- mutex would serialize all commits and prevent a group of
- transactions from gathering. */
-
- if (trx->flush_log_later) {
- /* Do nothing yet */
- trx->must_flush_log_later = TRUE;
- } else if (srv_flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
-
- trx->commit_lsn = lsn;
-
- /*-------------------------------------*/
-
- mutex_enter(&kernel_mutex);
- }
-
- /* Free all savepoints */
- trx_roll_free_all_savepoints(trx);
-
- trx->conc_state = TRX_NOT_STARTED;
- trx->rseg = NULL;
- trx->undo_no = ut_dulint_zero;
- trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
- trx->mysql_query_str = NULL;
-
- ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
- ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
-
- UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
-}
-
-/********************************************************************
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, andf we cannot roll it back. */
-
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx) /* in: transaction */
-{
- if (trx->insert_undo != NULL) {
-
- trx_undo_insert_cleanup(trx);
- }
-
- trx->conc_state = TRX_NOT_STARTED;
- trx->rseg = NULL;
- trx->undo_no = ut_dulint_zero;
- trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
-
- UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
-}
-
-/************************************************************************
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction. */
-
-read_view_t*
-trx_assign_read_view(
-/*=================*/
- /* out: consistent read view */
- trx_t* trx) /* in: active transaction */
-{
- ut_ad(trx->conc_state == TRX_ACTIVE);
-
- if (trx->read_view) {
- return(trx->read_view);
- }
-
- mutex_enter(&kernel_mutex);
-
- if (!trx->read_view) {
- trx->read_view = read_view_open_now(
- trx->id, trx->global_read_view_heap);
- trx->global_read_view = trx->read_view;
- }
-
- mutex_exit(&kernel_mutex);
-
- return(trx->read_view);
-}
-
-/********************************************************************
-Commits a transaction. NOTE that the kernel mutex is temporarily released. */
-static
-void
-trx_handle_commit_sig_off_kernel(
-/*=============================*/
- trx_t* trx, /* in: transaction */
- que_thr_t** next_thr) /* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-{
- trx_sig_t* sig;
- trx_sig_t* next_sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx->que_state = TRX_QUE_COMMITTING;
-
- trx_commit_off_kernel(trx);
-
- ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
-
- /* Remove all TRX_SIG_COMMIT signals from the signal queue and send
- reply messages to them */
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- while (sig != NULL) {
- next_sig = UT_LIST_GET_NEXT(signals, sig);
-
- if (sig->type == TRX_SIG_COMMIT) {
-
- trx_sig_reply(sig, next_thr);
- trx_sig_remove(trx, sig);
- }
-
- sig = next_sig;
- }
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/***************************************************************
-The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
-the TRX_QUE_RUNNING state and releases query threads which were
-waiting for a lock in the wait_thrs list. */
-
-void
-trx_end_lock_wait(
-/*==============*/
- trx_t* trx) /* in: transaction */
-{
- que_thr_t* thr;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
-
- while (thr != NULL) {
- que_thr_end_wait_no_next_thr(thr);
-
- UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
- }
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/***************************************************************
-Moves the query threads in the lock wait list to the SUSPENDED state and puts
-the transaction to the TRX_QUE_RUNNING state. */
-static
-void
-trx_lock_wait_to_suspended(
-/*=======================*/
- trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */
-{
- que_thr_t* thr;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
-
- while (thr != NULL) {
- thr->state = QUE_THR_SUSPENDED;
-
- UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
- }
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/***************************************************************
-Moves the query threads in the sig reply wait list of trx to the SUSPENDED
-state. */
-static
-void
-trx_sig_reply_wait_to_suspended(
-/*============================*/
- trx_t* trx) /* in: transaction */
-{
- trx_sig_t* sig;
- que_thr_t* thr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sig = UT_LIST_GET_FIRST(trx->reply_signals);
-
- while (sig != NULL) {
- thr = sig->receiver;
-
- ut_ad(thr->state == QUE_THR_SIG_REPLY_WAIT);
-
- thr->state = QUE_THR_SUSPENDED;
-
- sig->receiver = NULL;
-
- UT_LIST_REMOVE(reply_signals, trx->reply_signals, sig);
-
- sig = UT_LIST_GET_FIRST(trx->reply_signals);
- }
-}
-
-/*********************************************************************
-Checks the compatibility of a new signal with the other signals in the
-queue. */
-static
-ibool
-trx_sig_is_compatible(
-/*==================*/
- /* out: TRUE if the signal can be queued */
- trx_t* trx, /* in: trx handle */
- ulint type, /* in: signal type */
- ulint sender) /* in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
-{
- trx_sig_t* sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- if (UT_LIST_GET_LEN(trx->signals) == 0) {
-
- return(TRUE);
- }
-
- if (sender == TRX_SIG_SELF) {
- if (type == TRX_SIG_ERROR_OCCURRED) {
-
- return(TRUE);
-
- } else if (type == TRX_SIG_BREAK_EXECUTION) {
-
- return(TRUE);
- } else {
- return(FALSE);
- }
- }
-
- ut_ad(sender == TRX_SIG_OTHER_SESS);
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- if (type == TRX_SIG_COMMIT) {
- while (sig != NULL) {
-
- if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
-
- return(FALSE);
- }
-
- sig = UT_LIST_GET_NEXT(signals, sig);
- }
-
- return(TRUE);
-
- } else if (type == TRX_SIG_TOTAL_ROLLBACK) {
- while (sig != NULL) {
-
- if (sig->type == TRX_SIG_COMMIT) {
-
- return(FALSE);
- }
-
- sig = UT_LIST_GET_NEXT(signals, sig);
- }
-
- return(TRUE);
-
- } else if (type == TRX_SIG_BREAK_EXECUTION) {
-
- return(TRUE);
- } else {
- ut_error;
-
- return(FALSE);
- }
-}
-
-/********************************************************************
-Sends a signal to a trx object. */
-
-void
-trx_sig_send(
-/*=========*/
- trx_t* trx, /* in: trx handle */
- ulint type, /* in: signal type */
- ulint sender, /* in: TRX_SIG_SELF or
- TRX_SIG_OTHER_SESS */
- que_thr_t* receiver_thr, /* in: query thread which wants the
- reply, or NULL; if type is
- TRX_SIG_END_WAIT, this must be NULL */
- trx_savept_t* savept, /* in: possible rollback savepoint, or
- NULL */
- que_thr_t** next_thr) /* in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the parameter
- is NULL, it is ignored */
-{
- trx_sig_t* sig;
- trx_t* receiver_trx;
-
- ut_ad(trx);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (!trx_sig_is_compatible(trx, type, sender)) {
- /* The signal is not compatible with the other signals in
- the queue: die */
-
- ut_error;
- }
-
- /* Queue the signal object */
-
- if (UT_LIST_GET_LEN(trx->signals) == 0) {
-
- /* The signal list is empty: the 'sig' slot must be unused
- (we improve performance a bit by avoiding mem_alloc) */
- sig = &(trx->sig);
- } else {
- /* It might be that the 'sig' slot is unused also in this
- case, but we choose the easy way of using mem_alloc */
-
- sig = mem_alloc(sizeof(trx_sig_t));
- }
-
- UT_LIST_ADD_LAST(signals, trx->signals, sig);
-
- sig->type = type;
- sig->sender = sender;
- sig->receiver = receiver_thr;
-
- if (savept) {
- sig->savept = *savept;
- }
-
- if (receiver_thr) {
- receiver_trx = thr_get_trx(receiver_thr);
-
- UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals,
- sig);
- }
-
- if (trx->sess->state == SESS_ERROR) {
-
- trx_sig_reply_wait_to_suspended(trx);
- }
-
- if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) {
- ut_error;
- }
-
- /* If there were no other signals ahead in the queue, try to start
- handling of the signal */
-
- if (UT_LIST_GET_FIRST(trx->signals) == sig) {
-
- trx_sig_start_handle(trx, next_thr);
- }
-}
-
-/********************************************************************
-Ends signal handling. If the session is in the error state, and
-trx->graph_before_signal_handling != NULL, then returns control to the error
-handling routine of the graph (currently just returns the control to the
-graph root which then will send an error message to the client). */
-
-void
-trx_end_signal_handling(
-/*====================*/
- trx_t* trx) /* in: trx */
-{
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->handling_signals == TRUE);
-
- trx->handling_signals = FALSE;
-
- trx->graph = trx->graph_before_signal_handling;
-
- if (trx->graph && (trx->sess->state == SESS_ERROR)) {
-
- que_fork_error_handle(trx, trx->graph);
- }
-}
-
-/********************************************************************
-Starts handling of the signals queued for a trx. Signals are taken from the
-head of trx->signals one at a time for as long as they can be processed
-immediately; a rollback-type signal, or the presence of active query threads,
-ends the processing for this call. */
-
-void
-trx_sig_start_handle(
-/*=================*/
-	trx_t*		trx,		/* in: trx handle */
-	que_thr_t**	next_thr)	/* in/out: next query thread to run;
-					if the value which is passed in is
-					a pointer to a NULL pointer, then the
-					calling function can start running
-					a new query thread; if the parameter
-					is NULL, it is ignored */
-{
-	for (;;) {
-		trx_sig_t*	head_sig;
-		ulint		sig_type;
-
-		ut_ad(trx);
-		ut_ad(mutex_own(&kernel_mutex));
-
-		/* Queue drained while we were handling signals: finish */
-		if (trx->handling_signals
-		    && UT_LIST_GET_LEN(trx->signals) == 0) {
-
-			trx_end_signal_handling(trx);
-
-			return;
-		}
-
-		if (trx->conc_state == TRX_NOT_STARTED) {
-			trx_start_low(trx, ULINT_UNDEFINED);
-		}
-
-		/* Query threads waiting for a lock are moved to the
-		suspended state */
-		if (trx->que_state == TRX_QUE_LOCK_WAIT) {
-			trx_lock_wait_to_suspended(trx);
-		}
-
-		/* With the session in the error state, threads waiting for
-		signal replies are moved to the suspended state as well,
-		canceling their wait reservations. If the session is not in
-		the error state nothing is done here, even if the trx has
-		sent a commit or rollback signal to itself. */
-		if (trx->sess->state == SESS_ERROR) {
-			trx_sig_reply_wait_to_suspended(trx);
-		}
-
-		/* A signal can be processed only when no query thread of
-		the trx is running; otherwise we wait until all of them are
-		aware of the arrival of the signal */
-		if (trx->n_active_thrs > 0) {
-
-			return;
-		}
-
-		if (trx->handling_signals == FALSE) {
-			/* First signal of this round: remember the graph */
-			trx->graph_before_signal_handling = trx->graph;
-			trx->handling_signals = TRUE;
-		}
-
-		head_sig = UT_LIST_GET_FIRST(trx->signals);
-		sig_type = head_sig->type;
-
-		if (sig_type == TRX_SIG_COMMIT) {
-
-			trx_handle_commit_sig_off_kernel(trx, next_thr);
-
-		} else if (sig_type == TRX_SIG_TOTAL_ROLLBACK
-			   || sig_type == TRX_SIG_ROLLBACK_TO_SAVEPT
-			   || sig_type == TRX_SIG_ERROR_OCCURRED) {
-
-			trx_rollback(trx, head_sig, next_thr);
-
-			/* No further signals can be handled until the
-			rollback completes, therefore we return */
-
-			return;
-
-		} else if (sig_type == TRX_SIG_BREAK_EXECUTION) {
-
-			trx_sig_reply(head_sig, next_thr);
-			trx_sig_remove(trx, head_sig);
-		} else {
-			ut_error;
-		}
-	}
-}
-
-/********************************************************************
-Sends the reply for a handled signal: if a query thread is registered as the
-receiver of the signal, the signal is unlinked from the reply_signals list of
-the receiver's trx, the wait of the thread is ended, and the receiver field
-of the signal is cleared. Does nothing when the signal has no receiver. */
-
-void
-trx_sig_reply(
-/*==========*/
-	trx_sig_t*	sig,		/* in: signal */
-	que_thr_t**	next_thr)	/* in/out: next query thread to run;
-					if the value which is passed in is
-					a pointer to a NULL pointer, then the
-					calling function can start running
-					a new query thread */
-{
-	trx_t*	waiting_trx;
-
-	ut_ad(sig);
-	ut_ad(mutex_own(&kernel_mutex));
-
-	if (sig->receiver == NULL) {
-		/* Nobody is waiting for a reply */
-
-		return;
-	}
-
-	ut_ad(sig->receiver->state == QUE_THR_SIG_REPLY_WAIT);
-
-	waiting_trx = thr_get_trx(sig->receiver);
-
-	UT_LIST_REMOVE(reply_signals, waiting_trx->reply_signals, sig);
-	ut_ad(waiting_trx->sess->state != SESS_ERROR);
-
-	que_thr_end_wait(sig->receiver, next_thr);
-
-	sig->receiver = NULL;
-}
-
-/********************************************************************
-Unlinks a signal object from the signal queue of a trx and releases it.
-The signal must no longer have a receiver. */
-
-void
-trx_sig_remove(
-/*===========*/
-	trx_t*		trx,	/* in: trx handle */
-	trx_sig_t*	sig)	/* in, own: signal */
-{
-	ut_ad(trx && sig);
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(sig->receiver == NULL);
-
-	UT_LIST_REMOVE(signals, trx->signals, sig);
-
-	/* Reset the field to catch possible bugs */
-	sig->type = 0;
-
-	if (sig == &(trx->sig)) {
-		/* The slot embedded in the trx object is not heap
-		allocated and must not be freed */
-
-		return;
-	}
-
-	mem_free(sig);
-}
-
-/*************************************************************************
-Allocates a commit command node from the given memory heap and initializes
-its node type and state. */
-
-commit_node_t*
-commit_node_create(
-/*===============*/
-				/* out, own: commit node struct */
-	mem_heap_t*	heap)	/* in: mem heap where created */
-{
-	commit_node_t*	new_node = mem_heap_alloc(heap, sizeof(*new_node));
-
-	new_node->state = COMMIT_NODE_SEND;
-	new_node->common.type = QUE_NODE_COMMIT;
-
-	return(new_node);
-}
-
-/***************************************************************
-Performs one execution step for a commit type node in a query graph. In the
-COMMIT_NODE_SEND state a TRX_SIG_COMMIT signal is sent to the trx of the
-thread and the node moves to COMMIT_NODE_WAIT; in the COMMIT_NODE_WAIT state
-the node is reset to COMMIT_NODE_SEND and control goes to the parent node. */
-
-que_thr_t*
-trx_commit_step(
-/*============*/
-				/* out: query thread to run next, or NULL */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	commit_node_t*	commit_node = thr->run_node;
-	que_thr_t*	thr_to_run = NULL;
-
-	ut_ad(que_node_get_type(commit_node) == QUE_NODE_COMMIT);
-
-	if (thr->prev_node == que_node_get_parent(commit_node)) {
-		/* Control arrived from the parent: start from the
-		beginning */
-		commit_node->state = COMMIT_NODE_SEND;
-	}
-
-	if (commit_node->state != COMMIT_NODE_SEND) {
-		ut_ad(commit_node->state == COMMIT_NODE_WAIT);
-
-		/* The step after the signal was sent: rearm the node and
-		hand control to the parent */
-		commit_node->state = COMMIT_NODE_SEND;
-		thr->run_node = que_node_get_parent(commit_node);
-
-		return(thr);
-	}
-
-	mutex_enter(&kernel_mutex);
-
-	commit_node->state = COMMIT_NODE_WAIT;
-	thr->state = QUE_THR_SIG_REPLY_WAIT;
-
-	/* Send the commit signal to the transaction */
-	trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT, TRX_SIG_SELF,
-		     thr, NULL, &thr_to_run);
-
-	mutex_exit(&kernel_mutex);
-
-	return(thr_to_run);
-}
-
-/**************************************************************************
-Does the transaction commit for MySQL. Makes sure the trx has a session
-object and has been started, then calls trx_commit_off_kernel() while
-holding the kernel mutex. trx->op_info is set to "committing" for the
-duration of the call and reset to "" before returning. */
-
-ulint
-trx_commit_for_mysql(
-/*=================*/
- /* out: 0 or error number; the code below
- always returns 0 */
- trx_t* trx) /* in: trx handle */
-{
- /* Because we do not do the commit by sending an Innobase
- sig to the transaction, we must here make sure that trx has been
- started. */
-
- ut_a(trx);
-
- trx->op_info = "committing";
-
- /* If we are doing the XA recovery of prepared transactions, then
- the transaction object does not have an InnoDB session object, and we
- set the dummy session that we use for all MySQL transactions. */
-
- if (trx->sess == NULL) {
- /* Open a dummy session */
-
- /* trx_dummy_sess is tested once without the kernel mutex and
- once more with it held, so that sess_open() is called only when
- the pointer is still NULL under the mutex.
- NOTE(review): the first test reads the global unlocked --
- confirm this is acceptable on all supported platforms. */
-
- if (!trx_dummy_sess) {
- mutex_enter(&kernel_mutex);
-
- if (!trx_dummy_sess) {
- trx_dummy_sess = sess_open();
- }
-
- mutex_exit(&kernel_mutex);
- }
-
- trx->sess = trx_dummy_sess;
- }
-
- trx_start_if_not_started(trx);
-
- /* The commit itself is done under the kernel mutex */
-
- mutex_enter(&kernel_mutex);
-
- trx_commit_off_kernel(trx);
-
- mutex_exit(&kernel_mutex);
-
- trx->op_info = "";
-
- return(0);
-}
-
-/**************************************************************************
-If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE. The work is done only when
-trx->must_flush_log_later is set; the flag is always cleared before
-returning. How far the log is pushed depends on
-srv_flush_log_at_trx_commit:
-  0: nothing is written;
-  1: the log is written up to trx->commit_lsn and flushed to disk, unless
-     srv_unix_file_flush_method is SRV_UNIX_NOSYNC, in which case it is only
-     written;
-  2: the log is written but not flushed.
-Any other value is a fatal error. */
-
-ulint
-trx_commit_complete_for_mysql(
-/*==========================*/
-			/* out: 0 or error number; the code below
-			always returns 0 */
-	trx_t*	trx)	/* in: trx handle */
-{
-	dulint	lsn;
-
-	/* Validate the handle BEFORE reading any field through it */
-	ut_a(trx);
-
-	lsn = trx->commit_lsn;
-
-	trx->op_info = "flushing log";
-
-	if (!trx->must_flush_log_later) {
-		/* Do nothing */
-	} else if (srv_flush_log_at_trx_commit == 0) {
-		/* Do nothing */
-	} else if (srv_flush_log_at_trx_commit == 1) {
-		if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
-			/* Write the log but do not flush it to disk */
-
-			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
-		} else {
-			/* Write the log to the log files AND flush them to
-			disk */
-
-			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
-		}
-	} else if (srv_flush_log_at_trx_commit == 2) {
-
-		/* Write the log but do not flush it to disk */
-
-		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
-	} else {
-		ut_error;
-	}
-
-	trx->must_flush_log_later = FALSE;
-
-	trx->op_info = "";
-
-	return(0);
-}
-
-/**************************************************************************
-Marks the latest SQL statement ended: records the current undo number of the
-trx as the starting point of the next statement. For a trx that has not been
-started the undo number is first reset to zero. */
-
-void
-trx_mark_sql_stat_end(
-/*==================*/
-	trx_t*	trx)	/* in: trx handle */
-{
-	ut_a(trx);
-
-	if (trx->conc_state != TRX_NOT_STARTED) {
-		trx->last_sql_stat_start.least_undo_no = trx->undo_no;
-
-		return;
-	}
-
-	/* Not started: begin counting undo records from zero */
-	trx->undo_no = ut_dulint_zero;
-	trx->last_sql_stat_start.least_undo_no = trx->undo_no;
-}
-
-/**************************************************************************
-Prints info about a transaction to the given file. The caller must own the
-kernel mutex and must have called
-innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL
-or InnoDB cannot meanwhile change the info printed here.
-
-The output consists of a first line with the trx id, the concurrency state
-and thread information, an optional line with the MySQL table counts, an
-optional line with the query state and lock/undo statistics, and finally
-whatever innobase_mysql_print_thd() prints when trx->mysql_thd is set. */
-
-void
-trx_print(
-/*======*/
- FILE* f, /* in: output stream */
- trx_t* trx, /* in: transaction */
- ulint max_query_len) /* in: max query length to print, or 0 to
- use the default max length */
-{
- ibool newline; /* TRUE if the statistics line has content
- and must be terminated with '\n' */
-
- /* The trx id is printed as its high and low parts */
-
- fprintf(f, "TRANSACTION %lu %lu",
- (ulong) ut_dulint_get_high(trx->id),
- (ulong) ut_dulint_get_low(trx->id));
-
- switch (trx->conc_state) {
- case TRX_NOT_STARTED:
- fputs(", not started", f);
- break;
- case TRX_ACTIVE:
- fprintf(f, ", ACTIVE %lu sec",
- (ulong)difftime(time(NULL), trx->start_time));
- break;
- case TRX_PREPARED:
- fprintf(f, ", ACTIVE (PREPARED) %lu sec",
- (ulong)difftime(time(NULL), trx->start_time));
- break;
- case TRX_COMMITTED_IN_MEMORY:
- fputs(", COMMITTED IN MEMORY", f);
- break;
- default:
- /* Unknown state: print the raw value */
- fprintf(f, " state %lu", (ulong) trx->conc_state);
- }
-
-#ifdef UNIV_LINUX
- /* NOTE(review): unlike the other %lu arguments in this function,
- mysql_process_no is passed without an (ulong) cast -- confirm its
- declared type matches %lu. */
- fprintf(f, ", process no %lu", trx->mysql_process_no);
-#endif
- fprintf(f, ", OS thread id %lu",
- (ulong) os_thread_pf(trx->mysql_thread_id));
-
- /* op_info is printed only when it is a non-empty string */
-
- if (*trx->op_info) {
- putc(' ', f);
- fputs(trx->op_info, f);
- }
-
- if (trx->is_purge) {
- fputs(" purge trx", f);
- }
-
- if (trx->declared_to_be_inside_innodb) {
- fprintf(f, ", thread declared inside InnoDB %lu",
- (ulong) trx->n_tickets_to_enter_innodb);
- }
-
- putc('\n', f);
-
- if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
- fprintf(f, "mysql tables in use %lu, locked %lu\n",
- (ulong) trx->n_mysql_tables_in_use,
- (ulong) trx->mysql_n_tables_locked);
- }
-
- /* Every query state except TRX_QUE_RUNNING prints a label, so a
- terminating newline is needed unless the state is RUNNING and none of
- the sections below print anything */
-
- newline = TRUE;
-
- switch (trx->que_state) {
- case TRX_QUE_RUNNING:
- newline = FALSE; break;
- case TRX_QUE_LOCK_WAIT:
- fputs("LOCK WAIT ", f); break;
- case TRX_QUE_ROLLING_BACK:
- fputs("ROLLING BACK ", f); break;
- case TRX_QUE_COMMITTING:
- fputs("COMMITTING ", f); break;
- default:
- fprintf(f, "que state %lu ", (ulong) trx->que_state);
- }
-
- /* Lock statistics are printed if the trx holds any lock struct or
- its lock heap is larger than 400 bytes.
- NOTE(review): 400 is presumably the size of an empty lock heap --
- confirm. */
-
- if (0 < UT_LIST_GET_LEN(trx->trx_locks)
- || mem_heap_get_size(trx->lock_heap) > 400) {
- newline = TRUE;
-
- fprintf(f, "%lu lock struct(s), heap size %lu,"
- " %lu row lock(s)",
- (ulong) UT_LIST_GET_LEN(trx->trx_locks),
- (ulong) mem_heap_get_size(trx->lock_heap),
- (ulong) lock_number_of_rows_locked(trx));
- }
-
- if (trx->has_search_latch) {
- newline = TRUE;
- fputs(", holds adaptive hash latch", f);
- }
-
- /* Only the low part of undo_no is printed */
-
- if (ut_dulint_cmp(trx->undo_no, ut_dulint_zero) != 0) {
- newline = TRUE;
- fprintf(f, ", undo log entries %lu",
- (ulong) ut_dulint_get_low(trx->undo_no));
- }
-
- if (newline) {
- putc('\n', f);
- }
-
- if (trx->mysql_thd != NULL) {
- innobase_mysql_print_thd(f, trx->mysql_thd, max_query_len);
- }
-}
-
-/***********************************************************************
-Compares the "weight" (or size) of two transactions. A transaction that has
-edited non-transactional tables always outweighs one that has not. When the
-two do not differ in that respect, the weight is estimated as the number of
-altered rows (undo_no) plus the number of lock structs of the trx. */
-
-int
-trx_weight_cmp(
-/*===========*/
-			/* out: <0, 0 or >0; similar to strcmp(3) */
-	trx_t*	a,	/* in: the first transaction to be compared */
-	trx_t*	b)	/* in: the second transaction to be compared */
-{
-	ibool	a_notrans_edit = FALSE;
-	ibool	b_notrans_edit = FALSE;
-
-	/* A trx without a MySQL thread handle is assumed not to have
-	edited non-transactional tables */
-
-	if (a->mysql_thd != NULL
-	    && thd_has_edited_nontrans_tables(a->mysql_thd)) {
-		a_notrans_edit = TRUE;
-	}
-
-	if (b->mysql_thd != NULL
-	    && thd_has_edited_nontrans_tables(b->mysql_thd)) {
-		b_notrans_edit = TRUE;
-	}
-
-	if (a_notrans_edit) {
-		if (!b_notrans_edit) {
-
-			return(1);
-		}
-	} else if (b_notrans_edit) {
-
-		return(-1);
-	}
-
-	/* Both or neither edited non-transactional tables: decide on the
-	number of altered/locked rows */
-
-#if 0
-	fprintf(stderr,
-		"%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
-		__func__,
-		ut_conv_dulint_to_longlong(a->undo_no),
-		UT_LIST_GET_LEN(a->trx_locks),
-		ut_conv_dulint_to_longlong(b->undo_no),
-		UT_LIST_GET_LEN(b->trx_locks));
-#endif
-
-#define TRX_WEIGHT(t)	\
-	ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks))
-
-	return(ut_dulint_cmp(TRX_WEIGHT(a), TRX_WEIGHT(b)));
-}
-
-/********************************************************************
-Prepares a transaction. If the trx has an insert or update undo log, the
-states of the undo log segments are changed inside a mini-transaction and,
-depending on srv_flush_log_at_trx_commit, the log is written and possibly
-flushed up to the end lsn of that mini-transaction. trx->conc_state is set
-to TRX_PREPARED in every case.
-
-The caller must hold the kernel mutex. The mutex is released and reacquired
-inside this function when there is undo log to process, and is held again
-on return. */
-
-void
-trx_prepare_off_kernel(
-/*===================*/
- trx_t* trx) /* in: transaction */
-{
- page_t* update_hdr_page; /* assigned below but never read in
- this function */
- trx_rseg_t* rseg;
- ibool must_flush_log = FALSE;
- dulint lsn; /* assigned only in the undo branch;
- read only when must_flush_log is TRUE,
- which is set in the same branch */
- mtr_t mtr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- rseg = trx->rseg;
-
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
-
- /* The kernel mutex is not held while the undo log pages are
- modified */
-
- mutex_exit(&kernel_mutex);
-
- mtr_start(&mtr);
-
- must_flush_log = TRUE;
-
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to TRX_UNDO_PREPARED: these modifications to the file data
- structure define the transaction as prepared in the
- file-based world, at the serialization point of lsn. */
-
- mutex_enter(&(rseg->mutex));
-
- if (trx->insert_undo != NULL) {
-
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction prepare for this transaction. */
-
- trx_undo_set_state_at_prepare(trx, trx->insert_undo,
- &mtr);
- }
-
- if (trx->update_undo) {
- update_hdr_page = trx_undo_set_state_at_prepare(
- trx, trx->update_undo, &mtr);
- }
-
- mutex_exit(&(rseg->mutex));
-
- /*--------------*/
- mtr_commit(&mtr); /* This mtr commit makes the
- transaction prepared in the file-based
- world */
- /*--------------*/
- lsn = mtr.end_lsn;
-
- mutex_enter(&kernel_mutex);
- }
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /*--------------------------------------*/
- trx->conc_state = TRX_PREPARED;
- /*--------------------------------------*/
-
- if (must_flush_log) {
- /* Depending on the my.cnf options, we may now write the log
- buffer to the log files, making the prepared state of the
- transaction durable if the OS does not crash. We may also
- flush the log files to disk, making the prepared state of the
- transaction durable also at an OS crash or a power outage.
-
- The idea in InnoDB's group prepare is that a group of
- transactions gather behind a trx doing a physical disk write
- to log files, and when that physical write has been completed,
- one of those transactions does a write which prepares the whole
- group. Note that this group prepare will only bring benefit if
- there are > 2 users in the database. Then at least 2 users can
- gather behind one doing the physical log write to disk.
-
- TODO: find out if MySQL holds some mutex when calling this.
- That would spoil our group prepare algorithm. */
-
- /* The kernel mutex is released for the duration of the log
- write and reacquired afterwards */
-
- mutex_exit(&kernel_mutex);
-
- if (srv_flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
-
- mutex_enter(&kernel_mutex);
- }
-}
-
-/**************************************************************************
-Does the transaction prepare for MySQL: starts the trx if necessary and calls
-trx_prepare_off_kernel() with the kernel mutex held. trx->op_info reads
-"preparing" while the call is in progress. */
-
-ulint
-trx_prepare_for_mysql(
-/*==================*/
-			/* out: 0 or error number */
-	trx_t*	trx)	/* in: trx handle */
-{
-	ut_a(trx);
-
-	trx->op_info = "preparing";
-
-	/* The prepare is not done by sending an Innobase sig to the
-	transaction, so nothing else has made sure that the trx has been
-	started */
-	trx_start_if_not_started(trx);
-
-	mutex_enter(&kernel_mutex);
-	trx_prepare_off_kernel(trx);
-	mutex_exit(&kernel_mutex);
-
-	trx->op_info = "";
-
-	return(0);
-}
-
-/**************************************************************************
-This function is used to find number of prepared transactions and
-their transaction objects for a recovery. The XID of every trx in
-trx_sys->trx_list that is in the TRX_PREPARED state is copied to xid_list,
-up to len entries, and a message is printed to stderr for each of them. The
-list is scanned under the kernel mutex. */
-
-int
-trx_recover_for_mysql(
-/*==================*/
-				/* out: number of prepared transactions
-				stored in xid_list */
-	XID*	xid_list,	/* in/out: prepared transactions */
-	ulint	len)		/* in: number of slots in xid_list */
-{
-	trx_t*	trx;
-	ulint	count = 0;
-
-	ut_ad(xid_list);
-	ut_ad(len);
-
-	/* We should set those transactions which are in the prepared state
-	to the xid_list */
-
-	mutex_enter(&kernel_mutex);
-
-	/* The capacity is tested BEFORE a slot is written, so that a zero
-	len can never cause a store past the end of xid_list even if the
-	ut_ad() checks above are not active (presumably they are debug-only
-	-- confirm against the ut_ad definition). For len > 0 the set of
-	entries stored and the messages printed are unchanged. */
-
-	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-	     trx != NULL && count < len;
-	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
-		if (trx->conc_state != TRX_PREPARED) {
-
-			continue;
-		}
-
-		xid_list[count] = trx->xid;
-
-		if (count == 0) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Starting recovery for"
-				" XA transactions...\n");
-		}
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Transaction %lu %lu in"
-			" prepared state after recovery\n",
-			(ulong) ut_dulint_get_high(trx->id),
-			(ulong) ut_dulint_get_low(trx->id));
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Transaction contains changes"
-			" to %lu rows\n",
-			(ulong) ut_conv_dulint_to_longlong(
-				trx->undo_no));
-
-		count++;
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	if (count > 0){
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: %lu transactions in prepared state"
-			" after recovery\n",
-			(ulong) count);
-	}
-
-	return ((int) count);
-}
-
-/***********************************************************************
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state. trx_sys->trx_list is scanned under the
-kernel mutex for the first trx that is in the TRX_PREPARED state and whose
-XID equals the given one. */
-
-trx_t*
-trx_get_trx_by_xid(
-/*===============*/
-			/* out: trx or NULL */
-	XID*	xid)	/* in: X/Open XA transaction identification */
-{
-	trx_t*	trx;
-
-	if (xid == NULL) {
-
-		return (NULL);
-	}
-
-	mutex_enter(&kernel_mutex);
-
-	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-	     trx != NULL;
-	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
-		/* The state is tested here, inside the scan and under the
-		kernel mutex, so that a trx in some other state carrying
-		the same XID does not end the search before a prepared
-		trx is found.
-
-		Two X/Open XA transaction ids are equal when their lengths
-		are the same and a binary comparison of
-		gtrid_length + bqual_length bytes of data finds no
-		difference. */
-
-		if (trx->conc_state == TRX_PREPARED
-		    && xid->gtrid_length == trx->xid.gtrid_length
-		    && xid->bqual_length == trx->xid.bqual_length
-		    && memcmp(xid->data, trx->xid.data,
-			      xid->gtrid_length + xid->bqual_length) == 0) {
-			break;
-		}
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	/* trx is NULL here if the scan reached the end of the list */
-
-	return(trx);
-}
diff --git a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c
deleted file mode 100644
index b31580d0ce0..00000000000
--- a/storage/innobase/trx/trx0undo.c
+++ /dev/null
@@ -1,1920 +0,0 @@
-/******************************************************
-Transaction undo log
-
-(c) 1996 Innobase Oy
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0undo.h"
-
-#ifdef UNIV_NONINL
-#include "trx0undo.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "srv0srv.h"
-#include "trx0rec.h"
-#include "trx0purge.h"
-#include "trx0xa.h"
-
-/* How should the old versions in the history list be managed?
- ----------------------------------------------------------
-If each transaction is given a whole page for its update undo log, file
-space consumption can be 10 times higher than necessary. Therefore,
-partly filled update undo log pages should be reusable. But then there
-is no way individual pages can be ordered so that the ordering agrees
-with the serialization numbers of the transactions on the pages. Thus,
-the history list must be formed of undo logs, not their header pages as
-it was in the old implementation.
- However, on a single header page the transactions are placed in
-the order of their serialization numbers. As old versions are purged, we
-may free the page when the last transaction on the page has been purged.
- A problem is that the purge has to go through the transactions
-in the serialization order. This means that we have to look through all
-rollback segments for the one that has the smallest transaction number
-in its history list.
- When should we do a purge? A purge is necessary when space is
-running out in any of the rollback segments. Then we may have to purge
-also old versions which might be needed by some consistent read. How do
-we trigger the start of a purge? When a transaction writes to an undo log,
-it may notice that the space is running out. When a read view is closed,
-it may make some history superfluous. The server can have a utility which
-periodically checks if it can purge some history.
- In a parallelized purge we have the problem that a query thread
-can remove a delete marked clustered index record before another query
-thread has processed an earlier version of the record, which cannot then
-be done because the row cannot be constructed from the clustered index
-record. To avoid this problem, we will store in the update and delete mark
-undo record also the columns necessary to construct the secondary index
-entries which are modified.
- We can latch the stack of versions of a single clustered index record
-by taking a latch on the clustered index page. As long as the latch is held,
-no new versions can be added and no versions removed by undo. But, a purge
-can still remove old versions from the bottom of the stack. */
-
-/* How to protect rollback segments, undo logs, and history lists with
- -------------------------------------------------------------------
-latches?
--------
-The contention of the kernel mutex should be minimized. When a transaction
-does its first insert or modify in an index, an undo log is assigned for it.
-Then we must have an x-latch to the rollback segment header.
- When the transaction does more modifications or rolls back, the undo log is
-protected with undo_mutex in the transaction.
- When the transaction commits, its insert undo log is either reset and
-cached for a fast reuse, or freed. In these cases we must have an x-latch on
-the rollback segment page. The update undo log is put to the history list. If
-it is not suitable for reuse, its slot in the rollback segment is reset. In
-both cases, an x-latch must be acquired on the rollback segment.
- The purge operation steps through the history list without modifying
-it until a truncate operation occurs, which can remove undo logs from the end
-of the list and release undo log segments. In stepping through the list,
-s-latches on the undo log pages are enough, but in a truncate, x-latches must
-be obtained on the rollback segment and individual pages. */
-
-/* Forward declarations of static (file-local) functions. */
-
-/************************************************************************
-Initializes the fields in an undo log segment page. */
-static
-void
-trx_undo_page_init(
-/*===============*/
- page_t* undo_page, /* in: undo log segment page */
- ulint type, /* in: undo log segment type */
- mtr_t* mtr); /* in: mtr */
-/************************************************************************
-Creates and initializes an undo log memory object. */
-static
-trx_undo_t*
-trx_undo_mem_create(
-/*================*/
- /* out, own: the undo log memory object */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- ulint id, /* in: slot index within rseg */
- ulint type, /* in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- dulint trx_id, /* in: id of the trx for which the undo log
- is created */
- XID* xid, /* in: X/Open XA transaction identification*/
- ulint page_no,/* in: undo log header page number */
- ulint offset);/* in: undo log header byte offset on page */
-/*******************************************************************
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function! */
-static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- /* out: undo log header byte offset on page */
- page_t* undo_page, /* in: insert undo log segment header page,
- x-latched */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr); /* in: mtr */
-/**************************************************************************
-If an update undo log can be discarded immediately, this function frees the
-space, resetting the page to the proper state for caching. */
-static
-void
-trx_undo_discard_latest_update_undo(
-/*================================*/
- page_t* undo_page, /* in: header page of an undo log of size 1 */
- mtr_t* mtr); /* in: mtr */
-
-
-/***************************************************************************
-Gets the previous record in an undo log from the previous page: follows the
-backward link of the page list node on the page of rec and returns the last
-record of the undo log on the page found there. */
-static
-trx_undo_rec_t*
-trx_undo_get_prev_rec_from_prev_page(
-/*=================================*/
-				/* out: undo log record, the page s-latched,
-				NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset,	/* in: undo log header offset on page */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	page_t*	cur_page = buf_frame_align(rec);
-	page_t*	prev_page;
-	ulint	prev_page_no;
-	ulint	space;
-
-	prev_page_no = flst_get_prev_addr(
-		cur_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr).page;
-
-	if (prev_page_no == FIL_NULL) {
-		/* rec is on the first page of the list */
-
-		return(NULL);
-	}
-
-	space = buf_frame_get_space_id(cur_page);
-
-	prev_page = trx_undo_page_get_s_latched(space, prev_page_no, mtr);
-
-	return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
-}
-
-/***************************************************************************
-Gets the previous record in an undo log. The page of rec is searched first;
-only if it holds no earlier record of the log is the previous undo log page
-consulted. */
-
-trx_undo_rec_t*
-trx_undo_get_prev_rec(
-/*==================*/
-				/* out: undo log record, the page s-latched,
-				NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset,	/* in: undo log header offset on page */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	trx_undo_rec_t*	found;
-
-	found = trx_undo_page_get_prev_rec(rec, page_no, offset);
-
-	if (!found) {
-		/* We have to go to the previous undo log page to look for
-		the previous record */
-
-		found = trx_undo_get_prev_rec_from_prev_page(rec, page_no,
-							     offset, mtr);
-	}
-
-	return(found);
-}
-
-/***************************************************************************
-Gets the next record in an undo log from the next page. Returns NULL without
-looking further if undo_page is the header page of the log and the
-TRX_UNDO_NEXT_LOG field of the log header is nonzero, or if undo_page has no
-successor in the page list. */
-static
-trx_undo_rec_t*
-trx_undo_get_next_rec_from_next_page(
-/*=================================*/
-			/* out: undo log record, the page latched, NULL if
-			none */
-	page_t*	undo_page, /* in: undo log page */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset,	/* in: undo log header offset on page */
-	ulint	mode,	/* in: latch mode: RW_S_LATCH or RW_X_LATCH */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	page_t*	following_page;
-	ulint	following_page_no;
-	ulint	space;
-
-	if (page_no == buf_frame_get_page_no(undo_page)
-	    && mach_read_from_2(undo_page + offset + TRX_UNDO_NEXT_LOG)
-	    != 0) {
-		/* We are on the header page of the log and the header
-		records a next log: there is nothing on a following page */
-
-		return(NULL);
-	}
-
-	space = buf_frame_get_space_id(undo_page);
-
-	following_page_no = flst_get_next_addr(
-		undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr).page;
-
-	if (following_page_no == FIL_NULL) {
-
-		return(NULL);
-	}
-
-	if (mode != RW_S_LATCH) {
-		ut_ad(mode == RW_X_LATCH);
-		following_page = trx_undo_page_get(space, following_page_no,
-						   mtr);
-	} else {
-		following_page = trx_undo_page_get_s_latched(
-			space, following_page_no, mtr);
-	}
-
-	return(trx_undo_page_get_first_rec(following_page, page_no, offset));
-}
-
-/***************************************************************************
-Gets the next record in an undo log. The page of rec is searched first; if
-it holds no later record of the log, the search continues on the next undo
-log page, which is then s-latched. */
-
-trx_undo_rec_t*
-trx_undo_get_next_rec(
-/*==================*/
-				/* out: undo log record, the page s-latched,
-				NULL if none */
-	trx_undo_rec_t*	rec,	/* in: undo record */
-	ulint		page_no,/* in: undo log header page number */
-	ulint		offset,	/* in: undo log header offset on page */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	trx_undo_rec_t*	found;
-
-	found = trx_undo_page_get_next_rec(rec, page_no, offset);
-
-	if (!found) {
-		found = trx_undo_get_next_rec_from_next_page(
-			buf_frame_align(rec), page_no, offset,
-			RW_S_LATCH, mtr);
-	}
-
-	return(found);
-}
-
-/***************************************************************************
-Gets the first record in an undo log. The header page of the log is latched
-in the requested mode; if it contains no record of the log, the search
-continues on the next page of the log. */
-
-trx_undo_rec_t*
-trx_undo_get_first_rec(
-/*===================*/
-			/* out: undo log record, the page latched, NULL if
-			none */
-	ulint	space,	/* in: undo log header space */
-	ulint	page_no,/* in: undo log header page number */
-	ulint	offset,	/* in: undo log header offset on page */
-	ulint	mode,	/* in: latching mode: RW_S_LATCH or RW_X_LATCH */
-	mtr_t*	mtr)	/* in: mtr */
-{
-	page_t*		hdr_page;
-	trx_undo_rec_t*	first;
-
-	if (mode != RW_S_LATCH) {
-		hdr_page = trx_undo_page_get(space, page_no, mtr);
-	} else {
-		hdr_page = trx_undo_page_get_s_latched(space, page_no, mtr);
-	}
-
-	first = trx_undo_page_get_first_rec(hdr_page, page_no, offset);
-
-	if (!first) {
-		/* Nothing on the header page: try the next page */
-		first = trx_undo_get_next_rec_from_next_page(
-			hdr_page, page_no, offset, mode, mtr);
-	}
-
-	return(first);
-}
-
-/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/
-
-/**************************************************************************
-Writes the mtr log entry of an undo log page initialization: an initial log
-record of type MLOG_UNDO_INIT for the page, followed by the undo log type in
-compressed form. */
-UNIV_INLINE
-void
-trx_undo_page_init_log(
-/*===================*/
-	page_t*	undo_page,	/* in: undo log page */
-	ulint	type,		/* in: undo log type */
-	mtr_t*	mtr)		/* in: mtr */
-{
-	/* Record header first ... */
-	mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr);
-
-	/* ... then the only parameter of the record */
-	mlog_catenate_ulint_compressed(mtr, type);
-}
-
-/***************************************************************
-Parses the redo log entry of an undo log page initialization. The compressed
-undo log type is read from the buffer and, if a page is given, the page is
-initialized with it. */
-
-byte*
-trx_undo_parse_page_init(
-/*=====================*/
-			/* out: end of log record or NULL */
-	byte*	ptr,	/* in: buffer */
-	byte*	end_ptr,/* in: buffer end */
-	page_t*	page,	/* in: page or NULL */
-	mtr_t*	mtr)	/* in: mtr or NULL */
-{
-	ulint	undo_type;
-	byte*	rec_end;
-
-	rec_end = mach_parse_compressed(ptr, end_ptr, &undo_type);
-
-	if (rec_end != NULL && page) {
-		/* The record was parsed successfully and there is a page
-		to apply it to */
-		trx_undo_page_init(page, undo_type, mtr);
-	}
-
-	return(rec_end);
-}
-
-/************************************************************************
-Initializes the fields in an undo log segment page: stores the undo log type
-in the undo page header, points both the start and the free offsets just past
-the undo page header, marks the file page as FIL_PAGE_UNDO_LOG and writes the
-corresponding redo log record. */
-static
-void
-trx_undo_page_init(
-/*===============*/
-	page_t*	undo_page,	/* in: undo log segment page */
-	ulint	type,		/* in: undo log segment type */
-	mtr_t*	mtr)		/* in: mtr */
-{
-	trx_upagef_t*	hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
-	mach_write_to_2(hdr + TRX_UNDO_PAGE_TYPE, type);
-
-	/* No records yet: start and free both point to the first byte
-	after the undo page header */
-	mach_write_to_2(hdr + TRX_UNDO_PAGE_START,
-			TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
-	mach_write_to_2(hdr + TRX_UNDO_PAGE_FREE,
-			TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
-
-	fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG);
-
-	trx_undo_page_init_log(undo_page, type, mtr);
-}
-
-/*******************************************************************
-Creates a new undo log segment in file: finds a free slot in the rollback
-segment header, reserves file space, allocates a file segment whose header
-page becomes the undo log segment header page, initializes that page and the
-segment header on it, and stores the page number in the slot. */
-static
-ulint
-trx_undo_seg_create(
-/*================*/
-				/* out: DB_SUCCESS if page creation OK
-				possible error codes are:
-				DB_TOO_MANY_CONCURRENT_TRXS
-				DB_OUT_OF_FILE_SPACE */
-	trx_rseg_t*	rseg __attribute__((unused)),/* in: rollback segment */
-	trx_rsegf_t*	rseg_hdr,/* in: rollback segment header, page
-				x-latched */
-	ulint		type,	/* in: type of the segment: TRX_UNDO_INSERT or
-				TRX_UNDO_UPDATE */
-	ulint*		id,	/* out: slot index within rseg header */
-	page_t**	undo_page,
-				/* out: segment header page x-latched, NULL
-				if there was an error */
-	mtr_t*		mtr)	/* in: mtr */
-{
-	ulint		slot_no;
-	ulint		space;
-	trx_upagef_t*	page_hdr;
-	trx_usegf_t*	seg_hdr;
-	ulint		n_reserved;
-	ibool		success;
-
-	ut_ad(mtr && id && rseg_hdr);
-	ut_ad(mutex_own(&(rseg->mutex)));
-
-	/* Honor the documented contract on EVERY error path, including the
-	early returns below which happen before any page is allocated */
-	*undo_page = NULL;
-
-	/*	fputs(type == TRX_UNDO_INSERT
-	? "Creating insert undo log segment\n"
-	: "Creating update undo log segment\n", stderr); */
-	slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr);
-
-	if (slot_no == ULINT_UNDEFINED) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Warning: cannot find a free slot for"
-			" an undo log. Do you have too\n"
-			"InnoDB: many active transactions"
-			" running concurrently?\n");
-
-		return(DB_TOO_MANY_CONCURRENT_TRXS);
-	}
-
-	space = buf_frame_get_space_id(rseg_hdr);
-
-	success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO,
-					   mtr);
-	if (!success) {
-
-		return(DB_OUT_OF_FILE_SPACE);
-	}
-
-	/* Allocate a new file segment for the undo log */
-	*undo_page = fseg_create_general(space, 0,
-					 TRX_UNDO_SEG_HDR
-					 + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
-
-	/* The reservation is released whether or not the allocation
-	succeeded */
-	fil_space_release_free_extents(space, n_reserved);
-
-	if (*undo_page == NULL) {
-		/* No space left */
-
-		return(DB_OUT_OF_FILE_SPACE);
-	}
-
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(*undo_page, SYNC_TRX_UNDO_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
-
-	page_hdr = *undo_page + TRX_UNDO_PAGE_HDR;
-	seg_hdr = *undo_page + TRX_UNDO_SEG_HDR;
-
-	trx_undo_page_init(*undo_page, type, mtr);
-
-	/* On the segment header page the free space starts after the
-	segment header, not directly after the page header */
-	mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE,
-			 TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE,
-			 MLOG_2BYTES, mtr);
-
-	mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr);
-
-	/* The header page is the first member of the page list of the
-	segment */
-	flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr);
-
-	flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST,
-		      page_hdr + TRX_UNDO_PAGE_NODE, mtr);
-
-	trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
-			       buf_frame_get_page_no(*undo_page), mtr);
-
-	*id = slot_no;
-
-	return(DB_SUCCESS);
-}
-
-/**************************************************************************
-Writes the mtr log entry of an undo log header initialization. The entry
-is an initial log record of type MLOG_UNDO_HDR_CREATE for the page,
-followed by the transaction id appended in compressed dulint form. */
-UNIV_INLINE
-void
-trx_undo_header_create_log(
-/*=======================*/
- page_t* undo_page, /* in: undo log header page */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr) /* in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr);
-
- mlog_catenate_dulint_compressed(mtr, trx_id);
-}
-
-/*******************************************************************
-Creates a new undo log header in file. NOTE that this function has its own
-log record type MLOG_UNDO_HDR_CREATE. You must NOT change the operation of
-this function!
-The new header is placed at the current TRX_UNDO_PAGE_FREE offset of the
-page. The page start and free offsets are advanced by
-TRX_UNDO_LOG_OLD_HDR_SIZE, the segment state is set to TRX_UNDO_ACTIVE and
-the new header is linked after the previous last log header of the
-segment, if there is one. All fields are written with mach_write_to_*;
-the only redo logging is the single record written at the end by
-trx_undo_header_create_log(). */
-static
-ulint
-trx_undo_header_create(
-/*===================*/
- /* out: header byte offset on page */
- page_t* undo_page, /* in: undo log segment header page,
- x-latched; it is assumed that there are
- TRX_UNDO_LOG_XA_HDR_SIZE bytes free space
- on it */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr) /* in: mtr */
-{
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- trx_ulogf_t* log_hdr;
- trx_ulogf_t* prev_log_hdr;
- ulint prev_log;
- ulint free;
- ulint new_free;
-
- ut_ad(mtr && undo_page);
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE);
-
- log_hdr = undo_page + free; /* the same value is assigned again further down */
-
- new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
-
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100); /* checks room for the XA-sized header although only the old-style size is consumed here */
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
-
- prev_log = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
-
- if (prev_log != 0) { /* offset 0 means that there is no previous log header */
- prev_log_hdr = undo_page + prev_log;
-
- mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, free);
- }
-
- mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, free);
-
- log_hdr = undo_page + free;
-
- mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE);
-
- mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
- mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
-
- mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
- mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
-
- mach_write_to_2(log_hdr + TRX_UNDO_NEXT_LOG, 0);
- mach_write_to_2(log_hdr + TRX_UNDO_PREV_LOG, prev_log);
-
- /* Write the log record about the header creation */
- trx_undo_header_create_log(undo_page, trx_id, mtr);
-
- return(free);
-}
-
-/************************************************************************
-Write X/Open XA Transaction Identification (XID) to undo log header.
-The formatID, gtrid_length and bqual_length members are each written as a
-4-byte value and the data member as a string of XIDDATASIZE bytes. Every
-write goes through an mlog_write_* function with the given mtr. */
-static
-void
-trx_undo_write_xid(
-/*===============*/
- trx_ulogf_t* log_hdr,/* in: undo log header */
- const XID* xid, /* in: X/Open XA Transaction Identification */
- mtr_t* mtr) /* in: mtr */
-{
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT,
- (ulint)xid->formatID, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN,
- (ulint)xid->gtrid_length, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN,
- (ulint)xid->bqual_length, MLOG_4BYTES, mtr);
-
- mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data,
- XIDDATASIZE, mtr); /* always the full XIDDATASIZE bytes, independent of the two length fields */
-}
-
-/************************************************************************
-Read X/Open XA Transaction Identification (XID) from undo log header.
-Reads the three 4-byte fields with mach_read_from_4(), casts each to long,
-and copies XIDDATASIZE bytes of XID data with memcpy(). No mtr is
-involved; the function does not check whether an XID is actually stored
-in the header. */
-static
-void
-trx_undo_read_xid(
-/*==============*/
- trx_ulogf_t* log_hdr,/* in: undo log header */
- XID* xid) /* out: X/Open XA Transaction Identification */
-{
- xid->formatID = (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
-
- xid->gtrid_length
- = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN);
- xid->bqual_length
- = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN);
-
- memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE);
-}
-
-/*******************************************************************
-Adds space for the XA XID after an undo log old-style header. The
-function asserts that the page free offset is exactly at the end of the
-old-style header, then moves the page start offset, the page free offset
-and the log start offset of the header forward by the difference between
-TRX_UNDO_LOG_XA_HDR_SIZE and TRX_UNDO_LOG_OLD_HDR_SIZE. The three updates
-are written with mlog_write_ulint(). */
-static
-void
-trx_undo_header_add_space_for_xid(
-/*==============================*/
- page_t* undo_page,/* in: undo log segment header page */
- trx_ulogf_t* log_hdr,/* in: undo log header */
- mtr_t* mtr) /* in: mtr */
-{
- trx_upagef_t* page_hdr;
- ulint free;
- ulint new_free;
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE);
-
- /* free is now the end offset of the old style undo log header */
-
- ut_a(free == (ulint)(log_hdr - undo_page) + TRX_UNDO_LOG_OLD_HDR_SIZE);
-
- new_free = free + (TRX_UNDO_LOG_XA_HDR_SIZE
- - TRX_UNDO_LOG_OLD_HDR_SIZE);
-
- /* Add space for a XID after the header, update the free offset
- fields on the undo log page and in the undo log header */
-
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_START, new_free,
- MLOG_2BYTES, mtr);
-
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, new_free,
- MLOG_2BYTES, mtr);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, new_free,
- MLOG_2BYTES, mtr);
-}
-
-/**************************************************************************
-Writes the mtr log entry of an undo log header reuse. The entry is an
-initial log record of type MLOG_UNDO_HDR_REUSE for the page, followed by
-the transaction id appended in compressed dulint form. */
-UNIV_INLINE
-void
-trx_undo_insert_header_reuse_log(
-/*=============================*/
- page_t* undo_page, /* in: undo log header page */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr) /* in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
-
- mlog_catenate_dulint_compressed(mtr, trx_id);
-}
-
-/***************************************************************
-Parses the redo log entry of an undo log page header create or reuse.
-The body of the record is a transaction id in compressed dulint form. If
-it cannot be parsed from the buffer, NULL is returned. If a page is given,
-the operation selected by type is applied to it with the parsed
-transaction id; without a page the record is only parsed. */
-
-byte*
-trx_undo_parse_page_header(
-/*=======================*/
- /* out: end of log record or NULL */
- ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
-{
- dulint trx_id;
-
- ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (page) {
- if (type == MLOG_UNDO_HDR_CREATE) {
- trx_undo_header_create(page, trx_id, mtr); /* returned header offset is not needed here */
- } else {
- ut_ad(type == MLOG_UNDO_HDR_REUSE); /* checked only in debug builds; any other type takes the reuse path */
- trx_undo_insert_header_reuse(page, trx_id, mtr);
- }
- }
-
- return(ptr);
-}
-
-/*******************************************************************
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function!
-The header is always placed directly after the undo segment header, at
-offset TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE, whatever the page free
-offset was before the call. The function asserts that the page type is
-TRX_UNDO_INSERT. All fields are written with mach_write_to_*; the only
-redo logging is the single record written at the end by
-trx_undo_insert_header_reuse_log(). */
-static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- /* out: undo log header byte offset on page */
- page_t* undo_page, /* in: insert undo log segment header page,
- x-latched */
- dulint trx_id, /* in: transaction id */
- mtr_t* mtr) /* in: mtr */
-{
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- trx_ulogf_t* log_hdr;
- ulint free;
- ulint new_free;
-
- ut_ad(mtr && undo_page);
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE;
-
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100);
-
- log_hdr = undo_page + free; /* the same value is assigned again further down */
-
- new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
-
- /* Insert undo data is not needed after commit: we may free all
- the space on the page */
-
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_INSERT);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
-
- log_hdr = undo_page + free;
-
- mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
- mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
-
- mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
- mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
-
- /* Write the log record MLOG_UNDO_HDR_REUSE */
- trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr);
-
- return(free);
-}
-
-/**************************************************************************
-Writes the redo log entry of an update undo log header discard. The
-entry is only the initial log record of type MLOG_UNDO_HDR_DISCARD for
-the page; nothing is appended to it. */
-UNIV_INLINE
-void
-trx_undo_discard_latest_log(
-/*========================*/
- page_t* undo_page, /* in: undo log header page */
- mtr_t* mtr) /* in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr);
-}
-
-/***************************************************************
-Parses the redo log entry of an undo log page header discard. Nothing is
-read from the buffer: ptr is returned unchanged. If a page is given, the
-discard operation is applied to it. */
-
-byte*
-trx_undo_parse_discard_latest(
-/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr __attribute__((unused)), /* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr) /* in: mtr or NULL */
-{
- ut_ad(end_ptr); /* the only use of end_ptr; a debug assertion */
-
- if (page) {
- trx_undo_discard_latest_update_undo(page, mtr);
- }
-
- return(ptr);
-}
-
-/**************************************************************************
-If an update undo log can be discarded immediately, this function frees the
-space, resetting the page to the proper state for caching. The latest log
-header of the segment is dropped: the page free offset is moved back to
-the offset of that header, the segment state becomes TRX_UNDO_CACHED and
-the previous log header, if any, becomes the last log of the segment.
-All fields are written with mach_write_to_2(); the only redo logging is
-the single record written at the end by trx_undo_discard_latest_log(). */
-static
-void
-trx_undo_discard_latest_update_undo(
-/*================================*/
- page_t* undo_page, /* in: header page of an undo log of size 1 */
- mtr_t* mtr) /* in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_upagef_t* page_hdr;
- trx_ulogf_t* log_hdr;
- trx_ulogf_t* prev_log_hdr;
- ulint free;
- ulint prev_hdr_offset;
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); /* offset of the header being discarded; becomes the new free offset */
- log_hdr = undo_page + free;
-
- prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG);
-
- if (prev_hdr_offset != 0) {
- prev_log_hdr = undo_page + prev_hdr_offset;
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
- mach_read_from_2(prev_log_hdr
- + TRX_UNDO_LOG_START));
- mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0);
- }
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED);
- mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset); /* 0 when there was no previous header */
-
- trx_undo_discard_latest_log(undo_page, mtr);
-}
-
-/************************************************************************
-Tries to add a page to the undo log segment where the undo log is placed.
-Fails with FIL_NULL if the rollback segment is already at its maximum
-size, if one free extent cannot be reserved, or if no page can be
-allocated from the file segment. On success the new page is initialized
-as an undo page of the type of the undo log, appended to the page list of
-the segment, and undo->last_page_no, undo->size and rseg->curr_size are
-updated. undo->top_page_no is not changed here. */
-
-ulint
-trx_undo_add_page(
-/*==============*/
- /* out: page number if success, else
- FIL_NULL */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory object */
- mtr_t* mtr) /* in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-{
- page_t* header_page;
- page_t* new_page;
- trx_rseg_t* rseg;
- ulint page_no;
- ulint n_reserved;
- ibool success;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(!mutex_own(&kernel_mutex));
- ut_ad(mutex_own(&(trx->rseg->mutex)));
-
- rseg = trx->rseg;
-
- if (rseg->curr_size == rseg->max_size) {
-
- return(FIL_NULL);
- }
-
- header_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
-
- success = fsp_reserve_free_extents(&n_reserved, undo->space, 1,
- FSP_UNDO, mtr);
- if (!success) {
-
- return(FIL_NULL);
- }
-
- page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
- + TRX_UNDO_FSEG_HEADER,
- undo->top_page_no + 1, FSP_UP,
- TRUE, mtr);
-
- fil_space_release_free_extents(undo->space, n_reserved); /* released before the result is checked, so both outcomes release */
-
- if (page_no == FIL_NULL) {
-
- /* No space left */
-
- return(FIL_NULL);
- }
-
- undo->last_page_no = page_no;
-
- new_page = trx_undo_page_get(undo->space, page_no, mtr);
-
- trx_undo_page_init(new_page, undo->type, mtr);
-
- flst_add_last(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
- new_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
- undo->size++;
- rseg->curr_size++;
-
- return(page_no);
-}
-
-/************************************************************************
-Frees an undo log page that is not the header page. The page is removed
-from the page list kept in the segment header, freed from the file
-segment, and rseg->curr_size is decremented. If in_history is TRUE, the
-TRX_RSEG_HISTORY_SIZE field in the rollback segment header is also
-decremented by one. */
-static
-ulint
-trx_undo_free_page(
-/*===============*/
- /* out: last page number in remaining log */
- trx_rseg_t* rseg, /* in: rollback segment */
- ibool in_history, /* in: TRUE if the undo log is in the history
- list */
- ulint space, /* in: space */
- ulint hdr_page_no, /* in: header page number */
- ulint page_no, /* in: page number to free: must not be the
- header page */
- mtr_t* mtr) /* in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-{
- page_t* header_page;
- page_t* undo_page;
- fil_addr_t last_addr;
- trx_rsegf_t* rseg_header;
- ulint hist_size;
-
- ut_a(hdr_page_no != page_no);
- ut_ad(!mutex_own(&kernel_mutex));
- ut_ad(mutex_own(&(rseg->mutex)));
-
- undo_page = trx_undo_page_get(space, page_no, mtr);
-
- header_page = trx_undo_page_get(space, hdr_page_no, mtr);
-
- flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
- undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
-
- fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER,
- space, page_no, mtr);
-
- last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR
- + TRX_UNDO_PAGE_LIST, mtr); /* read after the removal, so this is the last page of the remaining list */
- rseg->curr_size--;
-
- if (in_history) {
- rseg_header = trx_rsegf_get(space, rseg->page_no, mtr);
-
- hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr);
- ut_ad(hist_size > 0);
- mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- hist_size - 1, MLOG_4BYTES, mtr);
- }
-
- return(last_addr.page);
-}
-
-/************************************************************************
-Frees an undo log page when there is also the memory object for the undo
-log. Calls trx_undo_free_page() with in_history == FALSE and then updates
-undo->last_page_no with the returned page number and decrements
-undo->size. */
-static
-void
-trx_undo_free_page_in_rollback(
-/*===========================*/
- trx_t* trx __attribute__((unused)), /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- ulint page_no,/* in: page number to free: must not be the
- header page */
- mtr_t* mtr) /* in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-{
- ulint last_page_no;
-
- ut_ad(undo->hdr_page_no != page_no);
- ut_ad(mutex_own(&(trx->undo_mutex))); /* the only use of trx; a debug assertion */
-
- last_page_no = trx_undo_free_page(undo->rseg, FALSE, undo->space,
- undo->hdr_page_no, page_no, mtr);
-
- undo->last_page_no = last_page_no;
- undo->size--;
-}
-
-/************************************************************************
-Empties an undo log header page of undo records for that undo log. Other
-undo logs may still have records on that page, if it is an update undo log.
-The emptying is done by setting the TRX_UNDO_LOG_START field of the log
-header to the value returned by trx_undo_page_get_end() for this log; the
-write is made with mlog_write_ulint(). */
-static
-void
-trx_undo_empty_header_page(
-/*=======================*/
- ulint space, /* in: space */
- ulint hdr_page_no, /* in: header page number */
- ulint hdr_offset, /* in: header offset */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* header_page;
- trx_ulogf_t* log_hdr;
- ulint end;
-
- header_page = trx_undo_page_get(space, hdr_page_no, mtr);
-
- log_hdr = header_page + hdr_offset;
-
- end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr);
-}
-
-/***************************************************************************
-Truncates an undo log from the end. This function is used during a rollback
-to free space from an undo log.
-Each iteration of the outer loop starts its own mini-transaction and
-walks the records of the current last page backwards. When the page has
-no records left to visit and is not the header page, the page is freed,
-the mini-transaction is committed and the loop continues with the new
-last page. The loop is left through function_exit when a record with undo
-number < limit is found or when the header page has been walked through;
-at that point the mini-transaction of the current iteration is still open
-and is committed after the free offset of the page has been moved back to
-the first truncated record, if there is one on that page. */
-
-void
-trx_undo_truncate_end(
-/*==================*/
- trx_t* trx, /* in: transaction whose undo log it is */
- trx_undo_t* undo, /* in: undo log */
- dulint limit) /* in: all undo records with undo number
- >= this value should be truncated */
-{
- page_t* undo_page;
- ulint last_page_no;
- trx_undo_rec_t* rec;
- trx_undo_rec_t* trunc_here;
- trx_rseg_t* rseg;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&(trx->rseg->mutex)));
-
- rseg = trx->rseg; /* assigned but not read again in this function */
-
- for (;;) {
- mtr_start(&mtr);
-
- trunc_here = NULL; /* reset for every page */
-
- last_page_no = undo->last_page_no;
-
- undo_page = trx_undo_page_get(undo->space, last_page_no, &mtr);
-
- rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no,
- undo->hdr_offset);
- for (;;) {
- if (rec == NULL) {
- if (last_page_no == undo->hdr_page_no) {
-
- goto function_exit;
- }
-
- trx_undo_free_page_in_rollback(
- trx, undo, last_page_no, &mtr); /* updates undo->last_page_no for the next outer iteration */
- break;
- }
-
- if (ut_dulint_cmp(trx_undo_rec_get_undo_no(rec), limit)
- >= 0) {
- /* Truncate at least this record off, maybe
- more */
- trunc_here = rec;
- } else {
- goto function_exit;
- }
-
- rec = trx_undo_page_get_prev_rec(rec,
- undo->hdr_page_no,
- undo->hdr_offset);
- }
-
- mtr_commit(&mtr);
- }
-
-function_exit:
- if (trunc_here) {
- mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE,
- trunc_here - undo_page, MLOG_2BYTES, &mtr);
- }
-
- mtr_commit(&mtr);
-}
-
-/***************************************************************************
-Truncates an undo log from the start. This function is used during a purge
-operation.
-The function does nothing if limit is zero. Otherwise it repeats the
-following in a separate mini-transaction per round: fetch the first
-record of the log; stop if there is none or if the last record on the
-same page has an undo number >= limit; else empty the page if it is the
-header page, or free it if it is not. */
-
-void
-trx_undo_truncate_start(
-/*====================*/
- trx_rseg_t* rseg, /* in: rollback segment */
- ulint space, /* in: space id of the log */
- ulint hdr_page_no, /* in: header page number */
- ulint hdr_offset, /* in: header offset on the page */
- dulint limit) /* in: all undo pages with undo numbers <
- this value should be truncated; NOTE that
- the function only frees whole pages; the
- header page is not freed, but emptied, if
- all the records there are < limit */
-{
- page_t* undo_page;
- trx_undo_rec_t* rec;
- trx_undo_rec_t* last_rec;
- ulint page_no;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (0 == ut_dulint_cmp(limit, ut_dulint_zero)) {
-
- return;
- }
-loop:
- mtr_start(&mtr);
-
- rec = trx_undo_get_first_rec(space, hdr_page_no, hdr_offset,
- RW_X_LATCH, &mtr);
- if (rec == NULL) {
- /* Already empty */
-
- mtr_commit(&mtr);
-
- return;
- }
-
- undo_page = buf_frame_align(rec);
-
- last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no,
- hdr_offset);
- if (ut_dulint_cmp(trx_undo_rec_get_undo_no(last_rec), limit) >= 0) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- page_no = buf_frame_get_page_no(undo_page);
-
- if (page_no == hdr_page_no) {
- trx_undo_empty_header_page(space, hdr_page_no, hdr_offset,
- &mtr);
- } else {
- trx_undo_free_page(rseg, TRUE, space, hdr_page_no,
- page_no, &mtr); /* in_history == TRUE; the returned page number is not used */
- }
-
- mtr_commit(&mtr);
-
- goto loop;
-}
-
-/**************************************************************************
-Frees an undo log segment which is not in the history list. The file
-segment is freed with repeated calls of fseg_free_step(), each call in
-its own mini-transaction and with the rollback segment mutex held for the
-duration of the step. In the step that reports the segment as finished,
-the slot undo->id in the rollback segment header is set to FIL_NULL
-within the same mini-transaction. */
-static
-void
-trx_undo_seg_free(
-/*==============*/
- trx_undo_t* undo) /* in: undo log */
-{
- trx_rseg_t* rseg;
- fseg_header_t* file_seg;
- trx_rsegf_t* rseg_header;
- trx_usegf_t* seg_header;
- ibool finished;
- mtr_t mtr;
-
- finished = FALSE;
- rseg = undo->rseg;
-
- while (!finished) {
-
- mtr_start(&mtr);
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- mutex_enter(&(rseg->mutex));
-
- seg_header = trx_undo_page_get(undo->space, undo->hdr_page_no,
- &mtr) + TRX_UNDO_SEG_HDR;
-
- file_seg = seg_header + TRX_UNDO_FSEG_HEADER;
-
- finished = fseg_free_step(file_seg, &mtr);
-
- if (finished) {
- /* Update the rseg header */
- rseg_header = trx_rsegf_get(rseg->space, rseg->page_no,
- &mtr);
- trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL,
- &mtr);
- }
-
- mutex_exit(&(rseg->mutex)); /* the mutex is released before the mtr is committed */
- mtr_commit(&mtr);
- }
-}
-
-/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/
-
-/************************************************************************
-Creates and initializes an undo log memory object according to the values
-in the header in file, when the database is started. The memory object is
-inserted in the appropriate list of rseg.
-The values are taken from the latest log header of the segment, that is,
-the header at offset TRX_UNDO_LAST_LOG on the segment header page. The
-list is chosen by the undo log type (insert or update) and by whether the
-segment state is TRX_UNDO_CACHED.
-NOTE(review): the return value of trx_undo_mem_create() is dereferenced
-here without a NULL check, although that function contains a path that
-returns NULL when mem_alloc() returns NULL. Confirm whether mem_alloc()
-can return NULL in this configuration. */
-static
-trx_undo_t*
-trx_undo_mem_create_at_db_start(
-/*============================*/
- /* out, own: the undo log memory object */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- ulint id, /* in: slot index within rseg */
- ulint page_no,/* in: undo log segment page number */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* undo_page;
- trx_upagef_t* page_header;
- trx_usegf_t* seg_header;
- trx_ulogf_t* undo_header;
- trx_undo_t* undo;
- ulint type;
- ulint state;
- dulint trx_id;
- ulint offset;
- fil_addr_t last_addr;
- page_t* last_page;
- trx_undo_rec_t* rec;
- XID xid;
- ibool xid_exists = FALSE;
-
- if (id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) id);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(rseg->space, page_no, mtr);
-
- page_header = undo_page + TRX_UNDO_PAGE_HDR;
-
- type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES,
- mtr);
- seg_header = undo_page + TRX_UNDO_SEG_HDR;
-
- state = mach_read_from_2(seg_header + TRX_UNDO_STATE);
-
- offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG);
-
- undo_header = undo_page + offset;
-
- trx_id = mtr_read_dulint(undo_header + TRX_UNDO_TRX_ID, mtr);
-
- xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS,
- MLOG_1BYTE, mtr);
-
- /* Read X/Open XA transaction identification if it exists, or
- set it to NULL. Here "NULL" is an all-zero XID whose formatID is
- then set to -1. */
-
- memset(&xid, 0, sizeof(xid));
- xid.formatID = -1;
-
- if (xid_exists == TRUE) {
- trx_undo_read_xid(undo_header, &xid);
- }
-
- mutex_enter(&(rseg->mutex)); /* trx_undo_mem_create() debug-asserts that this mutex is owned */
-
- undo = trx_undo_mem_create(rseg, id, type, trx_id, &xid,
- page_no, offset);
- mutex_exit(&(rseg->mutex));
-
- undo->dict_operation = mtr_read_ulint(
- undo_header + TRX_UNDO_DICT_TRANS, MLOG_1BYTE, mtr);
-
- undo->table_id = mtr_read_dulint(undo_header + TRX_UNDO_TABLE_ID, mtr);
- undo->state = state;
- undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr);
-
- /* If the log segment is being freed, the page list is inconsistent! */
- if (state == TRX_UNDO_TO_FREE) {
-
- goto add_to_list; /* last_page_no, top_page_no and empty keep the values set by trx_undo_mem_create() */
- }
-
- last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr);
-
- undo->last_page_no = last_addr.page;
- undo->top_page_no = last_addr.page;
-
- last_page = trx_undo_page_get(rseg->space, undo->last_page_no, mtr);
-
- rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
-
- if (rec == NULL) {
- undo->empty = TRUE;
- } else {
- undo->empty = FALSE;
- undo->top_offset = rec - last_page;
- undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
- }
-add_to_list:
- if (type == TRX_UNDO_INSERT) {
- if (state != TRX_UNDO_CACHED) {
- UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list,
- undo);
- } else {
- UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached,
- undo);
- }
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
- if (state != TRX_UNDO_CACHED) {
- UT_LIST_ADD_LAST(undo_list, rseg->update_undo_list,
- undo);
- } else {
- UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached,
- undo);
- }
- }
-
- return(undo);
-}
-
-/************************************************************************
-Initializes the undo log lists for a rollback segment memory copy. This
-function is only called when the database is started or a new rollback
-segment is created.
-The four undo lists of rseg are initialized as empty. Then every slot of
-the rollback segment header is read, and for each slot that holds a page
-number an undo log memory object is created from the page, unless
-srv_force_recovery is SRV_FORCE_NO_UNDO_LOG_SCAN or higher. The
-mini-transaction is committed and restarted after each undo log that is
-created. */
-
-ulint
-trx_undo_lists_init(
-/*================*/
- /* out: the combined size of undo log segments
- in pages */
- trx_rseg_t* rseg) /* in: rollback segment memory object */
-{
- ulint page_no;
- trx_undo_t* undo;
- ulint size = 0;
- trx_rsegf_t* rseg_header;
- ulint i;
- mtr_t mtr;
-
- UT_LIST_INIT(rseg->update_undo_list);
- UT_LIST_INIT(rseg->update_undo_cached);
- UT_LIST_INIT(rseg->insert_undo_list);
- UT_LIST_INIT(rseg->insert_undo_cached);
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr);
-
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
- page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
-
- /* In forced recovery: try to avoid operations which look
- at database pages; undo logs are rapidly changing data, and
- the probability that they are in an inconsistent state is
- high */
-
- if (page_no != FIL_NULL
- && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
-
- undo = trx_undo_mem_create_at_db_start(rseg, i,
- page_no, &mtr);
- size += undo->size;
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get(rseg->space,
- rseg->page_no, &mtr); /* fetched again because the previous mtr was committed */
- }
- }
-
- mtr_commit(&mtr);
-
- return(size);
-}
-
-/************************************************************************
-Creates and initializes an undo log memory object. The object is
-allocated with mem_alloc() and describes an active, empty undo log of one
-page whose header page is also its last page and its top page. The XID
-is copied by value. The fields table_id, top_offset and top_undo_no are
-not assigned here. The object is not added to any list of rseg. */
-static
-trx_undo_t*
-trx_undo_mem_create(
-/*================*/
- /* out, own: the undo log memory object,
- or NULL if mem_alloc() returned NULL */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- ulint id, /* in: slot index within rseg */
- ulint type, /* in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- dulint trx_id, /* in: id of the trx for which the undo log
- is created */
- XID* xid, /* in: X/Open transaction identification */
- ulint page_no,/* in: undo log header page number */
- ulint offset) /* in: undo log header byte offset on page */
-{
- trx_undo_t* undo;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) id);
- ut_error;
- }
-
- undo = mem_alloc(sizeof(trx_undo_t));
-
- if (undo == NULL) {
-
- return NULL;
- }
-
- undo->id = id;
- undo->type = type;
- undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
- undo->trx_id = trx_id;
- undo->xid = *xid;
-
- undo->dict_operation = FALSE;
-
- undo->rseg = rseg;
-
- undo->space = rseg->space;
- undo->hdr_page_no = page_no;
- undo->hdr_offset = offset;
- undo->last_page_no = page_no;
- undo->size = 1;
-
- undo->empty = TRUE;
- undo->top_page_no = page_no;
- undo->guess_page = NULL;
-
- return(undo);
-}
-
-/************************************************************************
-Initializes a cached undo log object for new use. Only the state,
-del_marks, trx_id, xid, dict_operation, hdr_offset and empty fields are
-reset; id, type, rseg, space, the page numbers and size keep their
-previous values. An id outside the slot range is treated as memory
-corruption and stops the server through ut_error. */
-static
-void
-trx_undo_mem_init_for_reuse(
-/*========================*/
- trx_undo_t* undo, /* in: undo log to init */
- dulint trx_id, /* in: id of the trx for which the undo log
- is created */
- XID* xid, /* in: X/Open XA transaction identification*/
- ulint offset) /* in: undo log header byte offset on page */
-{
- ut_ad(mutex_own(&((undo->rseg)->mutex)));
-
- if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
-
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
- undo->trx_id = trx_id;
- undo->xid = *xid;
-
- undo->dict_operation = FALSE;
-
- undo->hdr_offset = offset;
- undo->empty = TRUE;
-}
-
-/************************************************************************
-Frees an undo log memory copy. Before the object is passed to mem_free(),
-its id is checked against the slot range; an id outside the range is
-reported to stderr and stops the server through ut_error. */
-static
-void
-trx_undo_mem_free(
-/*==============*/
- trx_undo_t* undo) /* in: the undo object to be freed */
-{
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) undo->id);
- ut_error;
- }
-
- mem_free(undo);
-}
-
-/**************************************************************************
-Creates a new undo log. A new undo log segment is created in the file,
-an undo log header is written on its header page (with space for an XID
-if trx->support_xa is set) and a memory object is created for the log.
-rseg->curr_size is incremented before the segment is created and
-decremented again if the segment creation fails. NOTE(review): it is not
-decremented on the DB_OUT_OF_MEMORY path, where the segment has already
-been created in the file. */
-static
-ulint
-trx_undo_create(
-/*============*/
- /* out: DB_SUCCESS if successful in creating
- the new undo log object, possible error
- codes are:
- DB_TOO_MANY_CONCURRENT_TRXS
- DB_OUT_OF_FILE_SPACE
- DB_OUT_OF_MEMORY*/
- trx_t* trx, /* in: transaction */
- trx_rseg_t* rseg, /* in: rollback segment memory copy */
- ulint type, /* in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- dulint trx_id, /* in: id of the trx for which the undo log
- is created */
- XID* xid, /* in: X/Open transaction identification*/
- trx_undo_t** undo, /* out: the new undo log object, undefined
- * if did not succeed */
- mtr_t* mtr) /* in: mtr */
-{
- trx_rsegf_t* rseg_header;
- ulint page_no;
- ulint offset;
- ulint id;
- page_t* undo_page;
- ulint err;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (rseg->curr_size == rseg->max_size) {
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- rseg->curr_size++;
-
- rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
-
- err = trx_undo_seg_create(rseg, rseg_header, type, &id,
- &undo_page, mtr);
-
- if (err != DB_SUCCESS) {
- /* Did not succeed */
-
- rseg->curr_size--;
-
- return(err);
- }
-
- page_no = buf_frame_get_page_no(undo_page);
-
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
-
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(undo_page,
- undo_page + offset, mtr);
- }
-
- *undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
- page_no, offset);
- if (*undo == NULL) {
-
- err = DB_OUT_OF_MEMORY;
- }
-
- return(err);
-}
-
-/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
-
-/************************************************************************
-Reuses a cached undo log. The first object of the cached list that
-matches the type is removed from that list; the function returns NULL if
-the list is empty. For an insert undo log the existing header area is
-re-initialized with trx_undo_insert_header_reuse(); for an update undo
-log a new header is created on the page with trx_undo_header_create(). In
-both cases space for an XID is added if trx->support_xa is set, and the
-memory object is then re-initialized for the new transaction. The object
-is not added to any list here. */
-static
-trx_undo_t*
-trx_undo_reuse_cached(
-/*==================*/
- /* out: the undo log memory object, NULL if
- none cached */
- trx_t* trx, /* in: transaction */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- ulint type, /* in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- dulint trx_id, /* in: id of the trx for which the undo log
- is used */
- XID* xid, /* in: X/Open XA transaction identification */
- mtr_t* mtr) /* in: mtr */
-{
- trx_undo_t* undo;
- page_t* undo_page;
- ulint offset;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (type == TRX_UNDO_INSERT) {
-
- undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
-
- undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
- }
-
- ut_ad(undo->size == 1);
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
-
- if (type == TRX_UNDO_INSERT) {
- offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
-
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- }
- } else {
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_UPDATE);
-
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
-
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- }
- }
-
- trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);
-
- return(undo);
-}
-
-/**************************************************************************
-Marks an undo log header as a header of a data dictionary operation
-transaction. The dict_operation value and the table id of the transaction
-are written to the TRX_UNDO_DICT_TRANS and TRX_UNDO_TABLE_ID fields of the
-undo log header with the mlog_write_* functions, and the same two values
-are copied to the undo log memory object. The function asserts that
-trx->dict_operation is set. */
-static
-void
-trx_undo_mark_as_dict_operation(
-/*============================*/
- trx_t* trx, /* in: dict op transaction */
- trx_undo_t* undo, /* in: assigned undo log */
- mtr_t* mtr) /* in: mtr */
-{
- page_t* hdr_page;
-
- ut_a(trx->dict_operation);
-
- hdr_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
-
- mlog_write_ulint(hdr_page + undo->hdr_offset
- + TRX_UNDO_DICT_TRANS,
- trx->dict_operation, MLOG_1BYTE, mtr);
-
- mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
- trx->table_id, mtr);
-
- undo->dict_operation = trx->dict_operation;
- undo->table_id = trx->table_id;
-}
-
-/**************************************************************************
-Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused. */
-
-ulint
-trx_undo_assign_undo(
-/*=================*/
- /* out: DB_SUCCESS if undo log assign
- successful, possible error codes are:
- DD_TOO_MANY_CONCURRENT_TRXS
- DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY*/
- trx_t* trx, /* in: transaction */
- ulint type) /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
-{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
- mtr_t mtr;
- ulint err = DB_SUCCESS;
-
- ut_ad(trx);
- ut_ad(trx->rseg);
-
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
-
- mtr_start(&mtr);
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- mutex_enter(&(rseg->mutex));
-
- undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
- &mtr);
- if (undo == NULL) {
- err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
- &undo, &mtr);
- if (err != DB_SUCCESS) {
-
- goto func_exit;
- }
- }
-
- if (type == TRX_UNDO_INSERT) {
- UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_list, undo);
- ut_ad(trx->insert_undo == NULL);
- trx->insert_undo = undo;
- } else {
- UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_list, undo);
- ut_ad(trx->update_undo == NULL);
- trx->update_undo = undo;
- }
-
- if (trx->dict_operation) {
- trx_undo_mark_as_dict_operation(trx, undo, &mtr);
- }
-
-func_exit:
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- return err;
-}
-
-/**********************************************************************
-Sets the state of the undo log segment at a transaction finish. */
-
-page_t*
-trx_undo_set_state_at_finish(
-/*=========================*/
- /* out: undo log segment header page,
- x-latched */
- trx_rseg_t* rseg, /* in: rollback segment memory object */
- trx_t* trx __attribute__((unused)), /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- mtr_t* mtr) /* in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_upagef_t* page_hdr;
- page_t* undo_page;
- ulint state;
-
- ut_ad(trx);
- ut_ad(undo);
- ut_ad(mtr);
- ut_ad(mutex_own(&rseg->mutex));
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- if (undo->size == 1
- && mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE)
- < TRX_UNDO_PAGE_REUSE_LIMIT) {
-
- /* This is a heuristic to avoid the problem of all UNDO
- slots ending up in one of the UNDO lists. Previously if
- the server crashed with all the slots in one of the lists,
- transactions that required the slots of a different type
- would fail for lack of slots. */
-
- if (UT_LIST_GET_LEN(rseg->update_undo_list) < 500
- && UT_LIST_GET_LEN(rseg->insert_undo_list) < 500) {
-
- state = TRX_UNDO_CACHED;
- } else {
- state = TRX_UNDO_TO_FREE;
- }
-
- } else if (undo->type == TRX_UNDO_INSERT) {
-
- state = TRX_UNDO_TO_FREE;
- } else {
- state = TRX_UNDO_TO_PURGE;
- }
-
- undo->state = state;
-
- mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, state, MLOG_2BYTES, mtr);
-
- return(undo_page);
-}
-
-/**********************************************************************
-Sets the state of the undo log segment at a transaction prepare. */
-
-page_t*
-trx_undo_set_state_at_prepare(
-/*==========================*/
- /* out: undo log segment header page,
- x-latched */
- trx_t* trx, /* in: transaction */
- trx_undo_t* undo, /* in: undo log memory copy */
- mtr_t* mtr) /* in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_upagef_t* page_hdr;
- trx_ulogf_t* undo_header;
- page_t* undo_page;
- ulint offset;
-
- ut_ad(trx && undo && mtr);
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- /*------------------------------*/
- undo->state = TRX_UNDO_PREPARED;
- undo->xid = trx->xid;
- /*------------------------------*/
-
- mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, undo->state,
- MLOG_2BYTES, mtr);
-
- offset = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
- undo_header = undo_page + offset;
-
- mlog_write_ulint(undo_header + TRX_UNDO_XID_EXISTS,
- TRUE, MLOG_1BYTE, mtr);
-
- trx_undo_write_xid(undo_header, &undo->xid, mtr);
-
- return(undo_page);
-}
-
-/**************************************************************************
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /* in: trx owning the update undo log */
- page_t* undo_page, /* in: update undo log header page,
- x-latched */
- mtr_t* mtr) /* in: mtr */
-{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
-
- undo = trx->update_undo;
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- trx_purge_add_update_undo_to_history(trx, undo_page, mtr);
-
- UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo);
-
- trx->update_undo = NULL;
-
- if (undo->state == TRX_UNDO_CACHED) {
-
- UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
- } else {
- ut_ad(undo->state == TRX_UNDO_TO_PURGE);
-
- trx_undo_mem_free(undo);
- }
-}
-
-/**********************************************************************
-Frees or caches an insert undo log after a transaction commit or rollback.
-Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-
-void
-trx_undo_insert_cleanup(
-/*====================*/
- trx_t* trx) /* in: transaction handle */
-{
- trx_undo_t* undo;
- trx_rseg_t* rseg;
-
- undo = trx->insert_undo;
- ut_ad(undo);
-
- rseg = trx->rseg;
-
- mutex_enter(&(rseg->mutex));
-
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_list, undo);
- trx->insert_undo = NULL;
-
- if (undo->state == TRX_UNDO_CACHED) {
-
- UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo);
- } else {
- ut_ad(undo->state == TRX_UNDO_TO_FREE);
-
- /* Delete first the undo log segment in the file */
-
- mutex_exit(&(rseg->mutex));
-
- trx_undo_seg_free(undo);
-
- mutex_enter(&(rseg->mutex));
-
- ut_ad(rseg->curr_size > undo->size);
-
- rseg->curr_size -= undo->size;
-
- trx_undo_mem_free(undo);
- }
-
- mutex_exit(&(rseg->mutex));
-}
diff --git a/storage/innobase/usr/usr0sess.c b/storage/innobase/usr/usr0sess.c
deleted file mode 100644
index 3740c05eaab..00000000000
--- a/storage/innobase/usr/usr0sess.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/******************************************************
-Sessions
-
-(c) 1996 Innobase Oy
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#include "usr0sess.h"
-
-#ifdef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#include "trx0trx.h"
-
-/*************************************************************************
-Closes a session, freeing the memory occupied by it. */
-static
-void
-sess_close(
-/*=======*/
- sess_t* sess); /* in, own: session object */
-
-/*************************************************************************
-Opens a session. */
-
-sess_t*
-sess_open(void)
-/*===========*/
- /* out, own: session object */
-{
- sess_t* sess;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sess = mem_alloc(sizeof(sess_t));
-
- sess->state = SESS_ACTIVE;
-
- sess->trx = trx_create(sess);
-
- UT_LIST_INIT(sess->graphs);
-
- return(sess);
-}
-
-/*************************************************************************
-Closes a session, freeing the memory occupied by it. */
-static
-void
-sess_close(
-/*=======*/
- sess_t* sess) /* in, own: session object */
-{
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(sess->trx == NULL);
-
- mem_free(sess);
-}
-
-/*************************************************************************
-Closes a session, freeing the memory occupied by it, if it is in a state
-where it should be closed. */
-
-ibool
-sess_try_close(
-/*===========*/
- /* out: TRUE if closed */
- sess_t* sess) /* in, own: session object */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- if (UT_LIST_GET_LEN(sess->graphs) == 0) {
- sess_close(sess);
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/innobase/ut/ut0byte.c b/storage/innobase/ut/ut0byte.c
deleted file mode 100644
index b5467fde601..00000000000
--- a/storage/innobase/ut/ut0byte.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*******************************************************************
-Byte utilities
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0byte.h"
-
-#ifdef UNIV_NONINL
-#include "ut0byte.ic"
-#endif
-
-#include "ut0sort.h"
-
-/* Zero value for a dulint */
-dulint ut_dulint_zero = {0, 0};
-
-/* Maximum value for a dulint */
-dulint ut_dulint_max = {0xFFFFFFFFUL, 0xFFFFFFFFUL};
-
-/****************************************************************
-Sort function for dulint arrays. */
-void
-ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high)
-/*===============================================================*/
-{
- UT_SORT_FUNCTION_BODY(ut_dulint_sort, arr, aux_arr, low, high,
- ut_dulint_cmp);
-}
diff --git a/storage/innobase/ut/ut0dbg.c b/storage/innobase/ut/ut0dbg.c
deleted file mode 100644
index 8c4be190d77..00000000000
--- a/storage/innobase/ut/ut0dbg.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*********************************************************************
-Debug utilities for Innobase.
-
-(c) 1994, 1995 Innobase Oy
-
-Created 1/30/1994 Heikki Tuuri
-**********************************************************************/
-
-#include "univ.i"
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-#else
-/* This is used to eliminate compiler warnings */
-ulint ut_dbg_zero = 0;
-#endif
-
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/* If this is set to TRUE all threads will stop into the next assertion
-and assert */
-ibool ut_dbg_stop_threads = FALSE;
-#endif
-#ifdef __NETWARE__
-ibool panic_shutdown = FALSE; /* This is set to TRUE when on NetWare there
- happens an InnoDB assertion failure or other
- fatal error condition that requires an
- immediate shutdown. */
-#elif !defined(UT_DBG_USE_ABORT)
-/* Null pointer used to generate memory trap */
-
-ulint* ut_dbg_null_ptr = NULL;
-#endif
-
-/*****************************************************************
-Report a failed assertion. */
-
-void
-ut_dbg_assertion_failed(
-/*====================*/
- const char* expr, /* in: the failed assertion (optional) */
- const char* file, /* in: source file containing the assertion */
- ulint line) /* in: line number of the assertion */
-{
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Assertion failure in thread %lu"
- " in file %s line %lu\n",
- os_thread_pf(os_thread_get_curr_id()), file, line);
- if (expr) {
- fprintf(stderr,
- "InnoDB: Failing assertion: %s\n", expr);
- }
-
- fputs("InnoDB: We intentionally generate a memory trap.\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com.\n"
- "InnoDB: If you get repeated assertion failures"
- " or crashes, even\n"
- "InnoDB: immediately after the mysqld startup, there may be\n"
- "InnoDB: corruption in the InnoDB tablespace. Please refer to\n"
- "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
- "forcing-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
-#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
- ut_dbg_stop_threads = TRUE;
-#endif
-}
-
-#ifdef __NETWARE__
-/*****************************************************************
-Shut down MySQL/InnoDB after assertion failure. */
-
-void
-ut_dbg_panic(void)
-/*==============*/
-{
- if (!panic_shutdown) {
- panic_shutdown = TRUE;
- innobase_shutdown_for_mysql();
- }
- exit(1);
-}
-#else /* __NETWARE__ */
-# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
-/*****************************************************************
-Stop a thread after assertion failure. */
-
-void
-ut_dbg_stop_thread(
-/*===============*/
- const char* file,
- ulint line)
-{
- fprintf(stderr, "InnoDB: Thread %lu stopped in file %s line %lu\n",
- os_thread_pf(os_thread_get_curr_id()), file, line);
- os_thread_sleep(1000000000);
-}
-# endif
-#endif /* __NETWARE__ */
diff --git a/storage/innobase/ut/ut0list.c b/storage/innobase/ut/ut0list.c
deleted file mode 100644
index a0db7ff7b55..00000000000
--- a/storage/innobase/ut/ut0list.c
+++ /dev/null
@@ -1,169 +0,0 @@
-#include "ut0list.h"
-#ifdef UNIV_NONINL
-#include "ut0list.ic"
-#endif
-
-/********************************************************************
-Create a new list. */
-
-ib_list_t*
-ib_list_create(void)
-/*=================*/
- /* out: list */
-{
- ib_list_t* list = mem_alloc(sizeof(ib_list_t));
-
- list->first = NULL;
- list->last = NULL;
- list->is_heap_list = FALSE;
-
- return(list);
-}
-
-/********************************************************************
-Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function. */
-
-ib_list_t*
-ib_list_create_heap(
-/*================*/
- /* out: list */
- mem_heap_t* heap) /* in: memory heap to use */
-{
- ib_list_t* list = mem_heap_alloc(heap, sizeof(ib_list_t));
-
- list->first = NULL;
- list->last = NULL;
- list->is_heap_list = TRUE;
-
- return(list);
-}
-
-/********************************************************************
-Free a list. */
-
-void
-ib_list_free(
-/*=========*/
- ib_list_t* list) /* in: list */
-{
- ut_a(!list->is_heap_list);
-
- /* We don't check that the list is empty because it's entirely valid
- to e.g. have all the nodes allocated from a single heap that is then
- freed after the list itself is freed. */
-
- mem_free(list);
-}
-
-/********************************************************************
-Add the data to the start of the list. */
-
-ib_list_node_t*
-ib_list_add_first(
-/*==============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- void* data, /* in: data */
- mem_heap_t* heap) /* in: memory heap to use */
-{
- return(ib_list_add_after(list, ib_list_get_first(list), data, heap));
-}
-
-/********************************************************************
-Add the data to the end of the list. */
-
-ib_list_node_t*
-ib_list_add_last(
-/*=============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- void* data, /* in: data */
- mem_heap_t* heap) /* in: memory heap to use */
-{
- return(ib_list_add_after(list, ib_list_get_last(list), data, heap));
-}
-
-/********************************************************************
-Add the data after the indicated node. */
-
-ib_list_node_t*
-ib_list_add_after(
-/*==============*/
- /* out: new list node*/
- ib_list_t* list, /* in: list */
- ib_list_node_t* prev_node, /* in: node preceding new node (can
- be NULL) */
- void* data, /* in: data */
- mem_heap_t* heap) /* in: memory heap to use */
-{
- ib_list_node_t* node = mem_heap_alloc(heap, sizeof(ib_list_node_t));
-
- node->data = data;
-
- if (!list->first) {
- /* Empty list. */
-
- ut_a(!prev_node);
-
- node->prev = NULL;
- node->next = NULL;
-
- list->first = node;
- list->last = node;
- } else if (!prev_node) {
- /* Start of list. */
-
- node->prev = NULL;
- node->next = list->first;
-
- list->first->prev = node;
-
- list->first = node;
- } else {
- /* Middle or end of list. */
-
- node->prev = prev_node;
- node->next = prev_node->next;
-
- prev_node->next = node;
-
- if (node->next) {
- node->next->prev = node;
- } else {
- list->last = node;
- }
- }
-
- return(node);
-}
-
-/********************************************************************
-Remove the node from the list. */
-
-void
-ib_list_remove(
-/*===========*/
- ib_list_t* list, /* in: list */
- ib_list_node_t* node) /* in: node to remove */
-{
- if (node->prev) {
- node->prev->next = node->next;
- } else {
- /* First item in list. */
-
- ut_ad(list->first == node);
-
- list->first = node->next;
- }
-
- if (node->next) {
- node->next->prev = node->prev;
- } else {
- /* Last item in list. */
-
- ut_ad(list->last == node);
-
- list->last = node->prev;
- }
-}
diff --git a/storage/innobase/ut/ut0mem.c b/storage/innobase/ut/ut0mem.c
deleted file mode 100644
index b466a5f6872..00000000000
--- a/storage/innobase/ut/ut0mem.c
+++ /dev/null
@@ -1,548 +0,0 @@
-/************************************************************************
-Memory primitives
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/11/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ut0mem.h"
-
-#ifdef UNIV_NONINL
-#include "ut0mem.ic"
-#endif
-
-#include "mem0mem.h"
-#include "os0sync.h"
-#include "os0thread.h"
-
-/* This struct is placed first in every allocated memory block */
-typedef struct ut_mem_block_struct ut_mem_block_t;
-
-/* The total amount of memory currently allocated from the OS with malloc */
-ulint ut_total_allocated_memory = 0;
-
-struct ut_mem_block_struct{
- UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;
- /* mem block list node */
- ulint size; /* size of allocated memory */
- ulint magic_n;
-};
-
-#define UT_MEM_MAGIC_N 1601650166
-
-/* List of all memory blocks allocated from the operating system
-with malloc */
-UT_LIST_BASE_NODE_T(ut_mem_block_t) ut_mem_block_list;
-
-os_fast_mutex_t ut_list_mutex; /* this protects the list */
-
-ibool ut_mem_block_list_inited = FALSE;
-
-ulint* ut_mem_null_ptr = NULL;
-
-/**************************************************************************
-Initializes the mem block list at database startup. */
-static
-void
-ut_mem_block_list_init(void)
-/*========================*/
-{
- os_fast_mutex_init(&ut_list_mutex);
- UT_LIST_INIT(ut_mem_block_list);
- ut_mem_block_list_inited = TRUE;
-}
-
-/**************************************************************************
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE. */
-
-void*
-ut_malloc_low(
-/*==========*/
- /* out, own: allocated memory */
- ulint n, /* in: number of bytes to allocate */
- ibool set_to_zero, /* in: TRUE if allocated memory should be
- set to zero if UNIV_SET_MEM_TO_ZERO is
- defined */
- ibool assert_on_error)/* in: if TRUE, we crash mysqld if the
- memory cannot be allocated */
-{
- ulint retry_count = 0;
- void* ret;
-
- ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */
-
- if (!ut_mem_block_list_inited) {
- ut_mem_block_list_init();
- }
-retry:
- os_fast_mutex_lock(&ut_list_mutex);
-
- ret = malloc(n + sizeof(ut_mem_block_t));
-
- if (ret == NULL && retry_count < 60) {
- if (retry_count == 0) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: cannot allocate"
- " %lu bytes of\n"
- "InnoDB: memory with malloc!"
- " Total allocated memory\n"
- "InnoDB: by InnoDB %lu bytes."
- " Operating system errno: %lu\n"
- "InnoDB: Check if you should"
- " increase the swap file or\n"
- "InnoDB: ulimits of your operating system.\n"
- "InnoDB: On FreeBSD check you"
- " have compiled the OS with\n"
- "InnoDB: a big enough maximum process size.\n"
- "InnoDB: Note that in most 32-bit"
- " computers the process\n"
- "InnoDB: memory space is limited"
- " to 2 GB or 4 GB.\n"
- "InnoDB: We keep retrying"
- " the allocation for 60 seconds...\n",
- (ulong) n, (ulong) ut_total_allocated_memory,
-#ifdef __WIN__
- (ulong) GetLastError()
-#else
- (ulong) errno
-#endif
- );
- }
-
- os_fast_mutex_unlock(&ut_list_mutex);
-
- /* Sleep for a second and retry the allocation; maybe this is
- just a temporary shortage of memory */
-
- os_thread_sleep(1000000);
-
- retry_count++;
-
- goto retry;
- }
-
- if (ret == NULL) {
- /* Flush stderr to make more probable that the error
- message gets in the error file before we generate a seg
- fault */
-
- fflush(stderr);
-
- os_fast_mutex_unlock(&ut_list_mutex);
-
- /* Make an intentional seg fault so that we get a stack
- trace */
- /* Intentional segfault on NetWare causes an abend. Avoid this
- by graceful exit handling in ut_a(). */
-#if (!defined __NETWARE__)
- if (assert_on_error) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: We now intentionally"
- " generate a seg fault so that\n"
- "InnoDB: on Linux we get a stack trace.\n");
-
- if (*ut_mem_null_ptr) ut_mem_null_ptr = 0;
- } else {
- return(NULL);
- }
-#else
- ut_a(0);
-#endif
- }
-
- if (set_to_zero) {
-#ifdef UNIV_SET_MEM_TO_ZERO
- memset(ret, '\0', n + sizeof(ut_mem_block_t));
-#endif
- }
-
- UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t));
-
- ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t);
- ((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N;
-
- ut_total_allocated_memory += n + sizeof(ut_mem_block_t);
-
- UT_LIST_ADD_FIRST(mem_block_list, ut_mem_block_list,
- ((ut_mem_block_t*)ret));
- os_fast_mutex_unlock(&ut_list_mutex);
-
- return((void*)((byte*)ret + sizeof(ut_mem_block_t)));
-}
-
-/**************************************************************************
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined. */
-
-void*
-ut_malloc(
-/*======*/
- /* out, own: allocated memory */
- ulint n) /* in: number of bytes to allocate */
-{
- return(ut_malloc_low(n, TRUE, TRUE));
-}
-
-/**************************************************************************
-Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
-out. It cannot be used if we want to return an error message. Prints to
-stderr a message if fails. */
-
-ibool
-ut_test_malloc(
-/*===========*/
- /* out: TRUE if succeeded */
- ulint n) /* in: try to allocate this many bytes */
-{
- void* ret;
-
- ret = malloc(n);
-
- if (ret == NULL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: cannot allocate"
- " %lu bytes of memory for\n"
- "InnoDB: a BLOB with malloc! Total allocated memory\n"
- "InnoDB: by InnoDB %lu bytes."
- " Operating system errno: %d\n"
- "InnoDB: Check if you should increase"
- " the swap file or\n"
- "InnoDB: ulimits of your operating system.\n"
- "InnoDB: On FreeBSD check you have"
- " compiled the OS with\n"
- "InnoDB: a big enough maximum process size.\n",
- (ulong) n,
- (ulong) ut_total_allocated_memory,
- (int) errno);
- return(FALSE);
- }
-
- free(ret);
-
- return(TRUE);
-}
-
-/**************************************************************************
-Frees a memory block allocated with ut_malloc. */
-
-void
-ut_free(
-/*====*/
- void* ptr) /* in, own: memory block */
-{
- ut_mem_block_t* block;
-
- block = (ut_mem_block_t*)((byte*)ptr - sizeof(ut_mem_block_t));
-
- os_fast_mutex_lock(&ut_list_mutex);
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
- ut_a(ut_total_allocated_memory >= block->size);
-
- ut_total_allocated_memory -= block->size;
-
- UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
- free(block);
-
- os_fast_mutex_unlock(&ut_list_mutex);
-}
-
-/**************************************************************************
-Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
- realloc() changes the size of the memory block pointed to
- by ptr to size bytes. The contents will be unchanged to
- the minimum of the old and new sizes; newly allocated mem­
- ory will be uninitialized. If ptr is NULL, the call is
- equivalent to malloc(size); if size is equal to zero, the
- call is equivalent to free(ptr). Unless ptr is NULL, it
- must have been returned by an earlier call to malloc(),
- calloc() or realloc().
-
-RETURN VALUE
- realloc() returns a pointer to the newly allocated memory,
- which is suitably aligned for any kind of variable and may
- be different from ptr, or NULL if the request fails. If
- size was equal to 0, either NULL or a pointer suitable to
- be passed to free() is returned. If realloc() fails the
- original block is left untouched - it is not freed or
- moved. */
-
-void*
-ut_realloc(
-/*=======*/
- /* out, own: pointer to new mem block or NULL */
- void* ptr, /* in: pointer to old block or NULL */
- ulint size) /* in: desired size */
-{
- ut_mem_block_t* block;
- ulint old_size;
- ulint min_size;
- void* new_ptr;
-
- if (ptr == NULL) {
-
- return(ut_malloc(size));
- }
-
- if (size == 0) {
- ut_free(ptr);
-
- return(NULL);
- }
-
- block = (ut_mem_block_t*)((byte*)ptr - sizeof(ut_mem_block_t));
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
-
- old_size = block->size - sizeof(ut_mem_block_t);
-
- if (size < old_size) {
- min_size = size;
- } else {
- min_size = old_size;
- }
-
- new_ptr = ut_malloc(size);
-
- if (new_ptr == NULL) {
-
- return(NULL);
- }
-
- /* Copy the old data from ptr */
- ut_memcpy(new_ptr, ptr, min_size);
-
- ut_free(ptr);
-
- return(new_ptr);
-}
-
-/**************************************************************************
-Frees in shutdown all allocated memory not freed yet. */
-
-void
-ut_free_all_mem(void)
-/*=================*/
-{
- ut_mem_block_t* block;
-
- os_fast_mutex_free(&ut_list_mutex);
-
- while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) {
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
- ut_a(ut_total_allocated_memory >= block->size);
-
- ut_total_allocated_memory -= block->size;
-
- UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
- free(block);
- }
-
- if (ut_total_allocated_memory != 0) {
- fprintf(stderr,
- "InnoDB: Warning: after shutdown"
- " total allocated memory is %lu\n",
- (ulong) ut_total_allocated_memory);
- }
-}
-
-/**************************************************************************
-Copies up to size - 1 characters from the NUL-terminated string src to
-dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size. */
-
-ulint
-ut_strlcpy(
-/*=======*/
- /* out: strlen(src) */
- char* dst, /* in: destination buffer */
- const char* src, /* in: source buffer */
- ulint size) /* in: size of destination buffer */
-{
- ulint src_size = strlen(src);
-
- if (size != 0) {
- ulint n = ut_min(src_size, size - 1);
-
- memcpy(dst, src, n);
- dst[n] = '\0';
- }
-
- return(src_size);
-}
-
-/**************************************************************************
-Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first. */
-
-ulint
-ut_strlcpy_rev(
-/*===========*/
- /* out: strlen(src) */
- char* dst, /* in: destination buffer */
- const char* src, /* in: source buffer */
- ulint size) /* in: size of destination buffer */
-{
- ulint src_size = strlen(src);
-
- if (size != 0) {
- ulint n = ut_min(src_size, size - 1);
-
- memcpy(dst, src + src_size - n, n + 1);
- }
-
- return(src_size);
-}
-
-/**************************************************************************
-Make a quoted copy of a NUL-terminated string. Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq(). */
-
-char*
-ut_strcpyq(
-/*=======*/
- /* out: pointer to end of dest */
- char* dest, /* in: output buffer */
- char q, /* in: the quote character */
- const char* src) /* in: null-terminated string */
-{
- while (*src) {
- if ((*dest++ = *src++) == q) {
- *dest++ = q;
- }
- }
-
- return(dest);
-}
-
-/**************************************************************************
-Make a quoted copy of a fixed-length string. Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq(). */
-
-char*
-ut_memcpyq(
-/*=======*/
- /* out: pointer to end of dest */
- char* dest, /* in: output buffer */
- char q, /* in: the quote character */
- const char* src, /* in: string to be quoted */
- ulint len) /* in: length of src */
-{
- const char* srcend = src + len;
-
- while (src < srcend) {
- if ((*dest++ = *src++) == q) {
- *dest++ = q;
- }
- }
-
- return(dest);
-}
-
-/**************************************************************************
-Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once. */
-
-ulint
-ut_strcount(
-/*========*/
- /* out: the number of times s2 occurs in s1 */
- const char* s1, /* in: string to search in */
- const char* s2) /* in: string to search for */
-{
- ulint count = 0;
- ulint len = strlen(s2);
-
- if (len == 0) {
-
- return(0);
- }
-
- for (;;) {
- s1 = strstr(s1, s2);
-
- if (!s1) {
-
- break;
- }
-
- count++;
- s1 += len;
- }
-
- return(count);
-}
-
-/**************************************************************************
-Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once. */
-
-char *
-ut_strreplace(
-/*==========*/
- /* out, own: modified string, must be
- freed with mem_free() */
- const char* str, /* in: string to operate on */
- const char* s1, /* in: string to replace */
- const char* s2) /* in: string to replace s1 with */
-{
- char* new_str;
- char* ptr;
- const char* str_end;
- ulint str_len = strlen(str);
- ulint s1_len = strlen(s1);
- ulint s2_len = strlen(s2);
- ulint count = 0;
- int len_delta = (int)s2_len - (int)s1_len;
-
- str_end = str + str_len;
-
- if (len_delta <= 0) {
- len_delta = 0;
- } else {
- count = ut_strcount(str, s1);
- }
-
- new_str = mem_alloc(str_len + count * len_delta + 1);
- ptr = new_str;
-
- while (str) {
- const char* next = strstr(str, s1);
-
- if (!next) {
- next = str_end;
- }
-
- memcpy(ptr, str, next - str);
- ptr += next - str;
-
- if (next == str_end) {
-
- break;
- }
-
- memcpy(ptr, s2, s2_len);
- ptr += s2_len;
-
- str = next + s1_len;
- }
-
- *ptr = '\0';
-
- return(new_str);
-}
diff --git a/storage/innobase/ut/ut0rnd.c b/storage/innobase/ut/ut0rnd.c
deleted file mode 100644
index 016809e0474..00000000000
--- a/storage/innobase/ut/ut0rnd.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*******************************************************************
-Random numbers and hashing
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0rnd.h"
-
-#ifdef UNIV_NONINL
-#include "ut0rnd.ic"
-#endif
-
-/* These random numbers are used in ut_find_prime */
-#define UT_RANDOM_1 1.0412321
-#define UT_RANDOM_2 1.1131347
-#define UT_RANDOM_3 1.0132677
-
-
-ulint ut_rnd_ulint_counter = 65654363;
-
-/***************************************************************
-Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2. */
-
-ulint
-ut_find_prime(
-/*==========*/
- /* out: prime */
- ulint n) /* in: positive number > 100 */
-{
- ulint pow2;
- ulint i;
-
- n += 100;
-
- pow2 = 1;
- while (pow2 * 2 < n) {
- pow2 = 2 * pow2;
- }
-
- if ((double)n < 1.05 * (double)pow2) {
- n = (ulint) ((double)n * UT_RANDOM_1);
- }
-
- pow2 = 2 * pow2;
-
- if ((double)n > 0.95 * (double)pow2) {
- n = (ulint) ((double)n * UT_RANDOM_2);
- }
-
- if (n > pow2 - 20) {
- n += 30;
- }
-
- /* Now we have n far enough from powers of 2. To make
- n more random (especially, if it was not near
- a power of 2), we then multiply it by a random number. */
-
- n = (ulint) ((double)n * UT_RANDOM_3);
-
- for (;; n++) {
- i = 2;
- while (i * i <= n) {
- if (n % i == 0) {
- goto next_n;
- }
- i++;
- }
-
- /* Found a prime */
- break;
-next_n: ;
- }
-
- return(n);
-}
diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c
deleted file mode 100644
index 1ae43172894..00000000000
--- a/storage/innobase/ut/ut0ut.c
+++ /dev/null
@@ -1,592 +0,0 @@
-/*******************************************************************
-Various utilities for Innobase.
-
-(c) 1994, 1995 Innobase Oy
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0ut.h"
-
-#ifdef UNIV_NONINL
-#include "ut0ut.ic"
-#endif
-
-#include <stdarg.h>
-#include <string.h>
-#include <ctype.h>
-
-#include "ut0sort.h"
-#include "trx0trx.h"
-#include "ha_prototypes.h"
-
-ibool ut_always_false = FALSE;
-
-#ifdef __WIN__
-/*********************************************************************
-NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix
-epoch starts from 1970/1/1. For selection of constant see:
-http://support.microsoft.com/kb/167296/ */
-#define WIN_TO_UNIX_DELTA_USEC ((ib_longlong) 11644473600000000ULL)
-
-
-/*********************************************************************
-This is the Windows version of gettimeofday(2).*/
-static
-int
-ut_gettimeofday(
-/*============*/
- /* out: 0 if all OK else -1 */
- struct timeval* tv, /* out: Values are relative to Unix epoch */
- void* tz) /* in: not used */
-{
- FILETIME ft;
- ib_longlong tm;
-
- if (!tv) {
- errno = EINVAL;
- return(-1);
- }
-
- GetSystemTimeAsFileTime(&ft);
-
- tm = (ib_longlong) ft.dwHighDateTime << 32;
- tm |= ft.dwLowDateTime;
-
- ut_a(tm >= 0); /* If tm wraps over to negative, the quotient / 10
- does not work */
-
- tm /= 10; /* Convert from 100 nsec periods to usec */
-
- /* If we don't convert to the Unix epoch the value for
- struct timeval::tv_sec will overflow.*/
- tm -= WIN_TO_UNIX_DELTA_USEC;
-
- tv->tv_sec = (long) (tm / 1000000L);
- tv->tv_usec = (long) (tm % 1000000L);
-
- return(0);
-}
-#else
-#define ut_gettimeofday gettimeofday
-#endif
-
-/************************************************************
-Gets the high 32 bits in a ulint. That is makes a shift >> 32,
-but since there seem to be compiler bugs in both gcc and Visual C++,
-we do this by a special conversion. */
-
-ulint
-ut_get_high32(
-/*==========*/
- /* out: a >> 32 */
- ulint a) /* in: ulint */
-{
- ib_longlong i;
-
- i = (ib_longlong)a;
-
- i = i >> 32;
-
- return((ulint)i);
-}
-
-/************************************************************
-The following function returns elapsed CPU time in milliseconds. */
-
-ulint
-ut_clock(void)
-{
- return((clock() * 1000) / CLOCKS_PER_SEC);
-}
-
-/**************************************************************
-Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime. */
-
-ib_time_t
-ut_time(void)
-/*=========*/
-{
- return(time(NULL));
-}
-
-/**************************************************************
-Returns system time.
-Upon successful completion, the value 0 is returned; otherwise the
-value -1 is returned and the global variable errno is set to indicate the
-error. */
-
-int
-ut_usectime(
-/*========*/
- /* out: 0 on success, -1 otherwise */
- ulint* sec, /* out: seconds since the Epoch */
- ulint* ms) /* out: microseconds since the Epoch+*sec */
-{
- struct timeval tv;
- int ret;
- int errno_gettimeofday;
- int i;
-
- for (i = 0; i < 10; i++) {
-
- ret = ut_gettimeofday(&tv, NULL);
-
- if (ret == -1) {
- errno_gettimeofday = errno;
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: gettimeofday(): %s\n",
- strerror(errno_gettimeofday));
- os_thread_sleep(100000); /* 0.1 sec */
- errno = errno_gettimeofday;
- } else {
- break;
- }
- }
-
- if (ret != -1) {
- *sec = (ulint) tv.tv_sec;
- *ms = (ulint) tv.tv_usec;
- }
-
- return(ret);
-}
-
-/**************************************************************
-Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms) */
-
-ib_longlong
-ut_usecdiff(
-/*========*/
- ulint end_sec, /* in: seconds since the Epoch */
- ulint end_ms, /* in: microseconds since the Epoch+*sec1 */
- ulint start_sec, /* in: seconds since the Epoch */
- ulint start_ms) /* in: microseconds since the Epoch+*sec2 */
-{
- ib_longlong end_mics = end_sec * 1000000LL + end_ms;
- ib_longlong start_mics = start_sec * 1000000LL + start_ms;
-
- return end_mics - start_mics;
-}
-
-/**************************************************************
-Returns the difference of two times in seconds. */
-
-double
-ut_difftime(
-/*========*/
- /* out: time2 - time1 expressed in seconds */
- ib_time_t time2, /* in: time */
- ib_time_t time1) /* in: time */
-{
- return(difftime(time2, time1));
-}
-
-/**************************************************************
-Prints a timestamp to a file. */
-
-void
-ut_print_timestamp(
-/*===============*/
- FILE* file) /* in: file where to print */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
- (int)cal_tm.wYear % 100,
- (int)cal_tm.wMonth,
- (int)cal_tm.wDay,
- (int)cal_tm.wHour,
- (int)cal_tm.wMinute,
- (int)cal_tm.wSecond);
-#else
- struct tm cal_tm;
- struct tm* cal_tm_ptr;
- time_t tm;
-
- time(&tm);
-
-#ifdef HAVE_LOCALTIME_R
- localtime_r(&tm, &cal_tm);
- cal_tm_ptr = &cal_tm;
-#else
- cal_tm_ptr = localtime(&tm);
-#endif
- fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
- cal_tm_ptr->tm_year % 100,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
-#endif
-}
-
-/**************************************************************
-Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-
-void
-ut_sprintf_timestamp(
-/*=================*/
- char* buf) /* in: buffer where to sprintf */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
- (int)cal_tm.wYear % 100,
- (int)cal_tm.wMonth,
- (int)cal_tm.wDay,
- (int)cal_tm.wHour,
- (int)cal_tm.wMinute,
- (int)cal_tm.wSecond);
-#else
- struct tm cal_tm;
- struct tm* cal_tm_ptr;
- time_t tm;
-
- time(&tm);
-
-#ifdef HAVE_LOCALTIME_R
- localtime_r(&tm, &cal_tm);
- cal_tm_ptr = &cal_tm;
-#else
- cal_tm_ptr = localtime(&tm);
-#endif
- sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
- cal_tm_ptr->tm_year % 100,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
-#endif
-}
-
-/**************************************************************
-Sprintfs a timestamp to a buffer with no spaces and with ':' characters
-replaced by '_'. */
-
-void
-ut_sprintf_timestamp_without_extra_chars(
-/*=====================================*/
- char* buf) /* in: buffer where to sprintf */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d",
- (int)cal_tm.wYear % 100,
- (int)cal_tm.wMonth,
- (int)cal_tm.wDay,
- (int)cal_tm.wHour,
- (int)cal_tm.wMinute,
- (int)cal_tm.wSecond);
-#else
- struct tm cal_tm;
- struct tm* cal_tm_ptr;
- time_t tm;
-
- time(&tm);
-
-#ifdef HAVE_LOCALTIME_R
- localtime_r(&tm, &cal_tm);
- cal_tm_ptr = &cal_tm;
-#else
- cal_tm_ptr = localtime(&tm);
-#endif
- sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d",
- cal_tm_ptr->tm_year % 100,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
-#endif
-}
-
-/**************************************************************
-Returns current year, month, day. */
-
-void
-ut_get_year_month_day(
-/*==================*/
- ulint* year, /* out: current year */
- ulint* month, /* out: month */
- ulint* day) /* out: day */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- *year = (ulint)cal_tm.wYear;
- *month = (ulint)cal_tm.wMonth;
- *day = (ulint)cal_tm.wDay;
-#else
- struct tm cal_tm;
- struct tm* cal_tm_ptr;
- time_t tm;
-
- time(&tm);
-
-#ifdef HAVE_LOCALTIME_R
- localtime_r(&tm, &cal_tm);
- cal_tm_ptr = &cal_tm;
-#else
- cal_tm_ptr = localtime(&tm);
-#endif
- *year = (ulint)cal_tm_ptr->tm_year + 1900;
- *month = (ulint)cal_tm_ptr->tm_mon + 1;
- *day = (ulint)cal_tm_ptr->tm_mday;
-#endif
-}
-
-/*****************************************************************
-Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++. */
-
-ulint
-ut_delay(
-/*=====*/
- /* out: dummy value */
- ulint delay) /* in: delay in microseconds on 100 MHz Pentium */
-{
- ulint i, j;
-
- j = 0;
-
- for (i = 0; i < delay * 50; i++) {
- PAUSE_INSTRUCTION();
- j += i;
- }
-
- if (ut_always_false) {
- ut_always_false = (ibool) j;
- }
-
- return(j);
-}
-
-/*****************************************************************
-Prints the contents of a memory buffer in hex and ascii. */
-
-void
-ut_print_buf(
-/*=========*/
- FILE* file, /* in: file where to print */
- const void* buf, /* in: memory buffer */
- ulint len) /* in: length of the buffer */
-{
- const byte* data;
- ulint i;
-
- UNIV_MEM_ASSERT_RW(buf, len);
-
- fprintf(file, " len %lu; hex ", len);
-
- for (data = (const byte*)buf, i = 0; i < len; i++) {
- fprintf(file, "%02lx", (ulong)*data++);
- }
-
- fputs("; asc ", file);
-
- data = (const byte*)buf;
-
- for (i = 0; i < len; i++) {
- int c = (int) *data++;
- putc(isprint(c) ? c : ' ', file);
- }
-
- putc(';', file);
-}
-
-/****************************************************************
-Sort function for ulint arrays. */
-
-void
-ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high)
-/*============================================================*/
-{
- UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high,
- ut_ulint_cmp);
-}
-
-/*****************************************************************
-Calculates fast the number rounded up to the nearest power of 2. */
-
-ulint
-ut_2_power_up(
-/*==========*/
- /* out: first power of 2 which is >= n */
- ulint n) /* in: number != 0 */
-{
- ulint res;
-
- res = 1;
-
- ut_ad(n > 0);
-
- while (res < n) {
- res = res * 2;
- }
-
- return(res);
-}
-
-/**************************************************************************
-Outputs a NUL-terminated file name, quoted with apostrophes. */
-
-void
-ut_print_filename(
-/*==============*/
- FILE* f, /* in: output stream */
- const char* name) /* in: name to print */
-{
- putc('\'', f);
- for (;;) {
- int c = *name++;
- switch (c) {
- case 0:
- goto done;
- case '\'':
- putc(c, f);
- /* fall through */
- default:
- putc(c, f);
- }
- }
-done:
- putc('\'', f);
-}
-
-/**************************************************************************
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-
-void
-ut_print_name(
-/*==========*/
- FILE* f, /* in: output stream */
- trx_t* trx, /* in: transaction */
- ibool table_id,/* in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name) /* in: name to print */
-{
- ut_print_namel(f, trx, table_id, name, strlen(name));
-}
-
-/**************************************************************************
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-
-void
-ut_print_namel(
-/*===========*/
- FILE* f, /* in: output stream */
- trx_t* trx, /* in: transaction (NULL=no quotes) */
- ibool table_id,/* in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name, /* in: name to print */
- ulint namelen)/* in: length of name */
-{
-#ifdef UNIV_HOTBACKUP
- fwrite(name, 1, namelen, f);
-#else
- if (table_id) {
- char* slash = memchr(name, '/', namelen);
- if (!slash) {
-
- goto no_db_name;
- }
-
- /* Print the database name and table name separately. */
- innobase_print_identifier(f, trx, TRUE, name, slash - name);
- putc('.', f);
- innobase_print_identifier(f, trx, TRUE, slash + 1,
- namelen - (slash - name) - 1);
- } else {
-no_db_name:
- innobase_print_identifier(f, trx, table_id, name, namelen);
- }
-#endif
-}
-
-/**************************************************************************
-Catenate files. */
-
-void
-ut_copy_file(
-/*=========*/
- FILE* dest, /* in: output file */
- FILE* src) /* in: input file to be appended to output */
-{
- long len = ftell(src);
- char buf[4096];
-
- rewind(src);
- do {
- size_t maxs = len < (long) sizeof buf
- ? (size_t) len
- : sizeof buf;
- size_t size = fread(buf, 1, maxs, src);
- fwrite(buf, 1, size, dest);
- len -= (long) size;
- if (size < maxs) {
- break;
- }
- } while (len > 0);
-}
-
-/**************************************************************************
-snprintf(). */
-
-#ifdef __WIN__
-#include <stdarg.h>
-int
-ut_snprintf(
- /* out: number of characters that would
- have been printed if the size were
- unlimited, not including the terminating
- '\0'. */
- char* str, /* out: string */
- size_t size, /* in: str size */
- const char* fmt, /* in: format */
- ...) /* in: format values */
-{
- int res;
- va_list ap1;
- va_list ap2;
-
- va_start(ap1, fmt);
- va_start(ap2, fmt);
-
- res = _vscprintf(fmt, ap1);
- ut_a(res != -1);
-
- if (size > 0) {
- _vsnprintf(str, size, fmt, ap2);
-
- if ((size_t) res >= size) {
- str[size - 1] = '\0';
- }
- }
-
- va_end(ap1);
- va_end(ap2);
-
- return(res);
-}
-#endif /* __WIN__ */
diff --git a/storage/innobase/ut/ut0vec.c b/storage/innobase/ut/ut0vec.c
deleted file mode 100644
index e0d3e84d4a2..00000000000
--- a/storage/innobase/ut/ut0vec.c
+++ /dev/null
@@ -1,54 +0,0 @@
-#include "ut0vec.h"
-#ifdef UNIV_NONINL
-#include "ut0vec.ic"
-#endif
-#include <string.h>
-
-/********************************************************************
-Create a new vector with the given initial size. */
-
-ib_vector_t*
-ib_vector_create(
-/*=============*/
- /* out: vector */
- mem_heap_t* heap, /* in: heap */
- ulint size) /* in: initial size */
-{
- ib_vector_t* vec;
-
- ut_a(size > 0);
-
- vec = mem_heap_alloc(heap, sizeof(*vec));
-
- vec->heap = heap;
- vec->data = mem_heap_alloc(heap, sizeof(void*) * size);
- vec->used = 0;
- vec->total = size;
-
- return(vec);
-}
-
-/********************************************************************
-Push a new element to the vector, increasing its size if necessary. */
-
-void
-ib_vector_push(
-/*===========*/
- ib_vector_t* vec, /* in: vector */
- void* elem) /* in: data element */
-{
- if (vec->used >= vec->total) {
- void** new_data;
- ulint new_total = vec->total * 2;
-
- new_data = mem_heap_alloc(vec->heap,
- sizeof(void*) * new_total);
- memcpy(new_data, vec->data, sizeof(void*) * vec->total);
-
- vec->data = new_data;
- vec->total = new_total;
- }
-
- vec->data[vec->used] = elem;
- vec->used++;
-}
diff --git a/storage/innobase/ut/ut0wqueue.c b/storage/innobase/ut/ut0wqueue.c
deleted file mode 100644
index 7e090e89a4f..00000000000
--- a/storage/innobase/ut/ut0wqueue.c
+++ /dev/null
@@ -1,92 +0,0 @@
-#include "ut0wqueue.h"
-
-/********************************************************************
-Create a new work queue. */
-
-ib_wqueue_t*
-ib_wqueue_create(void)
-/*===================*/
- /* out: work queue */
-{
- ib_wqueue_t* wq = mem_alloc(sizeof(ib_wqueue_t));
-
- mutex_create(&wq->mutex, SYNC_WORK_QUEUE);
-
- wq->items = ib_list_create();
- wq->event = os_event_create(NULL);
-
- return(wq);
-}
-
-/********************************************************************
-Free a work queue. */
-
-void
-ib_wqueue_free(
-/*===========*/
- ib_wqueue_t* wq) /* in: work queue */
-{
- ut_a(!ib_list_get_first(wq->items));
-
- mutex_free(&wq->mutex);
- ib_list_free(wq->items);
- os_event_free(wq->event);
-
- mem_free(wq);
-}
-
-/********************************************************************
-Add a work item to the queue. */
-
-void
-ib_wqueue_add(
-/*==========*/
- ib_wqueue_t* wq, /* in: work queue */
- void* item, /* in: work item */
- mem_heap_t* heap) /* in: memory heap to use for allocating the
- list node */
-{
- mutex_enter(&wq->mutex);
-
- ib_list_add_last(wq->items, item, heap);
- os_event_set(wq->event);
-
- mutex_exit(&wq->mutex);
-}
-
-/********************************************************************
-Wait for a work item to appear in the queue. */
-
-void*
-ib_wqueue_wait(
- /* out: work item */
- ib_wqueue_t* wq) /* in: work queue */
-{
- ib_list_node_t* node;
-
- for (;;) {
- os_event_wait(wq->event);
-
- mutex_enter(&wq->mutex);
-
- node = ib_list_get_first(wq->items);
-
- if (node) {
- ib_list_remove(wq->items, node);
-
- if (!ib_list_get_first(wq->items)) {
- /* We must reset the event when the list
- gets emptied. */
- os_event_reset(wq->event);
- }
-
- break;
- }
-
- mutex_exit(&wq->mutex);
- }
-
- mutex_exit(&wq->mutex);
-
- return(node->data);
-}
diff --git a/storage/innobase/win_atomics32_test.c b/storage/innobase/win_atomics32_test.c
deleted file mode 100644
index fcb88d6b54e..00000000000
--- a/storage/innobase/win_atomics32_test.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright (C) 2009 Sun Microsystems AB
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
-
-#include <windows.h>
-
-int main()
-{
- volatile long var32 = 0;
- long add32 = 1;
- long old32 = 0;
- long exch32 = 1;
- long ret_value;
-
- ret_value = InterlockedExchangeAdd(&var32, add32);
- ret_value = InterlockedCompareExchange(&var32, exch32, old32);
- MemoryBarrier();
- return EXIT_SUCCESS;
-}
diff --git a/storage/innobase/win_atomics64_test.c b/storage/innobase/win_atomics64_test.c
deleted file mode 100644
index 123cb6d98cf..00000000000
--- a/storage/innobase/win_atomics64_test.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright (C) 2009 Sun Microsystems AB
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
-
-#include <windows.h>
-
-int main()
-{
- volatile long long var64 = 0;
- long long add64 = 1;
- long long old64 = 0;
- long long exch64 = 1;
- long long ret_value;
-
- ret_value = InterlockedExchangeAdd64(&var64, add64);
- ret_value = InterlockedCompareExchange64(&var64, exch64, old64);
- MemoryBarrier();
- return EXIT_SUCCESS;
-}