summaryrefslogtreecommitdiff
path: root/storage/innobase/include/lock0priv.h
blob: 06573d261fb08895da0c41efdc00994970f59626 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
/*****************************************************************************

Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2016, MariaDB Corporation

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file include/lock0priv.h
Lock module internal structures and methods.

Created July 12, 2007 Vasil Dimov
*******************************************************/

#ifndef lock0priv_h
#define lock0priv_h

#ifndef LOCK_MODULE_IMPLEMENTATION
/* If you need to access members of the structures defined in this
file, please write appropriate functions that retrieve them and put
those functions in lock/ */
#error Do not include lock0priv.h outside of the lock/ module
#endif

#include "univ.i"
#include "dict0types.h"
#include "hash0hash.h"
#include "trx0types.h"

#ifndef UINT32_MAX
#define UINT32_MAX             (4294967295U)
#endif

/** A table lock */
struct lock_table_t {
	dict_table_t*	table;		/*!< database table in dictionary
					cache */
	UT_LIST_NODE_T(lock_t)
			locks;		/*!< list of locks on the same
					table */
	/** Print the table lock into the given output stream
	@param[in,out]	out	the output stream
	@return the given output stream. */
	std::ostream& print(std::ostream& out) const;
};

/** Print the table lock into the given output stream
@param[in,out]	out	the output stream
@return the given output stream. */
inline
std::ostream& lock_table_t::print(std::ostream& out) const
{
	out << "[lock_table_t: name=" << table->name << "]";
	return(out);
}

/** The global output operator is overloaded to conveniently
print the lock_table_t object into the given output stream.
@param[in,out]	out	the output stream
@param[in]	lock	the table lock
@return the given output stream */
inline
std::ostream&
operator<<(std::ostream& out, const lock_table_t& lock)
{
	return(lock.print(out));
}

/** Record lock for a page */
struct lock_rec_t {
	ib_uint32_t	space;		/*!< space id */
	ib_uint32_t	page_no;	/*!< page number */
	ib_uint32_t	n_bits;		/*!< number of bits in the lock
					bitmap; NOTE: the lock bitmap is
					placed immediately after the
					lock struct */

	/** Print the record lock into the given output stream
	@param[in,out]	out	the output stream
	@return the given output stream. */
	std::ostream& print(std::ostream& out) const;
};

/** Print the record lock into the given output stream
@param[in,out]	out	the output stream
@return the given output stream. */
inline
std::ostream& lock_rec_t::print(std::ostream& out) const
{
	out << "[lock_rec_t: space=" << space << ", page_no=" << page_no
		<< ", n_bits=" << n_bits << "]";
	return(out);
}

inline
std::ostream&
operator<<(std::ostream& out, const lock_rec_t& lock)
{
	return(lock.print(out));
}

/** Lock struct; protected by lock_sys->mutex */
struct lock_t {
	trx_t*		trx;		/*!< transaction owning the
					lock */
	UT_LIST_NODE_T(lock_t)
			trx_locks;	/*!< list of the locks of the
					transaction */

	dict_index_t*	index;		/*!< index for a record lock */

	lock_t*		hash;		/*!< hash chain node for a record
					lock. The link node in a singly linked
					list, used during hashing. */

	/* Statistics for how long lock has been held and time
	how long this lock had to be waited before it was granted */
	time_t		requested_time; /*!< Lock request time */
	ulint		wait_time;	/*!< Time waited this lock or 0 */

	union {
		lock_table_t	tab_lock;/*!< table lock */
		lock_rec_t	rec_lock;/*!< record lock */
	} un_member;			/*!< lock details */

	ib_uint32_t	type_mode;	/*!< lock type, mode, LOCK_GAP or
					LOCK_REC_NOT_GAP,
					LOCK_INSERT_INTENTION,
					wait flag, ORed */

	/** Determine if the lock object is a record lock.
	@return true if record lock, false otherwise. */
	bool is_record_lock() const
	{
		return(type() == LOCK_REC);
	}

	bool is_waiting() const
	{
		return(type_mode & LOCK_WAIT);
	}

	bool is_gap() const
	{
		return(type_mode & LOCK_GAP);
	}

	bool is_record_not_gap() const
	{
		return(type_mode & LOCK_REC_NOT_GAP);
	}

	bool is_insert_intention() const
	{
		return(type_mode & LOCK_INSERT_INTENTION);
	}

	ulint type() const {
		return(type_mode & LOCK_TYPE_MASK);
	}

	enum lock_mode mode() const
	{
		return(static_cast<enum lock_mode>(type_mode & LOCK_MODE_MASK));
	}

	/** Print the lock object into the given output stream.
	@param[in,out]	out	the output stream
	@return the given output stream. */
	std::ostream& print(std::ostream& out) const;

	/** Convert the member 'type_mode' into a human readable string.
	@return human readable string */
	std::string type_mode_string() const;

	const char* type_string() const
	{
		switch (type_mode & LOCK_TYPE_MASK) {
		case LOCK_REC:
			return("LOCK_REC");
		case LOCK_TABLE:
			return("LOCK_TABLE");
		default:
			ut_error;
		}
	}
};

/** Convert the member 'type_mode' into a human readable string.
@return human readable string */
inline
std::string
lock_t::type_mode_string() const
{
	std::ostringstream sout;
	sout << type_string();
	sout << " | " << lock_mode_string(mode());

	if (is_record_not_gap()) {
		sout << " | LOCK_REC_NOT_GAP";
	}

	if (is_waiting()) {
		sout << " | LOCK_WAIT";
	}

	if (is_gap()) {
		sout << " | LOCK_GAP";
	}

	if (is_insert_intention()) {
		sout << " | LOCK_INSERT_INTENTION";
	}
	return(sout.str());
}

inline
std::ostream&
lock_t::print(std::ostream& out) const
{
	out << "[lock_t: type_mode=" << type_mode << "("
		<< type_mode_string() << ")";

	if (is_record_lock()) {
		out << un_member.rec_lock;
	} else {
		out << un_member.tab_lock;
	}

	out << "]";
	return(out);
}

inline
std::ostream&
operator<<(std::ostream& out, const lock_t& lock)
{
	return(lock.print(out));
}

#ifdef UNIV_DEBUG
extern ibool	lock_print_waits;
#endif /* UNIV_DEBUG */

/** Restricts the length of search we will do in the waits-for
graph of transactions */
static const ulint	LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK = 1000000;

/** Restricts the search depth we will do in the waits-for graph of
transactions */
static const ulint	LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK = 200;

/** When releasing transaction locks, this specifies how often we release
the lock mutex for a moment to give also others access to it */
static const ulint	LOCK_RELEASE_INTERVAL = 1000;

/* Safety margin when creating a new record lock: this many extra records
can be inserted to the page without need to create a lock with a bigger
bitmap */

static const ulint	LOCK_PAGE_BITMAP_MARGIN = 64;

/* An explicit record lock affects both the record and the gap before it.
An implicit x-lock does not affect the gap, it only locks the index
record from read or update.

If a transaction has modified or inserted an index record, then
it owns an implicit x-lock on the record. On a secondary index record,
a transaction has an implicit x-lock also if it has modified the
clustered index record, the max trx id of the page where the secondary
index record resides is >= trx id of the transaction (or database recovery
is running), and there are no explicit non-gap lock requests on the
secondary index record.

This complicated definition for a secondary index comes from the
implementation: we want to be able to determine if a secondary index
record has an implicit x-lock, just by looking at the present clustered
index record, not at the historical versions of the record. The
complicated definition can be explained to the user so that there is
nondeterminism in the access path when a query is answered: we may,
or may not, access the clustered index record and thus may, or may not,
bump into an x-lock set there.

Different transaction can have conflicting locks set on the gap at the
same time. The locks on the gap are purely inhibitive: an insert cannot
be made, or a select cursor may have to wait if a different transaction
has a conflicting lock on the gap. An x-lock on the gap does not give
the right to insert into the gap.

An explicit lock can be placed on a user record or the supremum record of
a page. The locks on the supremum record are always thought to be of the gap
type, though the gap bit is not set. When we perform an update of a record
where the size of the record changes, we may temporarily store its explicit
locks on the infimum record of the page, though the infimum otherwise never
carries locks.

A waiting record lock can also be of the gap type. A waiting lock request
can be granted when there is no conflicting mode lock request by another
transaction ahead of it in the explicit lock queue.

In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
It only locks the record it is placed on, not the gap before the record.
This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
level.

-------------------------------------------------------------------------
RULE 1: If there is an implicit x-lock on a record, and there are non-gap
-------
lock requests waiting in the queue, then the transaction holding the implicit
x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
released, we can grant locks to waiting lock requests purely by looking at
the explicit lock requests in the queue.

RULE 3: Different transactions cannot have conflicting granted non-gap locks
-------
on a record at the same time. However, they can have conflicting granted gap
locks.
RULE 4: If a there is a waiting lock request in a queue, no lock request,
-------
gap or not, can be inserted ahead of it in the queue. In record deletes
and page splits new gap type locks can be created by the database manager
for a transaction, and without rule 4, the waits-for graph of transactions
might become cyclic without the database noticing it, as the deadlock check
is only performed when a transaction itself requests a lock!
-------------------------------------------------------------------------

An insert is allowed to a gap if there are no explicit lock requests by
other transactions on the next record. It does not matter if these lock
requests are granted or waiting, gap bit set or not, with the exception
that a gap type request set by another transaction to wait for
its turn to do an insert is ignored. On the other hand, an
implicit x-lock by another transaction does not prevent an insert, which
allows for more concurrency when using an Oracle-style sequence number
generator for the primary key with many transactions doing inserts
concurrently.

A modify of a record is allowed if the transaction has an x-lock on the
record, or if other transactions do not have any non-gap lock requests on the
record.

A read of a single user record with a cursor is allowed if the transaction
has a non-gap explicit, or an implicit lock on the record, or if the other
transactions have no x-lock requests on the record. At a page supremum a
read is always allowed.

In summary, an implicit lock is seen as a granted x-lock only on the
record, not on the gap. An explicit lock with no gap bit set is a lock
both on the record and the gap. If the gap bit is set, the lock is only
on the gap. Different transaction cannot own conflicting locks on the
record at the same time, but they may own conflicting locks on the gap.
Granted locks on a record give an access right to the record, but gap type
locks just inhibit operations.

NOTE: Finding out if some transaction has an implicit x-lock on a secondary
index record can be cumbersome. We may have to look at previous versions of
the corresponding clustered index record to find out if a delete marked
secondary index record was delete marked by an active transaction, not by
a committed one.

FACT A: If a transaction has inserted a row, it can delete it any time
without need to wait for locks.

PROOF: The transaction has an implicit x-lock on every index record inserted
for the row, and can thus modify each record without the need to wait. Q.E.D.

FACT B: If a transaction has read some result set with a cursor, it can read
it again, and retrieves the same result set, if it has not modified the
result set in the meantime. Hence, there is no phantom problem. If the
biggest record, in the alphabetical order, touched by the cursor is removed,
a lock wait may occur, otherwise not.

PROOF: When a read cursor proceeds, it sets an s-lock on each user record
it passes, and a gap type s-lock on each page supremum. The cursor must
wait until it has these locks granted. Then no other transaction can
have a granted x-lock on any of the user records, and therefore cannot
modify the user records. Neither can any other transaction insert into
the gaps which were passed over by the cursor. Page splits and merges,
and removal of obsolete versions of records do not affect this, because
when a user record or a page supremum is removed, the next record inherits
its locks as gap type locks, and therefore blocks inserts to the same gap.
Also, if a page supremum is inserted, it inherits its locks from the successor
record. When the cursor is positioned again at the start of the result set,
the records it will touch on its course are either records it touched
during the last pass or new inserted page supremums. It can immediately
access all these records, and when it arrives at the biggest record, it
notices that the result set is complete. If the biggest record was removed,
lock wait can occur because the next record only inherits a gap type lock,
and a wait may be needed. Q.E.D. */

/* If an index record should be changed or a new inserted, we must check
the lock on the record or the next. When a read cursor starts reading,
we will set a record level s-lock on each record it passes, except on the
initial record on which the cursor is positioned before we start to fetch
records. Our index tree search has the convention that the B-tree
cursor is positioned BEFORE the first possibly matching record in
the search. Optimizations are possible here: if the record is searched
on an equality condition to a unique key, we could actually set a special
lock on the record, a lock which would not prevent any insert before
this record. In the next key locking an x-lock set on a record also
prevents inserts just before that record.
	There are special infimum and supremum records on each page.
A supremum record can be locked by a read cursor. This records cannot be
updated but the lock prevents insert of a user record to the end of
the page.
	Next key locks will prevent the phantom problem where new rows
could appear to SELECT result sets after the select operation has been
performed. Prevention of phantoms ensures the serilizability of
transactions.
	What should we check if an insert of a new record is wanted?
Only the lock on the next record on the same page, because also the
supremum record can carry a lock. An s-lock prevents insertion, but
what about an x-lock? If it was set by a searched update, then there
is implicitly an s-lock, too, and the insert should be prevented.
What if our transaction owns an x-lock to the next record, but there is
a waiting s-lock request on the next record? If this s-lock was placed
by a read cursor moving in the ascending order in the index, we cannot
do the insert immediately, because when we finally commit our transaction,
the read cursor should see also the new inserted record. So we should
move the read cursor backward from the next record for it to pass over
the new inserted record. This move backward may be too cumbersome to
implement. If we in this situation just enqueue a second x-lock request
for our transaction on the next record, then the deadlock mechanism
notices a deadlock between our transaction and the s-lock request
transaction. This seems to be an ok solution.
	We could have the convention that granted explicit record locks,
lock the corresponding records from changing, and also lock the gaps
before them from inserting. A waiting explicit lock request locks the gap
before from inserting. Implicit record x-locks, which we derive from the
transaction id in the clustered index record, only lock the record itself
from modification, not the gap before it from inserting.
	How should we store update locks? If the search is done by a unique
key, we could just modify the record trx id. Otherwise, we could put a record
x-lock on the record. If the update changes ordering fields of the
clustered index record, the inserted new record needs no record lock in
lock table, the trx id is enough. The same holds for a secondary index
record. Searched delete is similar to update.

PROBLEM:
What about waiting lock requests? If a transaction is waiting to make an
update to a record which another modified, how does the other transaction
know to send the end-lock-wait signal to the waiting transaction? If we have
the convention that a transaction may wait for just one lock at a time, how
do we preserve it if lock wait ends?

PROBLEM:
Checking the trx id label of a secondary index record. In the case of a
modification, not an insert, is this necessary? A secondary index record
is modified only by setting or resetting its deleted flag. A secondary index
record contains fields to uniquely determine the corresponding clustered
index record. A secondary index record is therefore only modified if we
also modify the clustered index record, and the trx id checking is done
on the clustered index record, before we come to modify the secondary index
record. So, in the case of delete marking or unmarking a secondary index
record, we do not have to care about trx ids, only the locks in the lock
table must be checked. In the case of a select from a secondary index, the
trx id is relevant, and in this case we may have to search the clustered
index record.

PROBLEM: How to update record locks when page is split or merged, or
--------------------------------------------------------------------
a record is deleted or updated?
If the size of fields in a record changes, we perform the update by
a delete followed by an insert. How can we retain the locks set or
waiting on the record? Because a record lock is indexed in the bitmap
by the heap number of the record, when we remove the record from the
record list, it is possible still to keep the lock bits. If the page
is reorganized, we could make a table of old and new heap numbers,
and permute the bitmaps in the locks accordingly. We can add to the
table a row telling where the updated record ended. If the update does
not require a reorganization of the page, we can simply move the lock
bits for the updated record to the position determined by its new heap
number (we may have to allocate a new lock, if we run out of the bitmap
in the old one).
	A more complicated case is the one where the reinsertion of the
updated record is done pessimistically, because the structure of the
tree may change.

PROBLEM: If a supremum record is removed in a page merge, or a record
---------------------------------------------------------------------
removed in a purge, what to do to the waiting lock requests? In a split to
the right, we just move the lock requests to the new supremum. If a record
is removed, we could move the waiting lock request to its inheritor, the
next record in the index. But, the next record may already have lock
requests on its own queue. A new deadlock check should be made then. Maybe
it is easier just to release the waiting transactions. They can then enqueue
new lock requests on appropriate records.

PROBLEM: When a record is inserted, what locks should it inherit from the
-------------------------------------------------------------------------
upper neighbor? An insert of a new supremum record in a page split is
always possible, but an insert of a new user record requires that the upper
neighbor does not have any lock requests by other transactions, granted or
waiting, in its lock queue. Solution: We can copy the locks as gap type
locks, so that also the waiting locks are transformed to granted gap type
locks on the inserted record. */

/* LOCK COMPATIBILITY MATRIX
 *    IS IX S  X  AI
 * IS +	 +  +  -  +
 * IX +	 +  -  -  +
 * S  +	 -  +  -  -
 * X  -	 -  -  -  -
 * AI +	 +  -  -  -
 *
 * Note that for rows, InnoDB only acquires S or X locks.
 * For tables, InnoDB normally acquires IS or IX locks.
 * S or X table locks are only acquired for LOCK TABLES.
 * Auto-increment (AI) locks are needed because of
 * statement-level MySQL binlog.
 * See also lock_mode_compatible().
 */
static const byte lock_compatibility_matrix[5][5] = {
 /**         IS     IX       S     X       AI */
 /* IS */ {  TRUE,  TRUE,  TRUE,  FALSE,  TRUE},
 /* IX */ {  TRUE,  TRUE,  FALSE, FALSE,  TRUE},
 /* S  */ {  TRUE,  FALSE, TRUE,  FALSE,  FALSE},
 /* X  */ {  FALSE, FALSE, FALSE, FALSE,  FALSE},
 /* AI */ {  TRUE,  TRUE,  FALSE, FALSE,  FALSE}
};

/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
 *    IS IX S  X  AI
 * IS +  -  -  -  -
 * IX +  +  -  -  -
 * S  +  -  +  -  -
 * X  +  +  +  +  +
 * AI -  -  -  -  +
 * See lock_mode_stronger_or_eq().
 */
static const byte lock_strength_matrix[5][5] = {
 /**         IS     IX       S     X       AI */
 /* IS */ {  TRUE,  FALSE, FALSE,  FALSE, FALSE},
 /* IX */ {  TRUE,  TRUE,  FALSE, FALSE,  FALSE},
 /* S  */ {  TRUE,  FALSE, TRUE,  FALSE,  FALSE},
 /* X  */ {  TRUE,  TRUE,  TRUE,  TRUE,   TRUE},
 /* AI */ {  FALSE, FALSE, FALSE, FALSE,  TRUE}
};

/** Maximum depth of the DFS stack. */
static const ulint MAX_STACK_SIZE = 4096;

#define PRDT_HEAPNO	PAGE_HEAP_NO_INFIMUM
/** Record locking request status */
enum lock_rec_req_status {
        /** Failed to acquire a lock */
        LOCK_REC_FAIL,
        /** Succeeded in acquiring a lock (implicit or already acquired) */
        LOCK_REC_SUCCESS,
        /** Explicitly created a new lock */
        LOCK_REC_SUCCESS_CREATED
};

/**
Record lock ID */
struct RecID {

	RecID(ulint space_id, ulint page_no, ulint heap_no)
		:
		m_space_id(static_cast<uint32_t>(space_id)),
		m_page_no(static_cast<uint32_t>(page_no)),
		m_heap_no(static_cast<uint32_t>(heap_no)),
		m_fold(lock_rec_fold(m_space_id, m_page_no))
	{
		ut_ad(space_id < UINT32_MAX);
		ut_ad(page_no < UINT32_MAX);
		ut_ad(heap_no < UINT32_MAX);
	}

	RecID(const buf_block_t* block, ulint heap_no)
		:
		m_space_id(block->page.id.space()),
		m_page_no(block->page.id.page_no()),
		m_heap_no(static_cast<uint32_t>(heap_no)),
		m_fold(lock_rec_fold(m_space_id, m_page_no))
	{
		ut_ad(heap_no < UINT32_MAX);
	}

	/**
	@return the "folded" value of {space, page_no} */
	ulint fold() const
	{
		return(m_fold);
	}

	/**
	Tablespace ID */
	uint32_t		m_space_id;

	/**
	Page number within the space ID */
	uint32_t		m_page_no;

	/**
	Heap number within the page */
	uint32_t		m_heap_no;

	/**
	Hashed key value */
	ulint			m_fold;
};

/**
Create record locks */
class RecLock {
public:

	/**
	@param[in,out] thr	Transaction query thread requesting the record
				lock
	@param[in] index	Index on which record lock requested
	@param[in] rec_id	Record lock tuple {space, page_no, heap_no}
	@param[in] mode		The lock mode */
	RecLock(que_thr_t*	thr,
		dict_index_t*	index,
		const RecID&	rec_id,
		ulint		mode)
		:
		m_thr(thr),
		m_trx(thr_get_trx(thr)),
		m_mode(mode),
		m_index(index),
		m_rec_id(rec_id)
	{
		ut_ad(is_predicate_lock(m_mode));

		init(NULL);
	}

	/**
	@param[in,out] thr	Transaction query thread requesting the record
				lock
	@param[in] index	Index on which record lock requested
	@param[in] block	Buffer page containing record
	@param[in] heap_no	Heap number within the block
	@param[in] mode		The lock mode
	@param[in] prdt		The predicate for the rtree lock */
	RecLock(que_thr_t*	thr,
		dict_index_t*	index,
		const buf_block_t*
				block,
		ulint		heap_no,
		ulint		mode,
		lock_prdt_t*	prdt = NULL)
		:
		m_thr(thr),
		m_trx(thr_get_trx(thr)),
		m_mode(mode),
		m_index(index),
		m_rec_id(block, heap_no)
	{
		btr_assert_not_corrupted(block, index);

		init(block->frame);
	}

	/**
	@param[in] index	Index on which record lock requested
	@param[in] rec_id	Record lock tuple {space, page_no, heap_no}
	@param[in] mode		The lock mode */
	RecLock(dict_index_t*	index,
		const RecID&	rec_id,
		ulint		mode)
		:
		m_thr(),
		m_trx(),
		m_mode(mode),
		m_index(index),
		m_rec_id(rec_id)
	{
		ut_ad(is_predicate_lock(m_mode));

		init(NULL);
	}

	/**
	@param[in] index	Index on which record lock requested
	@param[in] block	Buffer page containing record
	@param[in] heap_no	Heap number withing block
	@param[in] mode		The lock mode */
	RecLock(dict_index_t*	index,
		const buf_block_t*
				block,
		ulint		heap_no,
		ulint		mode)
		:
		m_thr(),
		m_trx(),
		m_mode(mode),
		m_index(index),
		m_rec_id(block, heap_no)
	{
		btr_assert_not_corrupted(block, index);

		init(block->frame);
	}

	/**
	Enqueue a lock wait for a transaction. If it is a high priority
	transaction (cannot rollback) then jump ahead in the record lock wait
	queue and if the transaction at the head of the queue is itself waiting
	roll it back.
	@param[in, out] wait_for	The lock that the the joining
					transaction is waiting for
	@param[in] prdt			Predicate [optional]
	@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
		DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
		there was a deadlock, but another transaction was chosen
		as a victim, and we got the lock immediately: no need to
		wait then */
	dberr_t add_to_waitq(
		const lock_t*	wait_for,
		const lock_prdt_t*
				prdt = NULL);

	/**
	Create a lock for a transaction and initialise it.
	@param[in, out] trx		Transaction requesting the new lock
	@param[in] owns_trx_mutex	true if caller owns the trx_t::mutex
	@param[in] prdt			Predicate lock (optional)
	@return new lock instance */
	lock_t* create(
		trx_t*		trx,
		bool		owns_trx_mutex,
		const lock_prdt_t*
				prdt = NULL);
	lock_t* create(
		lock_t* const	c_lock,
		trx_t*		trx,
		bool		owns_trx_mutex,
		const lock_prdt_t*
				prdt = NULL);

	/**
	Check of the lock is on m_rec_id.
	@param[in] lock			Lock to compare with
	@return true if the record lock is on m_rec_id*/
	bool is_on_row(const lock_t* lock) const;

	/**
	Create the lock instance
	@param[in, out] trx	The transaction requesting the lock
	@param[in, out] index	Index on which record lock is required
	@param[in] mode		The lock mode desired
	@param[in] rec_id	The record id
	@param[in] size		Size of the lock + bitmap requested
	@return a record lock instance */
	static lock_t* lock_alloc(
		trx_t*		trx,
		dict_index_t*	index,
		ulint		mode,
		const RecID&	rec_id,
		ulint		size);

private:
	/**
	Enqueue a lock wait for a high priority transaction, jump the record
	lock wait queue and if the transaction at the head of the queue is
	itself waiting roll it back.
	@param[in, out] wait_for	The lock that the joining
					transaction is waiting for
	@param[in] prdt			Predicate for the predicate lock
	@return NULL if the lock was granted */
	lock_t* enqueue_priority(
		const lock_t*	wait_for,
		const lock_prdt_t*
				prdt);

	/*
	@return the record lock size in bytes */
	size_t lock_size() const
	{
		return(m_size);
	}

	/**
	Do some checks and prepare for creating a new record lock */
	void prepare() const;

	/**
	Collect the transactions that will need to be rolled back asynchronously
	@param[in, out] trx	Transaction to be rolled back */
	void mark_trx_for_rollback(trx_t* trx);

	/**
	Add the lock to the head of the record lock {space, page_no} wait
	queue and the transaction's lock list. If the transactions holding
	blocking locks are already marked for termination then they are not
	added to the hit list.
	@param[in, out] lock		Lock being requested
	@param[in] wait_for		The blocking lock
	@param[in] kill_trx		true if the transaction that m_trx
					is waiting for should be killed */
	void jump_queue(lock_t* lock, const lock_t* wait_for, bool kill_trx);

	/**
	Setup the requesting transaction state for lock grant
	@param[in,out] lock	Lock for which to change state */
	void set_wait_state(lock_t* lock);

	/**
	Rollback the transaction that is blocking the requesting transaction
	@param[in, out] lock	The blocking lock */
	void rollback_blocking_trx(lock_t* lock) const;

	/**
	Add the lock to the record lock hash and the transaction's lock list
	@param[in,out] lock	Newly created record lock to add to the
				rec hash and the transaction lock list
	@param[in] add_to_hash	If the lock should be added to the hash table */
	void lock_add(lock_t* lock, bool add_to_hash);

	/**
	Check and resolve any deadlocks
	@param[in, out] lock		The lock being acquired
	@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
		DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
		there was a deadlock, but another transaction was chosen
		as a victim, and we got the lock immediately: no need to
		wait then */
	dberr_t deadlock_check(lock_t* lock);

	/**
	Check the outcome of the deadlock check
	@param[in,out] victim_trx	Transaction selected for rollback
	@param[in,out] lock		Lock being requested
	@return DB_LOCK_WAIT, DB_DEADLOCK or DB_SUCCESS_LOCKED_REC */
	dberr_t check_deadlock_result(const trx_t* victim_trx, lock_t* lock);

	/**
	Setup the context from the requirements */
	void init(const page_t* page)
	{
		ut_ad(lock_mutex_own());
		ut_ad(!srv_read_only_mode);
		ut_ad(dict_index_is_clust(m_index)
		      || !dict_index_is_online_ddl(m_index));
		ut_ad(m_thr == NULL || m_trx == thr_get_trx(m_thr));

		m_size = is_predicate_lock(m_mode)
			  ? lock_size(m_mode) : lock_size(page);

		/** If rec is the supremum record, then we reset the
		gap and LOCK_REC_NOT_GAP bits, as all locks on the
		supremum are automatically of the gap type */

		if (m_rec_id.m_heap_no == PAGE_HEAP_NO_SUPREMUM) {
			ut_ad(!(m_mode & LOCK_REC_NOT_GAP));

			m_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
		}
	}

	/**
	Calculate the record lock physical size required for a predicate lock.
	@param[in] mode For predicate locks the lock mode
	@return the size of the lock data structure required in bytes */
	static size_t lock_size(ulint mode)
	{
		ut_ad(is_predicate_lock(mode));

		/* The lock is always on PAGE_HEAP_NO_INFIMUM(0),
		so we only need 1 bit (which is rounded up to 1
		byte) for lock bit setting */

		size_t	n_bytes;

		if (mode & LOCK_PREDICATE) {
			const ulint	align = UNIV_WORD_SIZE - 1;

			/* We will attach the predicate structure
			after lock. Make sure the memory is
			aligned on 8 bytes, the mem_heap_alloc
			will align it with MEM_SPACE_NEEDED
			anyway. */

			n_bytes = (1 + sizeof(lock_prdt_t) + align) & ~align;

			/* This should hold now */

			ut_ad(n_bytes == sizeof(lock_prdt_t) + UNIV_WORD_SIZE);

		} else {
			n_bytes = 1;
		}

		return(n_bytes);
	}

	/**
	Calculate the record lock physical size required, non-predicate lock.
	@param[in] page		For non-predicate locks the buffer page
	@return the size of the lock data structure required in bytes */
	static size_t lock_size(const page_t* page)
	{
		ulint	n_recs = page_dir_get_n_heap(page);

		/* Make lock bitmap bigger by a safety margin */

		return(1 + ((n_recs + LOCK_PAGE_BITMAP_MARGIN) / 8));
	}

	/**
	@return true if the requested lock mode is for a predicate
		or page lock */
	static bool is_predicate_lock(ulint mode)
	{
		return(mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
	}

private:
	/** The query thread of the transaction */
	que_thr_t*		m_thr;

	/**
	Transaction requesting the record lock */
	trx_t*			m_trx;

	/**
	Lock mode requested */
	ulint			m_mode;

	/**
	Size of the record lock in bytes */
	size_t			m_size;

	/**
	Index on which the record lock is required */
	dict_index_t*		m_index;

	/**
	The record lock tuple {space, page_no, heap_no} */
	RecID			m_rec_id;
};

#ifdef UNIV_DEBUG
/** The count of the types of locks. */
static const ulint      lock_types = UT_ARR_SIZE(lock_compatibility_matrix);
#endif /* UNIV_DEBUG */

/*********************************************************************//**
Gets the type of a lock.
@return LOCK_TABLE or LOCK_REC */
UNIV_INLINE
ulint
lock_get_type_low(
/*==============*/
	const lock_t*	lock);	/*!< in: lock */

/*********************************************************************//**
Gets the previous record lock set on a record.
@return previous lock on the same record, NULL if none exists */
const lock_t*
lock_rec_get_prev(
/*==============*/
	const lock_t*	in_lock,/*!< in: record lock */
	ulint		heap_no);/*!< in: heap number of the record */

/*********************************************************************//**
Cancels a waiting lock request and releases possible other transactions
waiting behind it. */
void
lock_cancel_waiting_and_release(
/*============================*/
	lock_t*	lock);	/*!< in/out: waiting lock request */

/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a clustered
index.
@return transaction id of the transaction which has the x-lock, or 0 */
UNIV_INLINE
trx_id_t
lock_clust_rec_some_has_impl(
/*=========================*/
	const rec_t*		rec,	/*!< in: user record */
	const dict_index_t*	index,	/*!< in: clustered index */
	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
	MY_ATTRIBUTE((nonnull, warn_unused_result));

/*********************************************************************//**
Gets the first or next record lock on a page.
@return next lock, NULL if none exists */
UNIV_INLINE
const lock_t*
lock_rec_get_next_on_page_const(
/*============================*/
	const lock_t*	lock);	/*!< in: a record lock */

/*********************************************************************//**
Gets the nth bit of a record lock.
@return TRUE if bit set also if i == ULINT_UNDEFINED return FALSE*/
UNIV_INLINE
ibool
lock_rec_get_nth_bit(
/*=================*/
	const lock_t*	lock,	/*!< in: record lock */
	ulint		i);	/*!< in: index of the bit */

/*********************************************************************//**
Gets the number of bits in a record lock bitmap.
@return number of bits */
UNIV_INLINE
ulint
lock_rec_get_n_bits(
/*================*/
	const lock_t*	lock);	/*!< in: record lock */

/**********************************************************************//**
Sets the nth bit of a record lock to TRUE. */
UNIV_INLINE
void
lock_rec_set_nth_bit(
/*=================*/
	lock_t*	lock,	/*!< in: record lock */
	ulint	i);	/*!< in: index of the bit */

/*********************************************************************//**
Gets the first or next record lock on a page.
@return next lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_next_on_page(
/*======================*/
	lock_t*		lock);		/*!< in: a record lock */
/*********************************************************************//**
Gets the first record lock on a page, where the page is identified by its
file address.
@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first_on_page_addr(
/*============================*/
	hash_table_t*   lock_hash,	/* Lock hash table */
	ulint           space,		/*!< in: space */
	ulint           page_no);	/*!< in: page number */

/*********************************************************************//**
Gets the first record lock on a page, where the page is identified by a
pointer to it.
@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first_on_page(
/*=======================*/
	hash_table_t*		lock_hash,	/*!< in: lock hash table */
	const buf_block_t*	block);		/*!< in: buffer block */


/*********************************************************************//**
Gets the next explicit lock request on a record.
@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
UNIV_INLINE
lock_t*
lock_rec_get_next(
/*==============*/
	ulint	heap_no,/*!< in: heap number of the record */
	lock_t*	lock);	/*!< in: lock */

/*********************************************************************//**
Gets the next explicit lock request on a record.
@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
UNIV_INLINE
const lock_t*
lock_rec_get_next_const(
/*====================*/
	ulint		heap_no,/*!< in: heap number of the record */
	const lock_t*	lock);	/*!< in: lock */

/*********************************************************************//**
Gets the first explicit lock request on a record.
@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first(
/*===============*/
	hash_table_t*		hash,	/*!< in: hash chain the lock on */
	const buf_block_t*	block,	/*!< in: block containing the record */
	ulint			heap_no);/*!< in: heap number of the record */

/*********************************************************************//**
Gets the mode of a lock.
@return mode */
UNIV_INLINE
enum lock_mode
lock_get_mode(
/*==========*/
	const lock_t*	lock);	/*!< in: lock */

/*********************************************************************//**
Calculates if lock mode 1 is compatible with lock mode 2.
@return nonzero if mode1 compatible with mode2 */
UNIV_INLINE
ulint
lock_mode_compatible(
/*=================*/
	enum lock_mode	mode1,	/*!< in: lock mode */
	enum lock_mode	mode2);	/*!< in: lock mode */

/*********************************************************************//**
Calculates if lock mode 1 is stronger or equal to lock mode 2.
@return nonzero if mode1 stronger or equal to mode2 */
UNIV_INLINE
ulint
lock_mode_stronger_or_eq(
/*=====================*/
	enum lock_mode	mode1,	/*!< in: lock mode */
	enum lock_mode	mode2);	/*!< in: lock mode */

/*********************************************************************//**
Gets the wait flag of a lock.
@return LOCK_WAIT if waiting, 0 if not */
UNIV_INLINE
ulint
lock_get_wait(
/*==========*/
	const lock_t*	lock);	/*!< in: lock */

/*********************************************************************//**
Looks for a suitable type record lock struct by the same trx on the same page.
This can be used to save space when a new record lock should be set on a page:
no new struct is needed, if a suitable old is found.
@return lock or NULL */
UNIV_INLINE
lock_t*
lock_rec_find_similar_on_page(
/*==========================*/
	ulint		type_mode,	/*!< in: lock type_mode field */
	ulint		heap_no,	/*!< in: heap number of the record */
	lock_t*		lock,		/*!< in: lock_rec_get_first_on_page() */
	const trx_t*	trx);		/*!< in: transaction */

/*********************************************************************//**
Checks if a transaction has the specified table lock, or stronger. This
function should only be called by the thread that owns the transaction.
@return lock or NULL */
UNIV_INLINE
const lock_t*
lock_table_has(
/*===========*/
	const trx_t*		trx,	/*!< in: transaction */
	const dict_table_t*	table,	/*!< in: table */
	enum lock_mode		mode);	/*!< in: lock mode */

#ifndef UNIV_NONINL
#include "lock0priv.ic"
#endif

#endif /* lock0priv_h */