summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/docs/devdoc-schema.dox
blob: dd59aa2535b72177248a035b1044bd9fa1e96952 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/*! @page devdoc-schema Schema Operations

A schema defines the format of the application data in WiredTiger. WiredTiger
supports various types of schemas (see @ref schema for more
information), operated upon through a WT_SESSION reference. This section details
the internals of these various schema operations.

Schema operations cause an update to the metadata and are performed under a
schema lock to avoid concurrent operations on the same object. The following
sequence of steps defines a generic schema operation:

@plantuml_start{schema_generic.png}
@startuml{schema_generic.png}
:A schema operation;
partition with-schema-lock {
    :perform operation on underlying data-object;
    :update metadata-file;
    :checkpoint and sync metadata;
}
stop
@enduml
@plantuml_end

@section schema_create Schema Create

The create schema operation is responsible for creating the underlying data
object on the filesystem with the right parameters and then creating an entry
for this new object in the metadata. The sequence of operations involved in a
create for various schema types is as follows:

@plantuml_start{schema_create.png}
@startuml{schema_create.png}
:WT_SESSION->create(.,name,.)
(__session_create());

partition session-API-call {
    :API session init with NULL dhandle;
    :exit if PANIC flag set;
    :exit if invalid configuration;
}

:validate "name" and, if passed, the "type" config parameter;
note right
    The "name" parameter is called "uri" internally.
    "type" is usually not passed and generally
    implied from the uri.
end note

partition with-schema-lock {
    partition with-table-lock {
        :turn on meta tracking;
        :check uri}

        split
            :uri matches "file:"
            ("file" is the underlying
             type for all the objects);
        split again
            :uri matches "colgroup:";
            :__create_colgroup();
        split again
            :uri matches "table:";
            :__create_table();
        split again
            :uri matches "lsm:";
            :__wt_lsm_tree_create();
        split again
            :uri matches "index:";
            :__create_index();
        split again
            :matches a named data-source;
            :__create_data_source();
        end split

        partition __create_file() {
            :exit if file exists;
            :validate allocation size;
            :block manager creates the file:
             1.create file using __wt_open()
             2.write an initial descriptor to file
             3.fsync and close the file handle;
            if (metadata-file?) then (yes)
            else (no)
                :update metadata with file
                 configuration and version;
            endif
            :check if file setup correctly by
             getting btree handle with
             WT_DHANDLE_EXCLUSIVE set;
            if (metatracking on?) then (yes)
                :track locked handle*;
            else (no)
                :release btree -
                 sync and close;
            endif
        }

        partition turn-off-meta-tracking {
            if (errors?) then (yes)
                :unroll operations;
            else (no)
                if (logging?) then (yes)
                    :sync log;
                else (no)
                endif
                :checkpoint and sync;
            endif
            :apply post-commit ops:
             release tracked (handle) btree* -
             sync and close;
            note right
                If meta tracking is on, this btree
                was being tracked as locked. As part
                of turning off meta tracking, we sync
                and close this btree.
            end note
        }
    }
}

:API-end;

stop
@enduml
@plantuml_end

@section schema_rename Schema Rename

The rename schema operation is responsible for renaming the underlying data
object on the filesystem and updating the metadata accordingly. The sequence of
operations involved in a rename for various schema types is as follows:

@plantuml_start{schema_rename.png}
@startuml{schema_rename.png}
:WT_SESSION->rename(old-uri, new-uri, .)
(__session_rename());
:session-API-call;

partition with-checkpoint-lock {
    partition with-schema-lock {
        partition with-table-write-lock {
            :validate new uri-type to match the old type;
            :turn on meta tracking;
            :check uri}

            split
                :uri matches "file:"
                ("file" is the underlying
                 type for all the objects);
            split again
                :uri matches "lsm:";
                :__wt_lsm_tree_rename();
            split again
                :matches a named data-source;
                :WT_DATA_SOURCE::rename();
            split again
                :uri matches "table:";
                partition __rename_table() {
                    :rename colgroups and indices represented by the table:
                     1. extract names from the uri
                     2. create new uri with existing types and configuration
                     3. recursively call the rename operation on individual
                        colgroup and index with the old and the new uri
                     4. remove old entry for colgroups and indices from
                        the metadata table and add the new ones;
                    :close and remove table handle from the session;
                    :remove old table entry from the metadata table
                     and add a new one;
                }
            end split

            partition __rename_file() {
                :fail if backup cursor open and schema operations will conflict;
                :close btree handles in the file;
                :fail if a file with the old name doesn't exist or a file with
                 the new name already exists;
                :remove old file entries and add new in the metadata;
                :rename the underlying file;
                if (meta-tracking?) then (yes)
                    :track filesystem op;
                else (no)
                endif
            }

            :bump schema generation number to ignore stale data;

            partition turn-off-meta-tracking {
                if (errors?) then (yes)
                    :unroll operations;
                else (no)
                    if (logging?) then (yes)
                        :sync log;
                    else (no)
                    endif
                    :checkpoint and sync;
                endif
            }
        }
    }
}

:API-end;

stop
@enduml
@plantuml_end

*/