summaryrefslogtreecommitdiff
path: root/docs/programmer_reference/stl_persistence.html
blob: 32d99b5f554c05b9e6fd46291ba362da775f2d9a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
    <title>Dbstl persistence</title>
    <link rel="stylesheet" href="gettingStarted.css" type="text/css" />
    <meta name="generator" content="DocBook XSL Stylesheets V1.73.2" />
    <link rel="start" href="index.html" title="Berkeley DB Programmer's Reference Guide" />
    <link rel="up" href="stl.html" title="Chapter 7. Standard Template Library API" />
    <link rel="prev" href="stl_complex_rw.html" title="Store and Retrieve data or objects of complex types" />
    <link rel="next" href="stl_container_specific.html" title="Dbstl container specific notes" />
  </head>
  <body>
    <div xmlns="" class="navheader">
      <div class="libver">
        <p>Library Version 12.1.6.1</p>
      </div>
      <table width="100%" summary="Navigation header">
        <tr>
          <th colspan="3" align="center">Dbstl persistence</th>
        </tr>
        <tr>
          <td width="20%" align="left"><a accesskey="p" href="stl_complex_rw.html">Prev</a> </td>
          <th width="60%" align="center">Chapter 7. Standard Template Library API</th>
          <td width="20%" align="right"> <a accesskey="n" href="stl_container_specific.html">Next</a></td>
        </tr>
      </table>
      <hr />
    </div>
    <div class="sect1" lang="en" xml:lang="en">
      <div class="titlepage">
        <div>
          <div>
            <h2 class="title" style="clear: both"><a id="stl_persistence"></a>Dbstl persistence</h2>
          </div>
        </div>
      </div>
      <div class="toc">
        <dl>
          <dt>
            <span class="sect2">
              <a href="stl_persistence.html#directdbget">Direct database get</a>
            </span>
          </dt>
          <dt>
            <span class="sect2">
              <a href="stl_persistence.html#chg_persistence">Change persistence</a>
            </span>
          </dt>
          <dt>
            <span class="sect2">
              <a href="stl_persistence.html#obj_life_persistence">Object lifetime and persistence</a>
            </span>
          </dt>
        </dl>
      </div>
      <p> 
        The following sections provide information on how to
        achieve persistence using dbstl. 
    </p>
      <div class="sect2" lang="en" xml:lang="en">
        <div class="titlepage">
          <div>
            <div>
              <h3 class="title"><a id="directdbget"></a>Direct database get</h3>
            </div>
          </div>
        </div>
        <p>
            Each container has a <span class="bold"><strong>begin()</strong></span> method 
            which produces an iterator.
            These <span class="bold"><strong>begin</strong></span> methods take
            a boolean parameter, <span class="bold"><strong>directdb_get</strong></span>, which 
            controls the caching behavior of the iterator. The default value of this
            parameter is <code class="literal">true</code>.
        </p>
        <p> 
            If <span class="bold"><strong>directdb_get</strong></span> is
            <code class="literal">true</code>, then the persistent object is
            fetched anew from the database each time the iterator is
            dereferenced as a pointer by use of the star-operator
            (<span class="bold"><strong>*iterator</strong></span>) or by use
            of the arrow-operator (<span class="bold"><strong>iterator-&gt;member</strong></span>). 
            If <span class="bold"><strong>directdb_get</strong></span> is
            <code class="literal">false</code>, then the first dereferencing
            of the iterator fetches the object from the database, but
            later dereferences can return cached data. 
        </p>
        <p> 
            With <span class="bold"><strong>directdb_get</strong></span> set
            to <code class="literal">true</code>, if you call: 
        </p>
        <pre class="programlisting">(*iterator).datamember1=new-value1; 
(*iterator).datamember2=new-value2;  </pre>
        <p>
            then the assignment to <code class="literal">datamember1</code>
            will be lost, because the second dereferencing of the
            iterator would cause the cached copy of the object to be
            overwritten by the object's persistent data from the
            database. 
        </p>
        <p>
            You can also use the arrow operator like this: 
        </p>
        <pre class="programlisting">iterator-&gt;datamember1=new-value1; 
iterator-&gt;datamember2=new-value2;  </pre>
        <p>
            This works exactly the same way as <span class="bold"><strong>iterator::operator*</strong></span>. For this
            reason, the same caching rules apply to arrow operators as
            they do for star operators.
        </p>
        <p> 
            One way to avoid this problem is to create a reference
            to the object, and use it to access the object: 
        </p>
        <pre class="programlisting">container::value_type &amp;ref = *iterator;
ref.datamember1=new-value1;
ref.datamember2=new-value2;
...// more member function calls and datamember assignments
ref._DB_STL_StoreElement();  </pre>
        <p> 
            The above code will not lose the newly assigned value
            of <code class="literal">ref.datamember1</code> in the way that the
            previous example did.
        </p>
        <p> 
            In order to avoid these complications, you can assign
            to the object referenced by an iterator with another
            object of the same type like this: 
        </p>
        <pre class="programlisting">container::value_type obj2;
obj2.datamember1 = new-value1;
obj2.datamember2 = new-value2;
*itr = obj2; </pre>
        <p>
            This code snippet causes the new values in
            <code class="literal">obj2</code> to be stored into the
            underlying database.
        </p>
        <p> 
            If you have two iterators going through the same
            container like this: 
        </p>
        <pre class="programlisting">for (iterator1 = v.begin(), iterator2 = v.begin();
     iterator1 != v.end();
     ++iterator1, ++iterator2) {
        *iterator1 = new_value;
        print(*iterator2);
}  </pre>
        <p> 
            then the printed value will depend on the value of
            <span class="bold"><strong>directdb_get</strong></span> with
            which the iterator had been created. If <span class="bold"><strong>directdb_get</strong></span> is
            <code class="literal">false</code>, then the original,
            persistent value is printed; otherwise the newly assigned
            value is returned from the cache when
            <code class="literal">iterator2</code> is dereferenced. This
            happens because each iterator has its own cached copy of
            the persistent object, and the dereferencing of
            <code class="literal">iterator2</code> refreshes
            <code class="literal">iterator2</code>'s copy from the database,
            retrieving the value stored by the assignment to
            <code class="literal">*iterator1</code>.
        </p>
        <p>
            Alternatively, you can set <span class="bold"><strong>directdb_get</strong></span> to
            <code class="literal">false</code> and call <code class="methodname">iterator2-&gt;refresh()</code>
            immediately before the dereferencing of
            <code class="literal">iterator2</code>, so that
            <code class="literal">iterator2</code>'s cached value is
            refreshed.
        </p>
        <p> 
            If <span class="bold"><strong>directdb_get</strong></span> is
            <code class="literal">false</code>, a few of the tests in
            dbstl's test kit will fail. This is because the above
            contrived case appears in several of the C++ STL tests.
            Consequently, the default value of the <span class="bold"><strong>directdb_get</strong></span> parameter in the
            <code class="methodname">container::begin()</code> methods is
            <code class="literal">true</code>. If your use cases avoid such
            bizarre usage of iterators, you can set it to
            <code class="literal">false</code>, which makes the iterator
            read operation faster. 
        </p>
      </div>
      <div class="sect2" lang="en" xml:lang="en">
        <div class="titlepage">
          <div>
            <div>
              <h3 class="title"><a id="chg_persistence"></a>Change persistence</h3>
            </div>
          </div>
        </div>
        <p>
            If you modify the object to which an iterator refers by
            using one of the following: 
        </p>
        <pre class="programlisting">(*iterator).member_function_call()</pre>
        <p>
            or 
        </p>
        <pre class="programlisting">(*iterator).data_member = new_value</pre>
        <p>
            then you should call
            <code class="methodname">iterator-&gt;_DB_STL_StoreElement()</code>
            to store the change. Otherwise the change is lost after
            the iterator moves on to other elements. 
        </p>
        <p>
            If you are storing a sequence, and you modified some
            part of it, you should also call
            <code class="methodname">iterator-&gt;_DB_STL_StoreElement()</code>
            before moving the iterator.
        </p>
        <p>
            And in both cases, if <span class="bold"><strong>directdb_get</strong></span> is 
            <code class="literal">true</code>
            (this is the default value), you should call
            <code class="methodname">_DB_STL_StoreElement()</code> after
            the change and before the next iterator movement OR the
            next dereferencing of the iterator by the star or arrow
            operators (<code class="literal">iterator::operator*</code> or
            <code class="literal">iterator::operator-&gt;</code>).
            Otherwise, you will lose the change.
        </p>
        <p> 
            If you update the element by assigning to a
            dereferenced iterator like this: 
        </p>
        <pre class="programlisting">*iterator = new_element;</pre>
        <p>
            then you never have to call
            <code class="methodname">_DB_STL_StoreElement()</code>
            because the change is stored in the database
            automatically. 
        </p>
      </div>
      <div class="sect2" lang="en" xml:lang="en">
        <div class="titlepage">
          <div>
            <div>
              <h3 class="title"><a id="obj_life_persistence"></a>Object lifetime and persistence</h3>
            </div>
          </div>
        </div>
        <p> 
            Dbstl is an interface to Berkeley DB, so it is used to
            store data persistently. This is really a different
            purpose from that of regular C++ STL. This difference in
            their goals has implications for expected object lifetime:
            In standard STL, when you store an object A of type ID
            into a C++ STL vector V using V.push_back(A), if a proper
            copy constructor is provided in A's class type, then the
            copy of A (call it B) and everything in B, such as another
            object C pointed to by B's data member B.c_ptr, will be
            stored in V and will live as long as B is still in V and V
            is alive. B will be destroyed when V is destroyed or B is
            erased from V. 
        </p>
        <p>
            This is not true for dbstl, which will copy A's data
            and store it in the underlying database. The copy is by
            default a shallow copy, but users can register their
            object marshalling and unmarshalling functions using the
            <code class="classname">DbstlElemTraits</code> class template.
            So if A is passed to a <code class="classname">db_vector</code>
            container, <code class="literal">dv</code>, by using
            <code class="literal">dv.push_back(A)</code>, then dbstl copies
            A's data using the registered functions, and stores data
            into the underlying database. Consequently, A's data will
            remain valid, even if the container is destroyed, because
            it is stored in the database.
        </p>
        <p> 
            If the copy is simply a shallow copy, and A is later
            destroyed, then the pointer stored in the database will
            become invalid. The next time we use the retrieved object,
            we will be using an invalid pointer, which probably will
            result in errors. To avoid this, store the referred object
            C rather than the pointer member A.c_ptr itself, by
            registering the right marshalling/unmarshalling function
            with <code class="classname">DbstlElemTraits</code>.
        </p>
        <p>
            For example, consider the following class
            declaration:
        </p>
        <pre class="programlisting">class ID
{
public:
    string Name;
    int Score;
};  </pre>
        <p>
            Here, the class ID has a data member <span class="bold"><strong>Name</strong></span>, which refers to a memory
            address of the actual characters in the string. If we
            simply shallow copy an object, <code class="literal">id</code>, of
            class ID to store it, then the stored data,
            <code class="literal">idd</code>, is invalid when
            <code class="literal">id</code> is destroyed. This is because
            <code class="literal">idd</code> and <code class="literal">id</code> refer
            to a common memory address which is the base address of
            the memory space storing all characters in the string, and
            this memory space is released when <code class="literal">id</code>
            is destroyed. So <code class="literal">idd</code> will be referring
            to an invalid address. The next time we retrieve
            <code class="literal">idd</code> and use it, there will probably
            be memory corruption.
        </p>
        <p>
            The way to store <code class="literal">id</code> is to write a
            marshal/unmarshal function pair like this: 
        </p>
        <pre class="programlisting">void copy_id(void *dest, const ID&amp;elem)
{
	memcpy(dest, &amp;elem.Score, sizeof(elem.Score));
	char *p = ((char *)dest) + sizeof(elem.Score);
	strcpy(p, elem.Name.c_str());
}

void restore_id(ID&amp; dest, const void *srcdata)
{
	memcpy(&amp;dest.Score, srcdata, sizeof(dest.Score));
	const char *p = ((char *)srcdata) + sizeof(dest.Score);
	dest.Name = p;
}

size_t size_id(const ID&amp; elem)
{
	return sizeof(elem.Score) + elem.Name.size() + 
	    1;// store the '\0' char.
}  </pre>
        <p>
            Then register the above functions before storing any
            instance of <code class="classname">ID</code>: 
        </p>
        <pre class="programlisting">DbstlElemTraits&lt;ID&gt;::instance()-&gt;set_copy_function(copy_id);
DbstlElemTraits&lt;ID&gt;::instance()-&gt;set_size_function(size_id);
DbstlElemTraits&lt;ID&gt;::instance()-&gt;set_restore_function(restore_id);  </pre>
        <p>
            This way, the actual data of instances of ID are
            stored, and so the data will persist even if the container
            itself is destroyed. 
        </p>
      </div>
    </div>
    <div class="navfooter">
      <hr />
      <table width="100%" summary="Navigation footer">
        <tr>
          <td width="40%" align="left"><a accesskey="p" href="stl_complex_rw.html">Prev</a> </td>
          <td width="20%" align="center">
            <a accesskey="u" href="stl.html">Up</a>
          </td>
          <td width="40%" align="right"> <a accesskey="n" href="stl_container_specific.html">Next</a></td>
        </tr>
        <tr>
          <td width="40%" align="left" valign="top">Store and Retrieve data or
        objects of complex types  </td>
          <td width="20%" align="center">
            <a accesskey="h" href="index.html">Home</a>
          </td>
          <td width="40%" align="right" valign="top"> Dbstl container specific
        notes</td>
        </tr>
      </table>
    </div>
  </body>
</html>