summaryrefslogtreecommitdiff
path: root/contrib/hstore/hstore_compat.c
blob: 6364f032f98bac8bb8a2cabb20bf6c530ecc5183 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
/*
 * contrib/hstore/hstore_compat.c
 *
 * Notes on old/new hstore format disambiguation.
 *
 * There are three formats to consider:
 * 1) old contrib/hstore (referred to as hstore-old)
 * 2) prerelease pgfoundry hstore
 * 3) new contrib/hstore
 *
 * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
 * bit, which is set in (3) but not (2).
 *
 * Values that are already in format (3), or which are
 * unambiguously in format (2), are handled by the first
 * "return immediately" test in hstoreUpgrade().
 *
 * To stress a point: we ONLY get here with possibly-ambiguous
 * values if we're doing some sort of in-place migration from an
 * old prerelease pgfoundry hstore-new; and we explicitly don't
 * support that without fixing up any potentially padded values
 * first. Most of the code here is serious overkill, but the
 * performance penalty isn't serious (especially compared to the
 * palloc() that we have to do anyway) and the belt-and-braces
 * validity checks provide some reassurance. (If for some reason
 * we get a value that would have worked on the old code, but
 * which would be botched by the conversion code, the validity
 * checks will fail it first so we get an error rather than bad
 * data.)
 *
 * Note also that empty hstores are the same in (2) and (3), so
 * there are some special-case paths for them.
 *
 * We tell the difference between the old format (1) and the new format
 * (2) as follows (but note that there are some edge cases where we can't
 * tell; see comments in hstoreUpgrade):
 *
 * First, since there must be at least one entry, we look at
 * how the bits line up. The new format looks like:
 *
 * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk  (k..k = keylen)
 * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv  (v..v = keylen+vallen)
 *
 * The old format looks like one of these, depending on endianness
 * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
 * n = isnull)
 *
 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
 * nppppppppppppppppppppppppppppppp
 *
 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
 * pppppppppppppppppppppppppppppppn
 *
 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
 * nppppppppppppppppppppppppppppppp
 *
 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
 * pppppppppppppppppppppppppppppppn   (usual i386 format)
 *
 * If the entry is in old format, for the first entry "pos" must be 0.
 * We can obviously see that either keylen or vallen must be >32768
 * for there to be any ambiguity (which is why lengths less than that
 * are fasttracked in hstore.h) Since "pos"==0, the "v" field in the
 * new-format interpretation can only be 0 or 1, which constrains all
 * but three bits of the old-format's k and v fields. But in addition
 * to all of this, the data length implied by the keylen and vallen
 * must fit in the varlena size. So the only ambiguous edge case for
 * hstores with only one entry occurs between a new-format entry with
 * an excess (~32k) of padding, and an old-format entry. But we know
 * which format to use in that case based on how we were compiled, so
 * no actual data corruption can occur.
 *
 * If there is more than one entry, the requirement that keys do not
 * decrease in length, and that positions increase contiguously, and
 * that the end of the data not be beyond the end of the varlena
 * itself, disambiguates in almost all other cases. There is a small
 * set of ambiguous cases which could occur if the old-format value
 * has a large excess of padding and just the right pattern of key
 * sizes, but these are also handled based on how we were compiled.
 *
 * The otherwise undocumented function hstore_version_diag is provided
 * for testing purposes.
 */
#include "postgres.h"


#include "hstore.h"

/*
 * This is the structure used for entries in the old contrib/hstore
 * implementation. Notice that this is the same size as the new entry
 * (two 32-bit words per key/value pair) and that the header is the
 * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
 * etc. are compatible.
 *
 * If the above statement isn't true on some bizarre platform, we're
 * a bit hosed (see StaticAssertStmt in hstoreValidOldFormat).
 */
typedef struct
{
	uint16		keylen;			/* key length; summed into the running data
								 * offset by hstoreValidOldFormat */
	uint16		vallen;			/* value length; not counted when valisnull
								 * is set */
	/*
	 * The two bitfields below share one 32-bit word.  Which end of the word
	 * holds valisnull depends on the compiler's bitfield layout (see the
	 * format diagrams in the file header comment).
	 */
	uint32
				valisnull:1,	/* nonzero if the value is NULL */
				pos:31;			/* offset at which this entry's data starts;
								 * must be 0 for the first entry and equal to
								 * the end of the previous entry otherwise */
} HOldEntry;

/*
 * Forward declarations of the two format validators defined below.  Each
 * returns 0 (not valid), 1 (valid but with slop in the length) or
 * 2 (exactly valid).
 */
static int	hstoreValidNewFormat(HStore *hs);
static int	hstoreValidOldFormat(HStore *hs);


/*
 * Validity test for a new-format hstore.
 *
 * Returns:
 *	0 = not valid
 *	1 = valid but with "slop" in the length
 *	2 = exactly valid
 *
 * A value that carries HS_FLAG_NEWVERSION, or whose entry count is zero, is
 * reported as exactly valid without further inspection.
 *
 * No entry is examined until the entry array implied by the count has been
 * shown to fit inside the varlena, so a bogus count (for instance from a
 * value that is really in the old format, or is simply corrupt) cannot make
 * us read beyond the end of the datum.
 */
static int
hstoreValidNewFormat(HStore *hs)
{
	int			count;
	HEntry	   *entries;
	int			buflen;
	int			vsize;
	int			i;

	if (hs->size_ & HS_FLAG_NEWVERSION)
		return 2;

	count = HS_COUNT(hs);

	if (count == 0)
		return 2;

	/*
	 * The header plus 2 * count entries must fit in the value before any
	 * entry is dereferenced.  This is the same guard hstoreValidOldFormat
	 * applies; a value failing it would be rejected below anyway, so the
	 * result is unchanged -- we merely avoid the out-of-bounds read.
	 */
	if (CALCDATASIZE(count, 0) > VARSIZE(hs))
		return 0;

	entries = ARRPTR(hs);

	if (!HSE_ISFIRST(entries[0]))
		return 0;

	/* the end position of the last value is the length of the string data */
	buflen = HSE_ENDPOS(entries[2 * count - 1]);
	vsize = CALCDATASIZE(count, buflen);

	if (vsize > VARSIZE(hs))
		return 0;

	/*
	 * entry position must be nondecreasing, and only the very first entry
	 * may carry the "first" flag
	 */

	for (i = 1; i < 2 * count; ++i)
	{
		if (HSE_ISFIRST(entries[i])
			|| (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
			return 0;
	}

	/*
	 * key length must be nondecreasing and keys must not be null
	 *
	 * NOTE(review): the loop starts at 1, so the null flag of the first key
	 * (entries[0]) is never examined here -- confirm that this is intended.
	 */

	for (i = 1; i < count; ++i)
	{
		if (HS_KEYLEN(entries, i) < HS_KEYLEN(entries, i - 1))
			return 0;
		if (HSE_ISNULL(entries[2 * i]))
			return 0;
	}

	/* structurally sound; report whether the varlena length matches exactly */
	if (vsize != VARSIZE(hs))
		return 1;

	return 2;
}

/*
 * Check whether "hs" can be read as an old-format (HOldEntry-based) hstore.
 *
 * Result:
 *	0 = not valid
 *	1 = valid but with "slop" in the length
 *	2 = exactly valid
 */
static int
hstoreValidOldFormat(HStore *hs)
{
	HOldEntry  *old = (HOldEntry *) ARRPTR(hs);
	int			nentries = hs->size_;
	int			next_pos = 0;
	int			need;
	int			k;

	/* New format uses an HEntry for key and another for value */
	StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
					 "old hstore format is not upward-compatible");

	/* a value flagged as new-version is never old format */
	if (hs->size_ & HS_FLAG_NEWVERSION)
		return 0;

	/* an empty value is trivially fine */
	if (nentries == 0)
		return 2;

	/* reject out-of-range counts and entry arrays that cannot fit */
	if (nentries > 0xFFFFFFF || CALCDATASIZE(nentries, 0) > VARSIZE(hs))
		return 0;

	/* the first entry's data has to start at offset zero */
	if (old[0].pos != 0)
		return 0;

	/* key lengths may never shrink from one entry to the next */
	for (k = 1; k < nentries; k++)
	{
		if (old[k - 1].keylen > old[k].keylen)
			return 0;
	}

	/*
	 * Every entry must begin exactly where its predecessor ended, i.e. the
	 * entries are contiguous.  A null value contributes no bytes.
	 */
	k = 0;
	while (k < nentries)
	{
		int			entry_len = old[k].keylen;

		if (old[k].pos != next_pos)
			return 0;

		if (!old[k].valisnull)
			entry_len += old[k].vallen;

		next_pos += entry_len;
		k++;
	}

	/* total size implied by the entries, to compare with the varlena size */
	need = CALCDATASIZE(nentries, next_pos);

	if (need > VARSIZE(hs))
		return 0;

	return (need == VARSIZE(hs)) ? 2 : 1;
}


/*
 * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
 *
 * Detoasts "orig" and returns a pointer to an hstore in the current format.
 * Values that need no conversion are returned as detoasted.  Values that are
 * valid only as new format get the version flag and varlena length forced
 * (when we hold a writable copy).  Values taken to be old format are
 * rewritten in place, on a writable copy, as new-format entries.  A value
 * that is valid in neither format raises an error.
 */
HStore *
hstoreUpgrade(Datum orig)
{
	HStore	   *hs = (HStore *) PG_DETOAST_DATUM(orig);
	int			new_ok;
	int			old_ok;
	bool		is_copy;

	/* Return immediately if no conversion needed */
	if (hs->size_ & HS_FLAG_NEWVERSION)
		return hs;
	if (hs->size_ == 0)
		return hs;
	if (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0])))
		return hs;

	new_ok = hstoreValidNewFormat(hs);
	old_ok = hstoreValidOldFormat(hs);

	/* a pointer differing from the original datum means a writable copy */
	is_copy = ((void *) hs != (void *) DatumGetPointer(orig));

	if (!old_ok || hs->size_ == 0)
	{
		/* not usable as old format: either it is new format, or it is junk */
		if (!new_ok)
		{
			elog(ERROR, "invalid hstore value found");
		}
		else
		{
			/*
			 * Set the "new version" flag and the correct varlena length,
			 * but only when we already hold a writable copy (nearly always
			 * the case, since short new-format values never get this far).
			 */
			if (is_copy)
			{
				HS_SETCOUNT(hs, HS_COUNT(hs));
				HS_FIXSIZE(hs, HS_COUNT(hs));
			}
			return hs;
		}
	}

	/*
	 * From here on the value is valid as old format.  If it is valid as new
	 * format too, we have the tricky edge case, which needs a lot of wasted
	 * padding space at the end of the hstore to arise at all.  A "new" value
	 * can only get here when upgrading in place from a pre-release version
	 * of hstore-new (NOT contrib/hstore), so we assume:
	 *
	 * 1. Moving from old contrib/hstore to hstore-new requires fixing up any
	 * potential conflicts first, e.g. by running ALTER TABLE ... USING
	 * col::text::hstore; on all hstore columns before upgrading.
	 *
	 * 2. Moving from old contrib/hstore to new contrib/hstore: "new" values
	 * are impossible here.
	 *
	 * 3. Moving from pre-release hstore-new to hstore-new: "old" values are
	 * impossible here.
	 *
	 * 4. Moving from pre-release hstore-new to new contrib/hstore is not
	 * done as an in-place upgrade, so there is no issue.
	 *
	 * Hence every edge case is treated as "new" when built as hstore-new and
	 * as "old" when built as contrib/hstore.
	 *
	 * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
	 * beta-tested. But for now, it would be very useful to know if anyone can
	 * actually reach this case in a non-contrived setting.
	 */
	if (new_ok)
	{
#if HSTORE_IS_HSTORE_NEW
		elog(WARNING, "ambiguous hstore value resolved as hstore-new");

		/* same flag/length fixup as above, again only on a writable copy */
		if (is_copy)
		{
			HS_SETCOUNT(hs, HS_COUNT(hs));
			HS_FIXSIZE(hs, HS_COUNT(hs));
		}
		return hs;
#else
		elog(WARNING, "ambiguous hstore value resolved as hstore-old");
#endif
	}

	/*
	 * We are committed to treating this as an old-style value.  Get a
	 * writable copy if we lack one, then overwrite it in place.
	 */
	if (!is_copy)
		hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);

	{
		int			nentries = hs->size_;
		HOldEntry  *src = (HOldEntry *) ARRPTR(hs);
		HEntry	   *dst = ARRPTR(hs);
		int			k;

		for (k = 0; k < nentries; k++)
		{
			/*
			 * dst[2k] and dst[2k+1] occupy the same storage as src[k], so
			 * every old field is fetched before anything is written.
			 */
			uint32		start = src[k].pos;
			uint32		klen = src[k].keylen;
			bool		null_val = src[k].valisnull;
			uint32		vlen = null_val ? 0 : src[k].vallen;
			uint32		key_end = start + klen;
			uint32		val_end = key_end + vlen;

			/* new entries record end positions: key first, then value */
			dst[2 * k].entry = key_end & HENTRY_POSMASK;
			dst[2 * k + 1].entry = val_end & HENTRY_POSMASK;
			if (null_val)
				dst[2 * k + 1].entry |= HENTRY_ISNULL;
		}

		if (nentries)
			dst[0].entry |= HENTRY_ISFIRST;
		HS_SETCOUNT(hs, nentries);
		HS_FIXSIZE(hs, nentries);
	}

	return hs;
}


/*
 * hstore_version_diag: testing aid that runs both format validators on the
 * argument and packs their verdicts into a single integer.
 *
 * The result is (old-format verdict) * 10 + (new-format verdict), where each
 * verdict is the 0/1/2 value returned by the corresponding validator.
 */
PG_FUNCTION_INFO_V1(hstore_version_diag);
Datum
hstore_version_diag(PG_FUNCTION_ARGS)
{
	HStore	   *hs = (HStore *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	int			new_verdict = hstoreValidNewFormat(hs);
	int			old_verdict = hstoreValidOldFormat(hs);
	int			diag;

	/* tens digit: old-format verdict; units digit: new-format verdict */
	diag = 10 * old_verdict + new_verdict;

	PG_RETURN_INT32(diag);
}