summaryrefslogtreecommitdiff
path: root/subversion/include/private/svn_packed_data.h
blob: 6faf0dd0e11628be7aac99b471da76576a5f6e18 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/* packed_data.h : Interface to the packed binary stream data structure
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */

#ifndef SVN_PACKED_DATA_H
#define SVN_PACKED_DATA_H

#include "svn_string.h"
#include "svn_io.h"

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* This API provides Yet Another Serialization Framework.
 *
 * It is geared towards efficiently encoding collections of structured
 * binary data (e.g. an array of noderev objects).  The basic idea is to
 * transform them into hierarchies of streams with each stream usually
 * corresponding to a single attribute in the original data structures.
 * The user is free to model the structure <-> streams mapping as she
 * sees fit.
 *
 * With all data inside the same (sub-)stream carrying similar attribute
 * values, the whole stream lends itself to data compression.  Strings /
 * plain byte sequences will be stored as is.  Numbers use a 7b/8b encoding
 * scheme to eliminate leading zeros.  Because values are often dependent
 * (increasing offsets, roughly similar revision number, etc.), streams
 * can be configured as storing (hopefully shorter) deltas instead of the
 * original value.
 *
 * Two stream types are provided: integer and byte streams.  While the
 * first stores 64 bit integers only and can be configured to assume
 * signed and / or deltifyable data, the second will store arbitrary
 * byte sequences including their length.  At the root level, you may
 * create an arbitrary number of integer and byte streams.  Any stream
 * may have an arbitrary number of sub-streams of the same kind.  You
 * should create the full stream hierarchy before writing any data to it.
 *
 * As a convenience, when an integer stream has sub-streams, you may write
 * to the parent stream instead of all sub-streams individually and the
 * values will be passed down automatically in a round-robin fashion.
 * Reading from the parent stream is similarly supported.
 *
 * When all data has been added to the stream, it can be written to an
 * ordinary svn_stream_t.  First, we write a description of the stream
 * structure (types, sub-streams, sizes and configurations) followed by
 * zlib compressed stream content.  For each top-level stream, all sub-
 * stream data will be concatenated and then compressed as a single block.
 * To maximize the effect of this, make sure all data in that stream
 * hierarchy has a similar value distribution.
 *
 * Reading data starts with an svn_stream_t and automatically recreates
 * the stream hierarchies.  You only need to extract data from it in the
 * same order as you wrote it.
 *
 * Although not enforced programmatically, you may either only write to a
 * stream hierarchy or only read from it but you cannot do both on the
 * same data structure.
 */



/* We pack / unpack integers en bloc to minimize calling and setup overhead.
 * This is the number of integers we put into a buffer before writing them
 * to / after reading them from the 7b/8b stream.  Under 64 bits, this
 * value creates a 128 byte data structure (14 + 2 integers, 8 bytes each).
 */
#define SVN__PACKED_DATA_BUFFER_SIZE 14


/* Data types. */

/* Opaque type for the root object.
 */
typedef struct svn_packed__data_root_t svn_packed__data_root_t;

/* Opaque type for byte streams.
 */
typedef struct svn_packed__byte_stream_t svn_packed__byte_stream_t;

/* Semi-opaque type for integer streams.  We expose the unpacked buffer
 * to allow for replacing svn_packed__add_uint and friends by macros.
 */
typedef struct svn_packed__int_stream_t
{
  /* Pointer to the remainder of the data structure.  Its layout is not
   * declared in this header. */
  void *private_data;

  /* Number of value entries in BUFFER. */
  apr_size_t buffer_used;

  /* Unpacked integers (either yet to be packed or pre-fetched from the
   * packed buffers).  Only the first BUFFER_USED entries are valid; the
   * array holds at most SVN__PACKED_DATA_BUFFER_SIZE values. */
  apr_uint64_t buffer[SVN__PACKED_DATA_BUFFER_SIZE];
} svn_packed__int_stream_t;


/* Writing data. */

/* Return a new serialization root object, allocated in POOL.
 */
svn_packed__data_root_t *
svn_packed__data_create_root(apr_pool_t *pool);

/* Create and return a new top-level integer stream in ROOT.  If signed,
 * i.e. potentially negative, numbers will be put into that stream,
 * SIGNED_INTS should be TRUE as a more efficient encoding will be used
 * in that case.  Set DIFF to TRUE if you expect the difference between
 * consecutive numbers to be much smaller (~100 times) than the actual
 * numbers.
 */
svn_packed__int_stream_t *
svn_packed__create_int_stream(svn_packed__data_root_t *root,
                              svn_boolean_t diff,
                              svn_boolean_t signed_ints);

/* Create and return a sub-stream to the existing integer stream PARENT.
 * If signed, i.e. potentially negative, numbers will be put into that
 * stream, SIGNED_INTS should be TRUE as a more efficient encoding will be
 * used in that case.  Set DIFF to TRUE if you expect the difference
 * between consecutive numbers to be much smaller (~100 times) than the
 * actual numbers.
 */
svn_packed__int_stream_t *
svn_packed__create_int_substream(svn_packed__int_stream_t *parent,
                                 svn_boolean_t diff,
                                 svn_boolean_t signed_ints);

/* Create and return a new top-level byte sequence stream in ROOT.
 */
svn_packed__byte_stream_t *
svn_packed__create_bytes_stream(svn_packed__data_root_t *root);

/* Write the unsigned integer VALUE to STREAM.
 */
void
svn_packed__add_uint(svn_packed__int_stream_t *stream,
                     apr_uint64_t value);

/* Write the signed integer VALUE to STREAM.
 */
void
svn_packed__add_int(svn_packed__int_stream_t *stream,
                    apr_int64_t value);

/* Write the sequence starting at DATA containing LEN bytes to STREAM.
 */
void
svn_packed__add_bytes(svn_packed__byte_stream_t *stream,
                      const char *data,
                      apr_size_t len);

/* Write all contents of ROOT (including all sub-streams) to STREAM.
 * Use SCRATCH_POOL for temporary allocations.
 */
svn_error_t *
svn_packed__data_write(svn_stream_t *stream,
                       svn_packed__data_root_t *root,
                       apr_pool_t *scratch_pool);


/* Reading data. */

/* Return the first integer stream in ROOT.  Returns NULL in case there
 * aren't any.
 */
svn_packed__int_stream_t *
svn_packed__first_int_stream(svn_packed__data_root_t *root);

/* Return the first byte sequence stream in ROOT.  Returns NULL in case
 * there aren't any.
 */
svn_packed__byte_stream_t *
svn_packed__first_byte_stream(svn_packed__data_root_t *root);

/* Return the next (sibling) integer stream to STREAM.  Returns NULL in
 * case there isn't any.
 */
svn_packed__int_stream_t *
svn_packed__next_int_stream(svn_packed__int_stream_t *stream);

/* Return the next (sibling) byte sequence stream to STREAM.  Returns NULL
 * in case there isn't any.
 */
svn_packed__byte_stream_t *
svn_packed__next_byte_stream(svn_packed__byte_stream_t *stream);

/* Return the first sub-stream of STREAM.  Returns NULL in case there
 * isn't any.
 */
svn_packed__int_stream_t *
svn_packed__first_int_substream(svn_packed__int_stream_t *stream);

/* Return the number of integers left to read from STREAM.
 */
apr_size_t
svn_packed__int_count(svn_packed__int_stream_t *stream);

/* Return the number of bytes left to read from STREAM.
 */
apr_size_t
svn_packed__byte_count(svn_packed__byte_stream_t *stream);

/* Return the next number from STREAM as unsigned integer.  Returns 0 when
 * reading beyond the end of the stream.
 */
apr_uint64_t
svn_packed__get_uint(svn_packed__int_stream_t *stream);

/* Return the next number from STREAM as signed integer.  Returns 0 when
 * reading beyond the end of the stream.
 */
apr_int64_t
svn_packed__get_int(svn_packed__int_stream_t *stream);

/* Return the next byte sequence from STREAM and set *LEN to the length
 * of that sequence.  Sets *LEN to 0 when reading beyond the end of the
 * stream.
 */
const char *
svn_packed__get_bytes(svn_packed__byte_stream_t *stream,
                      apr_size_t *len);

/* Allocate a new packed data root in RESULT_POOL, read its structure and
 * stream contents from STREAM and return it in *ROOT_P.  Use SCRATCH_POOL
 * for temporary allocations.
 */
svn_error_t *
svn_packed__data_read(svn_packed__data_root_t **root_p,
                      svn_stream_t *stream,
                      apr_pool_t *result_pool,
                      apr_pool_t *scratch_pool);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* SVN_PACKED_DATA_H */