Diffstat (limited to 'subversion/libsvn_fs_fs/cached_data.c'):
 -rw-r--r--  subversion/libsvn_fs_fs/cached_data.c  3502
 1 file changed, 3502 insertions, 0 deletions
diff --git a/subversion/libsvn_fs_fs/cached_data.c b/subversion/libsvn_fs_fs/cached_data.c
new file mode 100644
index 0000000..6581a6c
--- /dev/null
+++ b/subversion/libsvn_fs_fs/cached_data.c
@@ -0,0 +1,3502 @@
+/* cached_data.c --- cached (read) access to FSFS data
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#include "cached_data.h"
+
+#include <assert.h>
+
+#include "svn_hash.h"
+#include "svn_ctype.h"
+#include "svn_sorts.h"
+#include "private/svn_delta_private.h"
+#include "private/svn_io_private.h"
+#include "private/svn_sorts_private.h"
+#include "private/svn_subr_private.h"
+#include "private/svn_temp_serializer.h"
+
+#include "fs_fs.h"
+#include "id.h"
+#include "index.h"
+#include "low_level.h"
+#include "pack.h"
+#include "util.h"
+#include "temp_serializer.h"
+
+#include "../libsvn_fs/fs-loader.h"
+#include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */
+
+#include "svn_private_config.h"
+
+/* forward-declare. See implementation for the docstring */
+static svn_error_t *
+block_read(void **result,
+ svn_fs_t *fs,
+ svn_revnum_t revision,
+ apr_uint64_t item_index,
+ svn_fs_fs__revision_file_t *revision_file,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool);
+
+
+/* Define this to enable access logging via dbg_log_access
+#define SVN_FS_FS__LOG_ACCESS
+ */
+
+/* When SVN_FS_FS__LOG_ACCESS has been defined, write a line to console
+ * showing where REVISION, ITEM_INDEX is located in FS and use ITEM to
+ * show details on its contents if not NULL.  To support format 6 and
+ * earlier repos, ITEM_TYPE (SVN_FS_FS__ITEM_TYPE_*) must match ITEM.
+ * Use SCRATCH_POOL for temporary allocations.
+ *
+ * For pre-format7 repos, the display will be restricted.
+ */
+static svn_error_t *
+dbg_log_access(svn_fs_t *fs,
+ svn_revnum_t revision,
+ apr_uint64_t item_index,
+ void *item,
+ apr_uint32_t item_type,
+ apr_pool_t *scratch_pool)
+{
+ /* no-op if this macro is not defined */
+#ifdef SVN_FS_FS__LOG_ACCESS
+ fs_fs_data_t *ffd = fs->fsap_data;
+ apr_off_t end_offset = 0;
+ svn_fs_fs__p2l_entry_t *entry = NULL;
+ static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
+ "node ", "chgs ", "rep "};
+ const char *description = "";
+ const char *type = types[item_type];
+ const char *pack = "";
+ apr_off_t offset;
+ svn_fs_fs__revision_file_t *rev_file;
+
+ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
+ scratch_pool));
+
+ /* determine rev / pack file offset */
+ SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL,
+ item_index, scratch_pool));
+
+ /* constructing the pack file description */
+ if (revision < ffd->min_unpacked_rev)
+ pack = apr_psprintf(scratch_pool, "%4ld|",
+ revision / ffd->max_files_per_dir);
+
+ /* construct description if possible */
+ if (item_type == SVN_FS_FS__ITEM_TYPE_NODEREV && item != NULL)
+ {
+ node_revision_t *node = item;
+ const char *data_rep
+ = node->data_rep
+ ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
+ node->data_rep->revision,
+ node->data_rep->item_index)
+ : "";
+ const char *prop_rep
+ = node->prop_rep
+ ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
+ node->prop_rep->revision,
+ node->prop_rep->item_index)
+ : "";
+ description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)",
+ node->created_path,
+ node->predecessor_count,
+ data_rep,
+ prop_rep);
+ }
+ else if (item_type == SVN_FS_FS__ITEM_TYPE_ANY_REP)
+ {
+ svn_fs_fs__rep_header_t *header = item;
+ if (header == NULL)
+ description = " (txdelta window)";
+ else if (header->type == svn_fs_fs__rep_plain)
+ description = " PLAIN";
+ else if (header->type == svn_fs_fs__rep_self_delta)
+ description = " DELTA";
+ else
+ description = apr_psprintf(scratch_pool,
+ " DELTA against %ld/%" APR_UINT64_T_FMT,
+ header->base_revision,
+ header->base_item_index);
+ }
+ else if (item_type == SVN_FS_FS__ITEM_TYPE_CHANGES && item != NULL)
+ {
+ apr_array_header_t *changes = item;
+ switch (changes->nelts)
+ {
+ case 0: description = " no change";
+ break;
+ case 1: description = " 1 change";
+ break;
+ default: description = apr_psprintf(scratch_pool, " %d changes",
+ changes->nelts);
+ }
+ }
+
+ /* some info is only available in format7 repos */
+ if (svn_fs_fs__use_log_addressing(fs))
+ {
+ /* reverse index lookup: get item description in ENTRY */
+ SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision,
+ offset, scratch_pool));
+ if (entry)
+ {
+ /* more details */
+ end_offset = offset + entry->size;
+ type = types[entry->type];
+ }
+
+ /* line output */
+ printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n",
+ pack, (long)(offset / ffd->block_size),
+ (long)(offset % ffd->block_size),
+ (long)(end_offset / ffd->block_size),
+ (long)(end_offset % ffd->block_size),
+ type, revision, item_index, description);
+ }
+ else
+ {
+ /* reduced logging for format 6 and earlier */
+ printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \
+ " %s\n",
+ pack, (apr_uint64_t)(offset), type, revision, item_index,
+ description);
+ }
+
+#endif
+
+ return SVN_NO_ERROR;
+}
+
+/* Convenience wrapper around svn_io_file_aligned_seek, taking filesystem
+ FS instead of a block size. */
+static svn_error_t *
+aligned_seek(svn_fs_t *fs,
+ apr_file_t *file,
+ apr_off_t *buffer_start,
+ apr_off_t offset,
+ apr_pool_t *pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ return svn_error_trace(svn_io_file_aligned_seek(file, ffd->block_size,
+ buffer_start, offset,
+ pool));
+}
+
+/* Open the revision file for revision REV in filesystem FS and store
+   the newly opened file in FILE.  Seek to the offset of item ITEM before
+   returning.  Perform temporary allocations in POOL. */
+static svn_error_t *
+open_and_seek_revision(svn_fs_fs__revision_file_t **file,
+ svn_fs_t *fs,
+ svn_revnum_t rev,
+ apr_uint64_t item,
+ apr_pool_t *pool)
+{
+ svn_fs_fs__revision_file_t *rev_file;
+ apr_off_t offset = -1;
+
+ SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, pool));
+
+ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, pool, pool));
+ SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL, item,
+ pool));
+
+ SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
+
+ *file = rev_file;
+
+ return SVN_NO_ERROR;
+}
+
+/* Open the representation REP for a node-revision in filesystem FS, seek
+ to its position and store the newly opened file in FILE. Perform
+ temporary allocations in POOL. */
+static svn_error_t *
+open_and_seek_transaction(svn_fs_fs__revision_file_t **file,
+ svn_fs_t *fs,
+ representation_t *rep,
+ apr_pool_t *pool)
+{
+ apr_off_t offset;
+
+ SVN_ERR(svn_fs_fs__open_proto_rev_file(file, fs, &rep->txn_id, pool, pool));
+
+ SVN_ERR(svn_fs_fs__item_offset(&offset, fs, NULL, SVN_INVALID_REVNUM,
+ &rep->txn_id, rep->item_index, pool));
+ SVN_ERR(aligned_seek(fs, (*file)->file, NULL, offset, pool));
+
+ return SVN_NO_ERROR;
+}
+
+/* Given a representation REP in filesystem FS, open the correct file and
+   seek to the correct location.  Store this file in *FILE_P.  Perform any
+   allocations in POOL. */
+static svn_error_t *
+open_and_seek_representation(svn_fs_fs__revision_file_t **file_p,
+ svn_fs_t *fs,
+ representation_t *rep,
+ apr_pool_t *pool)
+{
+ if (! svn_fs_fs__id_txn_used(&rep->txn_id))
+ return open_and_seek_revision(file_p, fs, rep->revision, rep->item_index,
+ pool);
+ else
+ return open_and_seek_transaction(file_p, fs, rep, pool);
+}
+
+
+
+static svn_error_t *
+err_dangling_id(svn_fs_t *fs, const svn_fs_id_t *id)
+{
+ svn_string_t *id_str = svn_fs_fs__id_unparse(id, fs->pool);
+ return svn_error_createf
+ (SVN_ERR_FS_ID_NOT_FOUND, 0,
+ _("Reference to non-existent node '%s' in filesystem '%s'"),
+ id_str->data, fs->path);
+}
+
+/* Return TRUE, if FS is of a format that supports block-read and the
+ feature has been enabled. */
+static svn_boolean_t
+use_block_read(svn_fs_t *fs)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ return svn_fs_fs__use_log_addressing(fs) && ffd->use_block_read;
+}
+
+/* Get the node-revision for the node ID in FS.
+ Set *NODEREV_P to the new node-revision structure, allocated in POOL.
+ See svn_fs_fs__get_node_revision, which wraps this and adds another
+ error. */
+static svn_error_t *
+get_node_revision_body(node_revision_t **noderev_p,
+ svn_fs_t *fs,
+ const svn_fs_id_t *id,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ svn_error_t *err;
+ svn_boolean_t is_cached = FALSE;
+ fs_fs_data_t *ffd = fs->fsap_data;
+
+ if (svn_fs_fs__id_is_txn(id))
+ {
+ apr_file_t *file;
+
+ /* This is a transaction node-rev. Its storage logic is very
+ different from that of rev / pack files. */
+ err = svn_io_file_open(&file,
+ svn_fs_fs__path_txn_node_rev(fs, id,
+ scratch_pool),
+ APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
+ scratch_pool);
+ if (err)
+ {
+ if (APR_STATUS_IS_ENOENT(err->apr_err))
+ {
+ svn_error_clear(err);
+ return svn_error_trace(err_dangling_id(fs, id));
+ }
+
+ return svn_error_trace(err);
+ }
+
+ SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
+ svn_stream_from_aprfile2(file,
+ FALSE,
+ scratch_pool),
+ result_pool, scratch_pool));
+ }
+ else
+ {
+ svn_fs_fs__revision_file_t *revision_file;
+
+ /* noderevs in rev / pack files can be cached */
+ const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
+ pair_cache_key_t key = { 0 };
+ key.revision = rev_item->revision;
+ key.second = rev_item->number;
+
+ /* Not found or not applicable. Try a noderev cache lookup.
+ * If that succeeds, we are done here. */
+ if (ffd->node_revision_cache)
+ {
+ SVN_ERR(svn_cache__get((void **) noderev_p,
+ &is_cached,
+ ffd->node_revision_cache,
+ &key,
+ result_pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+ }
+
+ /* read the data from disk */
+ SVN_ERR(open_and_seek_revision(&revision_file, fs,
+ rev_item->revision,
+ rev_item->number,
+ scratch_pool));
+
+ if (use_block_read(fs))
+ {
+ /* block-read will parse the whole block and will also return
+ the one noderev that we need right now. */
+ SVN_ERR(block_read((void **)noderev_p, fs,
+ rev_item->revision,
+ rev_item->number,
+ revision_file,
+ result_pool,
+ scratch_pool));
+ }
+ else
+ {
+ /* physical addressing mode reading, parsing and caching */
+ SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
+ revision_file->stream,
+ result_pool,
+ scratch_pool));
+
+ /* Workaround issue #4031: is-fresh-txn-root in revision files. */
+ (*noderev_p)->is_fresh_txn_root = FALSE;
+
+ /* The noderev is not in cache, yet. Add it, if caching has been enabled. */
+ if (ffd->node_revision_cache)
+ SVN_ERR(svn_cache__set(ffd->node_revision_cache,
+ &key,
+ *noderev_p,
+ scratch_pool));
+ }
+
+ SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
+ }
+
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__get_node_revision(node_revision_t **noderev_p,
+ svn_fs_t *fs,
+ const svn_fs_id_t *id,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
+
+ svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
+ result_pool, scratch_pool);
+ if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
+ {
+ svn_string_t *id_string = svn_fs_fs__id_unparse(id, scratch_pool);
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
+ "Corrupt node-revision '%s'",
+ id_string->data);
+ }
+
+ SVN_ERR(dbg_log_access(fs,
+ rev_item->revision,
+ rev_item->number,
+ *noderev_p,
+ SVN_FS_FS__ITEM_TYPE_NODEREV,
+ scratch_pool));
+
+ return svn_error_trace(err);
+}
+
+
+/* Given a revision file REV_FILE, opened to REV in FS, find the Node-ID
+ of the header located at OFFSET and store it in *ID_P. Allocate
+ temporary variables from POOL. */
+static svn_error_t *
+get_fs_id_at_offset(svn_fs_id_t **id_p,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_fs_t *fs,
+ svn_revnum_t rev,
+ apr_off_t offset,
+ apr_pool_t *pool)
+{
+ node_revision_t *noderev;
+
+ SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
+ SVN_ERR(svn_fs_fs__read_noderev(&noderev,
+ rev_file->stream,
+ pool, pool));
+
+ /* noderev->id is const, get rid of that */
+ *id_p = svn_fs_fs__id_copy(noderev->id, pool);
+
+  /* assert that the noderev's revision is REV
+   * (asserting on the offset would be harder because the rev_offset is
+   * not known here) */
+ assert(svn_fs_fs__id_rev(*id_p) == rev);
+
+ return SVN_NO_ERROR;
+}
+
+
+/* Given an open revision file REV_FILE in FS for REV, locate the trailer that
+ specifies the offset to the root node-id and to the changed path
+ information. Store the root node offset in *ROOT_OFFSET and the
+ changed path offset in *CHANGES_OFFSET. If either of these
+ pointers is NULL, do nothing with it.
+
+ Allocate temporary variables from POOL. */
+static svn_error_t *
+get_root_changes_offset(apr_off_t *root_offset,
+ apr_off_t *changes_offset,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_fs_t *fs,
+ svn_revnum_t rev,
+ apr_pool_t *pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ apr_off_t rev_offset;
+ apr_seek_where_t seek_relative;
+ svn_stringbuf_t *trailer;
+ char buffer[64];
+ apr_off_t start;
+ apr_off_t end;
+ apr_size_t len;
+
+ /* Determine where to seek to in the file.
+
+ If we've got a pack file, we want to seek to the end of the desired
+ revision. But we don't track that, so we seek to the beginning of the
+ next revision.
+
+ Unless the next revision is in a different file, in which case, we can
+ just seek to the end of the pack file -- just like we do in the
+ non-packed case. */
+ if (rev_file->is_packed && ((rev + 1) % ffd->max_files_per_dir != 0))
+ {
+ SVN_ERR(svn_fs_fs__get_packed_offset(&end, fs, rev + 1, pool));
+ seek_relative = APR_SET;
+ }
+ else
+ {
+ seek_relative = APR_END;
+ end = 0;
+ }
+
+ /* Offset of the revision from the start of the pack file, if applicable. */
+ if (rev_file->is_packed)
+ SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, rev, pool));
+ else
+ rev_offset = 0;
+
+ /* We will assume that the last line containing the two offsets
+ will never be longer than 64 characters. */
+ SVN_ERR(svn_io_file_seek(rev_file->file, seek_relative, &end, pool));
+
+ if (end < sizeof(buffer))
+ {
+ len = (apr_size_t)end;
+ start = 0;
+ }
+ else
+ {
+ len = sizeof(buffer);
+ start = end - sizeof(buffer);
+ }
+
+ /* Read in this last block, from which we will identify the last line. */
+ SVN_ERR(aligned_seek(fs, rev_file->file, NULL, start, pool));
+ SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, len, NULL, NULL,
+ pool));
+
+ /* Parse the last line. */
+ trailer = svn_stringbuf_ncreate(buffer, len, pool);
+ SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset,
+ changes_offset,
+ trailer,
+ rev));
+
+ /* return absolute offsets */
+ if (root_offset)
+ *root_offset += rev_offset;
+ if (changes_offset)
+ *changes_offset += rev_offset;
+
+ return SVN_NO_ERROR;
+}
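+
+/* Illustration (added for clarity; the numbers are invented): for a
+ * non-packed revision file, the trailer parsed above is the very last
+ * line of the file, e.g.
+ *
+ *     845 1102
+ *
+ * meaning that the root noderev starts at offset 845 and the changed-path
+ * data at offset 1102 within that revision.  For packed files, REV_OFFSET
+ * is added to turn these into absolute offsets within the pack file. */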
+
+svn_error_t *
+svn_fs_fs__rev_get_root(svn_fs_id_t **root_id_p,
+ svn_fs_t *fs,
+ svn_revnum_t rev,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
+
+ if (svn_fs_fs__use_log_addressing(fs))
+ {
+ *root_id_p = svn_fs_fs__id_create_root(rev, result_pool);
+ }
+ else
+ {
+ svn_fs_fs__revision_file_t *revision_file;
+ apr_off_t root_offset;
+ svn_fs_id_t *root_id = NULL;
+ svn_boolean_t is_cached;
+
+ SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached,
+ ffd->rev_root_id_cache, &rev, result_pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+
+ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
+ scratch_pool, scratch_pool));
+ SVN_ERR(get_root_changes_offset(&root_offset, NULL,
+ revision_file, fs, rev,
+ scratch_pool));
+
+ SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev,
+ root_offset, result_pool));
+
+ SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
+
+ SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id,
+ scratch_pool));
+
+ *root_id_p = root_id;
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Describes a lazily opened rev / pack file. Instances will be shared
+ between multiple instances of rep_state_t. */
+typedef struct shared_file_t
+{
+ /* The opened file. NULL while file is not open, yet. */
+ svn_fs_fs__revision_file_t *rfile;
+
+ /* file system to open the file in */
+ svn_fs_t *fs;
+
+ /* a revision contained in the FILE. Since this file may be shared,
+ that value may be different from REP_STATE_T->REVISION. */
+ svn_revnum_t revision;
+
+ /* pool to use when creating the FILE. This guarantees that the file
+ remains open / valid beyond the respective local context that required
+ the file to be opened eventually. */
+ apr_pool_t *pool;
+} shared_file_t;
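+
+/* Usage sketch (added for illustration, not part of the original code):
+ * a shared_file_t starts out with RFILE == NULL and gets opened lazily,
+ * e.g.
+ *
+ *   shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
+ *   file->fs = fs;
+ *   file->revision = rep->revision;
+ *   file->pool = result_pool;
+ *   ...
+ *   SVN_ERR(auto_open_shared_file(file));   <- opens the rev / pack file once
+ *
+ * Several rep_state_t instances may then share the same FILE object,
+ * keeping the number of open file handles low while a delta chain is
+ * being read (see create_rep_state_body below). */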
+
+/* Represents where in the current svndiff data block each
+ representation is. */
+typedef struct rep_state_t
+{
+ /* shared lazy-open rev/pack file structure */
+ shared_file_t *sfile;
+  /* Caches raw (unparsed) txdelta windows.  May be NULL. */
+  svn_cache__t *raw_window_cache;
+  /* Caches parsed txdelta windows.  May be NULL. */
+  svn_cache__t *window_cache;
+ /* Caches un-deltified windows. May be NULL. */
+ svn_cache__t *combined_cache;
+ /* revision containing the representation */
+ svn_revnum_t revision;
+ /* representation's item index in REVISION */
+ apr_uint64_t item_index;
+  /* length of the header at the start of the rep.
+     0 iff this rep is stored in a container
+     (i.e. does not have a header) */
+ apr_size_t header_size;
+ apr_off_t start; /* The starting offset for the raw
+ svndiff/plaintext data minus header.
+ -1 if the offset is yet unknown. */
+ apr_off_t current;/* The current offset relative to START. */
+ apr_off_t size; /* The on-disk size of the representation. */
+ int ver; /* If a delta, what svndiff version?
+ -1 for unknown delta version. */
+ int chunk_index; /* number of the window to read */
+} rep_state_t;
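+
+/* Offset bookkeeping used throughout this file (summary added for
+ * clarity):
+ *
+ *   next byte to read      =  START + CURRENT
+ *   end of the rep's data  =  START + SIZE
+ *
+ * For a DELTA rep, CURRENT is 4 right after the "SVN\0" / "SVN\1" marker
+ * has been consumed; for a PLAIN rep it simply counts the bytes already
+ * delivered.  START == -1 and VER == -1 mean "not known yet" and are
+ * resolved lazily by auto_set_start_offset() and auto_read_diff_version()
+ * below. */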
+
+/* Simple wrapper around svn_fs_fs__get_file_offset to simplify callers. */
+static svn_error_t *
+get_file_offset(apr_off_t *offset,
+ rep_state_t *rs,
+ apr_pool_t *pool)
+{
+ return svn_error_trace(svn_fs_fs__get_file_offset(offset,
+ rs->sfile->rfile->file,
+ pool));
+}
+
+/* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */
+static svn_error_t *
+rs_aligned_seek(rep_state_t *rs,
+ apr_off_t *buffer_start,
+ apr_off_t offset,
+ apr_pool_t *pool)
+{
+ fs_fs_data_t *ffd = rs->sfile->fs->fsap_data;
+ return svn_error_trace(svn_io_file_aligned_seek(rs->sfile->rfile->file,
+ ffd->block_size,
+ buffer_start, offset,
+ pool));
+}
+
+/* Open FILE->RFILE (and its stream) if it hasn't been opened yet. */
+static svn_error_t*
+auto_open_shared_file(shared_file_t *file)
+{
+ if (file->rfile == NULL)
+ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&file->rfile, file->fs,
+ file->revision, file->pool,
+ file->pool));
+
+ return SVN_NO_ERROR;
+}
+
+/* Set RS->START to the beginning of the raw representation data in
+   RS->SFILE->RFILE, if that hasn't been done yet.  Use POOL for temporary
+   allocations. */
+static svn_error_t*
+auto_set_start_offset(rep_state_t *rs, apr_pool_t *pool)
+{
+ if (rs->start == -1)
+ {
+ SVN_ERR(svn_fs_fs__item_offset(&rs->start, rs->sfile->fs,
+ rs->sfile->rfile, rs->revision, NULL,
+ rs->item_index, pool));
+ rs->start += rs->header_size;
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Set RS->VER depending on what is found in the already open
+   RS->SFILE->RFILE if the diff version is still unknown.  Use POOL for
+   temporary allocations.
+ */
+static svn_error_t*
+auto_read_diff_version(rep_state_t *rs, apr_pool_t *pool)
+{
+ if (rs->ver == -1)
+ {
+ char buf[4];
+ SVN_ERR(rs_aligned_seek(rs, NULL, rs->start, pool));
+ SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
+ sizeof(buf), NULL, NULL, pool));
+
+ /* ### Layering violation */
+ if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
+ return svn_error_create
+ (SVN_ERR_FS_CORRUPT, NULL,
+ _("Malformed svndiff data in representation"));
+ rs->ver = buf[3];
+
+ rs->chunk_index = 0;
+ rs->current = 4;
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* See create_rep_state, which wraps this and adds another error. */
+static svn_error_t *
+create_rep_state_body(rep_state_t **rep_state,
+ svn_fs_fs__rep_header_t **rep_header,
+ shared_file_t **shared_file,
+ representation_t *rep,
+ svn_fs_t *fs,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
+ svn_fs_fs__rep_header_t *rh;
+ svn_boolean_t is_cached = FALSE;
+ apr_uint64_t estimated_window_storage;
+
+ /* If the hint is
+ * - given,
+ * - refers to a valid revision,
+ * - refers to a packed revision,
+ * - as does the rep we want to read, and
+ * - refers to the same pack file as the rep
+ * we can re-use the same, already open file object
+ */
+ svn_boolean_t reuse_shared_file
+ = shared_file && *shared_file && (*shared_file)->rfile
+ && SVN_IS_VALID_REVNUM((*shared_file)->revision)
+ && (*shared_file)->revision < ffd->min_unpacked_rev
+ && rep->revision < ffd->min_unpacked_rev
+ && ( ((*shared_file)->revision / ffd->max_files_per_dir)
+ == (rep->revision / ffd->max_files_per_dir));
+
+ pair_cache_key_t key;
+ key.revision = rep->revision;
+ key.second = rep->item_index;
+
+ /* continue constructing RS and RA */
+ rs->size = rep->size;
+ rs->revision = rep->revision;
+ rs->item_index = rep->item_index;
+ rs->raw_window_cache = ffd->raw_window_cache;
+ rs->ver = -1;
+ rs->start = -1;
+
+ /* Very long files stored as self-delta will produce a huge number of
+     delta windows.  Don't cache them, lest we thrash the cache.
+     Since we don't know the depth of the delta chain, let's assume the
+     whole contents get rewritten 3 times.
+ */
+ estimated_window_storage
+ = 4 * ( (rep->expanded_size ? rep->expanded_size : rep->size)
+ + SVN_DELTA_WINDOW_SIZE);
+ estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
+
+ rs->window_cache = ffd->txdelta_window_cache
+ && svn_cache__is_cachable(ffd->txdelta_window_cache,
+ (apr_size_t)estimated_window_storage)
+ ? ffd->txdelta_window_cache
+ : NULL;
+ rs->combined_cache = ffd->combined_window_cache
+ && svn_cache__is_cachable(ffd->combined_window_cache,
+ (apr_size_t)estimated_window_storage)
+ ? ffd->combined_window_cache
+ : NULL;
+
+ /* cache lookup, i.e. skip reading the rep header if possible */
+ if (ffd->rep_header_cache && !svn_fs_fs__id_txn_used(&rep->txn_id))
+ SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
+ ffd->rep_header_cache, &key, result_pool));
+
+ /* initialize the (shared) FILE member in RS */
+ if (reuse_shared_file)
+ {
+ rs->sfile = *shared_file;
+ }
+ else
+ {
+ shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
+ file->revision = rep->revision;
+ file->pool = result_pool;
+ file->fs = fs;
+ rs->sfile = file;
+
+ /* remember the current file, if suggested by the caller */
+ if (shared_file)
+ *shared_file = file;
+ }
+
+ /* read rep header, if necessary */
+ if (!is_cached)
+ {
+ /* ensure file is open and navigate to the start of rep header */
+ if (reuse_shared_file)
+ {
+ apr_off_t offset;
+
+ /* ... we can re-use the same, already open file object.
+ * This implies that we don't read from a txn.
+ */
+ rs->sfile = *shared_file;
+ SVN_ERR(auto_open_shared_file(rs->sfile));
+ SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rs->sfile->rfile,
+ rep->revision, NULL, rep->item_index,
+ scratch_pool));
+ SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
+ }
+ else
+ {
+ /* otherwise, create a new file object. May or may not be
+ * an in-txn file.
+ */
+ SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
+ result_pool));
+ }
+
+ SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
+ result_pool, scratch_pool));
+ SVN_ERR(get_file_offset(&rs->start, rs, result_pool));
+
+ /* populate the cache if appropriate */
+ if (! svn_fs_fs__id_txn_used(&rep->txn_id))
+ {
+ if (use_block_read(fs))
+ SVN_ERR(block_read(NULL, fs, rep->revision, rep->item_index,
+ rs->sfile->rfile, result_pool, scratch_pool));
+ else
+ if (ffd->rep_header_cache)
+ SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
+ scratch_pool));
+ }
+ }
+
+ /* finalize */
+ SVN_ERR(dbg_log_access(fs, rep->revision, rep->item_index, rh,
+ SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
+
+ rs->header_size = rh->header_size;
+ *rep_state = rs;
+ *rep_header = rh;
+
+ if (rh->type == svn_fs_fs__rep_plain)
+ /* This is a plaintext, so just return the current rep_state. */
+ return SVN_NO_ERROR;
+
+ /* skip "SVNx" diff marker */
+ rs->current = 4;
+
+ return SVN_NO_ERROR;
+}
+
+/* Read the rep args for REP in filesystem FS and create a rep_state
+ for reading the representation. Return the rep_state in *REP_STATE
+ and the rep header in *REP_HEADER, both allocated in POOL.
+
+ When reading multiple reps, i.e. a skip delta chain, you may provide
+ non-NULL SHARED_FILE. (If SHARED_FILE is not NULL, in the first
+ call it should be a pointer to NULL.) The function will use this
+   variable to store the previous call's results and try to re-use them.
+ This may result in significant savings in I/O for packed files and
+ number of open file handles.
+ */
+static svn_error_t *
+create_rep_state(rep_state_t **rep_state,
+ svn_fs_fs__rep_header_t **rep_header,
+ shared_file_t **shared_file,
+ representation_t *rep,
+ svn_fs_t *fs,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ svn_error_t *err = create_rep_state_body(rep_state, rep_header,
+ shared_file, rep, fs,
+ result_pool, scratch_pool);
+ if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
+ {
+ fs_fs_data_t *ffd = fs->fsap_data;
+ const char *rep_str;
+
+ /* ### This always returns "-1" for transaction reps, because
+ ### this particular bit of code doesn't know if the rep is
+ ### stored in the protorev or in the mutable area (for props
+ ### or dir contents). It is pretty rare for FSFS to *read*
+ ### from the protorev file, though, so this is probably OK.
+ ### And anyone going to debug corruption errors is probably
+ ### going to jump straight to this comment anyway! */
+ rep_str = rep
+ ? svn_fs_fs__unparse_representation
+ (rep, ffd->format, TRUE, scratch_pool, scratch_pool)->data
+ : "(null)";
+
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
+ "Corrupt representation '%s'",
+ rep_str);
+ }
+ /* ### Call representation_string() ? */
+ return svn_error_trace(err);
+}
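+
+/* Typical call pattern (illustrative sketch only; see build_rep_list and
+ * svn_fs_fs__rep_chain_length below for the real loops): callers walking
+ * a delta chain keep a single shared_file_t pointer alive across calls so
+ * that a packed rev file is opened only once:
+ *
+ *   shared_file_t *shared_file = NULL;
+ *   while (...)
+ *     {
+ *       SVN_ERR(create_rep_state(&rs, &header, &shared_file, &rep, fs,
+ *                                pool, iterpool));
+ *       rep.revision = header->base_revision;      <- step to the base rep
+ *       rep.item_index = header->base_item_index;
+ *       ...
+ *     }
+ */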
+
+svn_error_t *
+svn_fs_fs__check_rep(representation_t *rep,
+ svn_fs_t *fs,
+ void **hint,
+ apr_pool_t *scratch_pool)
+{
+ if (svn_fs_fs__use_log_addressing(fs))
+ {
+ apr_off_t offset;
+ svn_fs_fs__p2l_entry_t *entry;
+ svn_fs_fs__revision_file_t *rev_file = NULL;
+
+ /* Reuse the revision file provided by *HINT, if it is given and
+ * actually the rev / pack file that we want. */
+ svn_revnum_t start_rev = svn_fs_fs__packed_base_rev(fs, rep->revision);
+ if (hint)
+ rev_file = *(svn_fs_fs__revision_file_t **)hint;
+
+ if (rev_file == NULL || rev_file->start_revision != start_rev)
+ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rep->revision,
+ scratch_pool, scratch_pool));
+
+ if (hint)
+ *hint = rev_file;
+
+ /* This will auto-retry if there was a background pack. */
+ SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rep->revision,
+ NULL, rep->item_index, scratch_pool));
+
+ /* This may fail if there is a background pack operation (can't auto-
+ retry because the item offset lookup has to be redone as well). */
+ SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file,
+ rep->revision, offset,
+ scratch_pool, scratch_pool));
+
+ if ( entry == NULL
+ || entry->type < SVN_FS_FS__ITEM_TYPE_FILE_REP
+ || entry->type > SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+ _("No representation found at offset %s "
+ "for item %s in revision %ld"),
+ apr_off_t_toa(scratch_pool, offset),
+ apr_psprintf(scratch_pool,
+ "%" APR_UINT64_T_FMT,
+ rep->item_index),
+ rep->revision);
+ }
+ else
+ {
+ rep_state_t *rs;
+ svn_fs_fs__rep_header_t *rep_header;
+
+ /* ### Should this be using read_rep_line() directly? */
+ SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint,
+ rep, fs, scratch_pool, scratch_pool));
+ }
+
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__rep_chain_length(int *chain_length,
+ int *shard_count,
+ representation_t *rep,
+ svn_fs_t *fs,
+ apr_pool_t *scratch_pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ svn_revnum_t shard_size = ffd->max_files_per_dir
+ ? ffd->max_files_per_dir
+ : 1;
+ apr_pool_t *subpool = svn_pool_create(scratch_pool);
+ apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+ svn_boolean_t is_delta = FALSE;
+ int count = 0;
+ int shards = 1;
+ svn_revnum_t last_shard = rep->revision / shard_size;
+
+ /* Check whether the length of the deltification chain is acceptable.
+ * Otherwise, shared reps may form a non-skipping delta chain in
+ * extreme cases. */
+ representation_t base_rep = *rep;
+
+ /* re-use open files between iterations */
+ shared_file_t *file_hint = NULL;
+
+ svn_fs_fs__rep_header_t *header;
+
+  /* follow the delta chain towards its end, counting the number of
+   * links and the number of shards it crosses. */
+ do
+ {
+ rep_state_t *rep_state;
+
+ svn_pool_clear(iterpool);
+
+ if (base_rep.revision / shard_size != last_shard)
+ {
+ last_shard = base_rep.revision / shard_size;
+ ++shards;
+ }
+
+ SVN_ERR(create_rep_state_body(&rep_state,
+ &header,
+ &file_hint,
+ &base_rep,
+ fs,
+ subpool,
+ iterpool));
+
+ base_rep.revision = header->base_revision;
+ base_rep.item_index = header->base_item_index;
+ base_rep.size = header->base_length;
+ svn_fs_fs__id_txn_reset(&base_rep.txn_id);
+ is_delta = header->type == svn_fs_fs__rep_delta;
+
+      /* Clear the SUBPOOL once in a while.  Doing it too frequently
+       * renders the FILE_HINT ineffective.  Doing it too infrequently may
+       * leave us with too many open file handles.
+ *
+ * Note that this is mostly about efficiency, with larger values
+ * being more efficient, and any non-zero value is legal here. When
+ * reading deltified contents, we may keep 10s of rev files open at
+ * the same time and the system has to cope with that. Thus, the
+ * limit of 16 chosen below is in the same ballpark.
+ */
+ ++count;
+ if (count % 16 == 0)
+ {
+ file_hint = NULL;
+ svn_pool_clear(subpool);
+ }
+ }
+ while (is_delta && base_rep.revision);
+
+ *chain_length = count;
+ *shard_count = shards;
+ svn_pool_destroy(subpool);
+ svn_pool_destroy(iterpool);
+
+ return SVN_NO_ERROR;
+}
+
+struct rep_read_baton
+{
+ /* The FS from which we're reading. */
+ svn_fs_t *fs;
+
+ /* Representation to read. */
+ representation_t rep;
+
+ /* If not NULL, this is the base for the first delta window in rs_list */
+ svn_stringbuf_t *base_window;
+
+ /* The state of all prior delta representations. */
+ apr_array_header_t *rs_list;
+
+ /* The plaintext state, if there is a plaintext. */
+ rep_state_t *src_state;
+
+ /* The index of the current delta chunk, if we are reading a delta. */
+ int chunk_index;
+
+ /* The buffer where we store undeltified data. */
+ char *buf;
+ apr_size_t buf_pos;
+ apr_size_t buf_len;
+
+ /* A checksum context for summing the data read in order to verify it.
+ Note: we don't need to use the sha1 checksum because we're only doing
+ data verification, for which md5 is perfectly safe. */
+ svn_checksum_ctx_t *md5_checksum_ctx;
+
+ svn_boolean_t checksum_finalized;
+
+ /* The stored checksum of the representation we are reading, its
+ length, and the amount we've read so far. Some of this
+ information is redundant with rs_list and src_state, but it's
+ convenient for the checksumming code to have it here. */
+ unsigned char md5_digest[APR_MD5_DIGESTSIZE];
+
+ svn_filesize_t len;
+ svn_filesize_t off;
+
+ /* The key for the fulltext cache for this rep, if there is a
+ fulltext cache. */
+ pair_cache_key_t fulltext_cache_key;
+ /* The text we've been reading, if we're going to cache it. */
+ svn_stringbuf_t *current_fulltext;
+
+ /* If not NULL, attempt to read the data from this cache.
+ Once that lookup fails, reset it to NULL. */
+ svn_cache__t *fulltext_cache;
+
+ /* Bytes delivered from the FULLTEXT_CACHE so far. If the next
+ lookup fails, we need to skip that much data from the reconstructed
+ window stream before we continue normal operation. */
+ svn_filesize_t fulltext_delivered;
+
+ /* Used for temporary allocations during the read. */
+ apr_pool_t *pool;
+
+  /* Pool used to store file handles and other data that is persistent
+ for the entire stream read. */
+ apr_pool_t *filehandle_pool;
+};
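+
+/* Reading order implemented by rep_read_contents() below (overview added
+ * for clarity):
+ *
+ *   1. While FULLTEXT_CACHE is non-NULL, serve data directly from the
+ *      fulltext cache and account for it in FULLTEXT_DELIVERED.
+ *   2. On the first cache miss, set FULLTEXT_CACHE to NULL and let
+ *      skip_contents() catch the window stream up to the bytes already
+ *      delivered.
+ *   3. From then on, expand the data window by window via
+ *      get_contents_from_windows(), optionally accumulating the result in
+ *      CURRENT_FULLTEXT so that it can be cached once the rep has been
+ *      read completely. */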
+
+/* Set window key in *KEY to address the window described by RS.
+ For convenience, return the KEY. */
+static window_cache_key_t *
+get_window_key(window_cache_key_t *key, rep_state_t *rs)
+{
+ assert(rs->revision <= APR_UINT32_MAX);
+ key->revision = (apr_uint32_t)rs->revision;
+ key->item_index = rs->item_index;
+ key->chunk_index = rs->chunk_index;
+
+ return key;
+}
+
+/* Implement svn_cache__partial_getter_func_t for raw txdelta windows.
+ * Parse the raw data and return a svn_fs_fs__txdelta_cached_window_t.
+ */
+static svn_error_t *
+parse_raw_window(void **out,
+ const void *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *result_pool)
+{
+ svn_string_t raw_window;
+ svn_stream_t *stream;
+
+ /* unparsed and parsed window */
+ const svn_fs_fs__raw_cached_window_t *window
+ = (const svn_fs_fs__raw_cached_window_t *)data;
+ svn_fs_fs__txdelta_cached_window_t *result
+ = apr_pcalloc(result_pool, sizeof(*result));
+
+ /* create a read stream taking the raw window as input */
+ raw_window.data = svn_temp_deserializer__ptr(window,
+ (const void * const *)&window->window.data);
+ raw_window.len = window->window.len;
+ stream = svn_stream_from_string(&raw_window, result_pool);
+
+ /* parse it */
+ SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, 1,
+ result_pool));
+
+ /* complete the window and return it */
+ result->end_offset = window->end_offset;
+ *out = result;
+
+ return SVN_NO_ERROR;
+}
+
+
+/* Read into *WINDOW_P the window number CHUNK_INDEX for the representation
+ * given in rep state RS from the current FSFS session's cache.  This will
+ * be a no-op and IS_CACHED will be set to FALSE if no cache has been given.
+ * If a cache is available, IS_CACHED will inform the caller about the
+ * success of the lookup.  Allocations of the window will be made from
+ * RESULT_POOL.  Use SCRATCH_POOL for temporary allocations.
+ *
+ * If the window could be found, advance RS to CHUNK_INDEX.
+ */
+static svn_error_t *
+get_cached_window(svn_txdelta_window_t **window_p,
+ rep_state_t *rs,
+ int chunk_index,
+ svn_boolean_t *is_cached,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ if (! rs->window_cache)
+ {
+      /* txdelta window cache has not been enabled */
+ *is_cached = FALSE;
+ }
+ else
+ {
+ /* ask the cache for the desired txdelta window */
+ svn_fs_fs__txdelta_cached_window_t *cached_window;
+ window_cache_key_t key = { 0 };
+ get_window_key(&key, rs);
+ key.chunk_index = chunk_index;
+ SVN_ERR(svn_cache__get((void **) &cached_window,
+ is_cached,
+ rs->window_cache,
+ &key,
+ result_pool));
+
+ /* If we did not find a parsed txdelta window, we might have a raw
+ version of it in our cache. If so, read, parse and re-cache it. */
+ if (!*is_cached && rs->raw_window_cache)
+ {
+ SVN_ERR(svn_cache__get_partial((void **) &cached_window, is_cached,
+ rs->raw_window_cache, &key,
+ parse_raw_window, NULL, result_pool));
+ if (*is_cached)
+ SVN_ERR(svn_cache__set(rs->window_cache, &key, cached_window,
+ scratch_pool));
+ }
+
+ /* Return cached information. */
+ if (*is_cached)
+ {
+ /* found it. Pass it back to the caller. */
+ *window_p = cached_window->window;
+
+ /* manipulate the RS as if we just read the data */
+ rs->current = cached_window->end_offset;
+ rs->chunk_index = chunk_index;
+ }
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Store the WINDOW read for the rep state RS in the current FSFS
+ * session's cache. This will be a no-op if no cache has been given.
+ * Temporary allocations will be made from SCRATCH_POOL. */
+static svn_error_t *
+set_cached_window(svn_txdelta_window_t *window,
+ rep_state_t *rs,
+ apr_pool_t *scratch_pool)
+{
+ if (rs->window_cache)
+ {
+ /* store the window and the first offset _past_ it */
+ svn_fs_fs__txdelta_cached_window_t cached_window;
+ window_cache_key_t key = {0};
+
+ cached_window.window = window;
+ cached_window.end_offset = rs->current;
+
+ /* but key it with the start offset because that is the known state
+ * when we will look it up */
+ SVN_ERR(svn_cache__set(rs->window_cache,
+ get_window_key(&key, rs),
+ &cached_window,
+ scratch_pool));
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Read into *WINDOW_P the combined window for the rep state RS from the
+ * current FSFS session's cache.  This will be a no-op and IS_CACHED will
+ * be set to FALSE if no cache has been given.  If a cache is available,
+ * IS_CACHED will inform
+ * the caller about the success of the lookup. Allocations (of the window
+ * in particular) will be made from POOL.
+ */
+static svn_error_t *
+get_cached_combined_window(svn_stringbuf_t **window_p,
+ rep_state_t *rs,
+ svn_boolean_t *is_cached,
+ apr_pool_t *pool)
+{
+ if (! rs->combined_cache)
+ {
+      /* combined window cache has not been enabled */
+ *is_cached = FALSE;
+ }
+ else
+ {
+      /* ask the cache for the desired combined window */
+ window_cache_key_t key = { 0 };
+ return svn_cache__get((void **)window_p,
+ is_cached,
+ rs->combined_cache,
+ get_window_key(&key, rs),
+ pool);
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Store the WINDOW read for the rep state RS in the current FSFS session's
+ * cache. This will be a no-op if no cache has been given.
+ * Temporary allocations will be made from SCRATCH_POOL. */
+static svn_error_t *
+set_cached_combined_window(svn_stringbuf_t *window,
+ rep_state_t *rs,
+ apr_pool_t *scratch_pool)
+{
+ if (rs->combined_cache)
+ {
+ /* but key it with the start offset because that is the known state
+ * when we will look it up */
+ window_cache_key_t key = { 0 };
+ return svn_cache__set(rs->combined_cache,
+ get_window_key(&key, rs),
+ window,
+ scratch_pool);
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Build an array of rep_state structures in *LIST giving the delta
+   reps from FIRST_REP to a plain-text or self-compressed rep.  Set
+   *SRC_STATE to the plain-text rep we find at the end of the chain,
+   or to NULL if the final delta representation is self-compressed.
+   The representation to start from is designated by filesystem FS and
+   representation FIRST_REP.
+ Also, set *WINDOW_P to the base window content for *LIST, if it
+ could be found in cache. Otherwise, *LIST will contain the base
+ representation for the whole delta chain.
+ Finally, return the expanded size of the representation in
+ *EXPANDED_SIZE. It will take care of cases where only the on-disk
+ size is known. */
+static svn_error_t *
+build_rep_list(apr_array_header_t **list,
+ svn_stringbuf_t **window_p,
+ rep_state_t **src_state,
+ svn_filesize_t *expanded_size,
+ svn_fs_t *fs,
+ representation_t *first_rep,
+ apr_pool_t *pool)
+{
+ representation_t rep;
+ rep_state_t *rs = NULL;
+ svn_fs_fs__rep_header_t *rep_header;
+ svn_boolean_t is_cached = FALSE;
+ shared_file_t *shared_file = NULL;
+ apr_pool_t *iterpool = svn_pool_create(pool);
+
+ *list = apr_array_make(pool, 1, sizeof(rep_state_t *));
+ rep = *first_rep;
+
+ /* The value as stored in the data struct.
+ 0 is either for unknown length or actually zero length. */
+ *expanded_size = first_rep->expanded_size;
+
+ /* for the top-level rep, we need the rep_args */
+ SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool,
+ iterpool));
+
+ /* Unknown size or empty representation?
+     That implies that this is the first iteration.
+ Usually size equals on-disk size, except for empty,
+ compressed representations (delta, size = 4).
+     Please note that all non-empty deltas have
+ a 4-byte header _plus_ some data. */
+ if (*expanded_size == 0)
+ if (rep_header->type == svn_fs_fs__rep_plain || first_rep->size != 4)
+ *expanded_size = first_rep->size;
+
+ while (1)
+ {
+ svn_pool_clear(iterpool);
+
+ /* fetch state, if that has not been done already */
+ if (!rs)
+ SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
+ &rep, fs, pool, iterpool));
+
+ /* for txn reps, there won't be a cached combined window */
+ if (!svn_fs_fs__id_txn_used(&rep.txn_id))
+ SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool));
+
+ if (is_cached)
+ {
+ /* We already have a reconstructed window in our cache.
+ Write a pseudo rep_state with the full length. */
+ rs->start = 0;
+ rs->current = 0;
+ rs->size = (*window_p)->len;
+ *src_state = rs;
+ break;
+ }
+
+ if (rep_header->type == svn_fs_fs__rep_plain)
+ {
+ /* This is a plaintext, so just return the current rep_state. */
+ *src_state = rs;
+ break;
+ }
+
+ /* Push this rep onto the list. If it's self-compressed, we're done. */
+ APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
+ if (rep_header->type == svn_fs_fs__rep_self_delta)
+ {
+ *src_state = NULL;
+ break;
+ }
+
+ rep.revision = rep_header->base_revision;
+ rep.item_index = rep_header->base_item_index;
+ rep.size = rep_header->base_length;
+ svn_fs_fs__id_txn_reset(&rep.txn_id);
+
+ rs = NULL;
+ }
+ svn_pool_destroy(iterpool);
+
+ return SVN_NO_ERROR;
+}
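+
+/* Example outcome (added for illustration; revisions invented): if the
+ * requested rep in r100 is a DELTA against a rep in r90, which is a DELTA
+ * against a PLAIN rep in r50, then on return
+ *
+ *   *LIST      = { rep_state for r100, rep_state for r90 }
+ *   *SRC_STATE = rep_state for the PLAIN rep in r50
+ *
+ * If, instead, a cached combined window is found for the first rep, *LIST
+ * stays empty, *WINDOW_P receives the cached text and *SRC_STATE is a
+ * pseudo rep_state covering exactly that text. */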
+
+
+/* Create a rep_read_baton structure for representation REP in
+   filesystem FS and store it in *RB_P.  FULLTEXT_CACHE_KEY is the key to
+   use for the fulltext cache.  Perform all allocations in POOL.  If REP
+   is mutable, it must be for file contents. */
+static svn_error_t *
+rep_read_get_baton(struct rep_read_baton **rb_p,
+ svn_fs_t *fs,
+ representation_t *rep,
+ pair_cache_key_t fulltext_cache_key,
+ apr_pool_t *pool)
+{
+ struct rep_read_baton *b;
+
+ b = apr_pcalloc(pool, sizeof(*b));
+ b->fs = fs;
+ b->rep = *rep;
+ b->base_window = NULL;
+ b->chunk_index = 0;
+ b->buf = NULL;
+ b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
+ b->checksum_finalized = FALSE;
+ memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));
+ b->len = rep->expanded_size;
+ b->off = 0;
+ b->fulltext_cache_key = fulltext_cache_key;
+ b->pool = svn_pool_create(pool);
+ b->filehandle_pool = svn_pool_create(pool);
+ b->fulltext_cache = NULL;
+ b->fulltext_delivered = 0;
+ b->current_fulltext = NULL;
+
+ /* Save our output baton. */
+ *rb_p = b;
+
+ return SVN_NO_ERROR;
+}
+
+/* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
+ window into *NWIN. Note that RS->CHUNK_INDEX will be THIS_CHUNK rather
+ than THIS_CHUNK + 1 when this function returns. */
+static svn_error_t *
+read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
+ rep_state_t *rs, apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ svn_boolean_t is_cached;
+ apr_off_t start_offset;
+ apr_off_t end_offset;
+ apr_pool_t *iterpool;
+
+ SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);
+
+ SVN_ERR(dbg_log_access(rs->sfile->fs, rs->revision, rs->item_index,
+ NULL, SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
+
+ /* Read the next window. But first, try to find it in the cache. */
+ SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
+ result_pool, scratch_pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+
+ /* someone has to actually read the data from file. Open it */
+ SVN_ERR(auto_open_shared_file(rs->sfile));
+
+ /* invoke the 'block-read' feature for non-txn data.
+ However, don't do that if we are in the middle of some representation,
+ because the block is unlikely to contain other data. */
+ if ( rs->chunk_index == 0
+ && SVN_IS_VALID_REVNUM(rs->revision)
+ && use_block_read(rs->sfile->fs)
+ && rs->raw_window_cache)
+ {
+ SVN_ERR(block_read(NULL, rs->sfile->fs, rs->revision, rs->item_index,
+ rs->sfile->rfile, result_pool, scratch_pool));
+
+ /* reading the whole block probably also provided us with the
+ desired txdelta window */
+ SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
+ result_pool, scratch_pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+ }
+
+ /* data is still not cached -> we need to read it.
+ Make sure we have all the necessary info. */
+ SVN_ERR(auto_set_start_offset(rs, scratch_pool));
+ SVN_ERR(auto_read_diff_version(rs, scratch_pool));
+
+ /* RS->FILE may be shared between RS instances -> make sure we point
+ * to the right data. */
+ start_offset = rs->start + rs->current;
+ SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, scratch_pool));
+
+ /* Skip windows to reach the current chunk if we aren't there yet. */
+ iterpool = svn_pool_create(scratch_pool);
+ while (rs->chunk_index < this_chunk)
+ {
+ svn_pool_clear(iterpool);
+ SVN_ERR(svn_txdelta_skip_svndiff_window(rs->sfile->rfile->file,
+ rs->ver, iterpool));
+ rs->chunk_index++;
+ SVN_ERR(get_file_offset(&start_offset, rs, iterpool));
+ rs->current = start_offset - rs->start;
+ if (rs->current >= rs->size)
+ return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+ _("Reading one svndiff window read "
+ "beyond the end of the "
+ "representation"));
+ }
+ svn_pool_destroy(iterpool);
+
+ /* Actually read the next window. */
+ SVN_ERR(svn_txdelta_read_svndiff_window(nwin, rs->sfile->rfile->stream,
+ rs->ver, result_pool));
+ SVN_ERR(get_file_offset(&end_offset, rs, scratch_pool));
+ rs->current = end_offset - rs->start;
+ if (rs->current > rs->size)
+ return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+ _("Reading one svndiff window read beyond "
+ "the end of the representation"));
+
+ /* the window has not been cached before, thus cache it now
+ * (if caching is used for them at all) */
+ if (SVN_IS_VALID_REVNUM(rs->revision))
+ SVN_ERR(set_cached_window(*nwin, rs, scratch_pool));
+
+ return SVN_NO_ERROR;
+}
+
+/* Read SIZE bytes from the representation RS and return them in *NWIN. */
+static svn_error_t *
+read_plain_window(svn_stringbuf_t **nwin, rep_state_t *rs,
+ apr_size_t size, apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ apr_off_t offset;
+
+ /* RS->FILE may be shared between RS instances -> make sure we point
+ * to the right data. */
+ SVN_ERR(auto_open_shared_file(rs->sfile));
+ SVN_ERR(auto_set_start_offset(rs, scratch_pool));
+
+ offset = rs->start + rs->current;
+ SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
+
+ /* Read the plain data. */
+ *nwin = svn_stringbuf_create_ensure(size, result_pool);
+ SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, (*nwin)->data, size,
+ NULL, NULL, result_pool));
+ (*nwin)->data[size] = 0;
+
+ /* Update RS. */
+ rs->current += (apr_off_t)size;
+
+ return SVN_NO_ERROR;
+}
+
+/* Skip SIZE bytes from the PLAIN representation RS. */
+static svn_error_t *
+skip_plain_window(rep_state_t *rs,
+ apr_size_t size)
+{
+ /* Update RS. */
+ rs->current += (apr_off_t)size;
+
+ return SVN_NO_ERROR;
+}
+
+/* Get the undeltified window that is a result of combining all deltas
+ from the current desired representation identified in *RB with its
+ base representation. Store the window in *RESULT. */
+static svn_error_t *
+get_combined_window(svn_stringbuf_t **result,
+ struct rep_read_baton *rb)
+{
+ apr_pool_t *pool, *new_pool, *window_pool;
+ int i;
+ apr_array_header_t *windows;
+ svn_stringbuf_t *source, *buf = rb->base_window;
+ rep_state_t *rs;
+ apr_pool_t *iterpool;
+
+ /* Read all windows that we need to combine. This is fine because
+ the size of each window is relatively small (100kB) and skip-
+ delta limits the number of deltas in a chain to well under 100.
+ Stop early if one of them does not depend on its predecessors. */
+ window_pool = svn_pool_create(rb->pool);
+ windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
+ iterpool = svn_pool_create(rb->pool);
+ for (i = 0; i < rb->rs_list->nelts; ++i)
+ {
+ svn_txdelta_window_t *window;
+
+ svn_pool_clear(iterpool);
+
+ rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
+ SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
+ iterpool));
+
+ APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
+ if (window->src_ops == 0)
+ {
+ ++i;
+ break;
+ }
+ }
+
+ /* Combine in the windows from the other delta reps. */
+ pool = svn_pool_create(rb->pool);
+ for (--i; i >= 0; --i)
+ {
+ svn_txdelta_window_t *window;
+
+ svn_pool_clear(iterpool);
+
+ rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
+ window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
+
+      /* Maybe we've got a PLAIN start representation.  If we do, read
+         as much data from it as is needed for the txdelta window's source
+ view.
+ Note that BUF / SOURCE may only be NULL in the first iteration.
+ Also note that we may have short-cut reading the delta chain --
+ in which case SRC_OPS is 0 and it might not be a PLAIN rep. */
+ source = buf;
+ if (source == NULL && rb->src_state != NULL)
+ {
+ /* Even if we don't need the source rep now, we still must keep
+ * its read offset in sync with what we might need for the next
+ * window. */
+ if (window->src_ops)
+ SVN_ERR(read_plain_window(&source, rb->src_state,
+ window->sview_len,
+ pool, iterpool));
+ else
+ SVN_ERR(skip_plain_window(rb->src_state, window->sview_len));
+ }
+
+ /* Combine this window with the current one. */
+ new_pool = svn_pool_create(rb->pool);
+ buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
+ buf->len = window->tview_len;
+
+ svn_txdelta_apply_instructions(window, source ? source->data : NULL,
+ buf->data, &buf->len);
+ if (buf->len != window->tview_len)
+ return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+ _("svndiff window length is "
+ "corrupt"));
+
+ /* Cache windows only if the whole rep content could be read as a
+ single chunk. Only then will no other chunk need a deeper RS
+ list than the cached chunk. */
+ if ( (rb->chunk_index == 0) && (rs->current == rs->size)
+ && SVN_IS_VALID_REVNUM(rs->revision))
+ SVN_ERR(set_cached_combined_window(buf, rs, new_pool));
+
+ rs->chunk_index++;
+
+ /* Cycle pools so that we only need to hold three windows at a time. */
+ svn_pool_destroy(pool);
+ pool = new_pool;
+ }
+ svn_pool_destroy(iterpool);
+
+ svn_pool_destroy(window_pool);
+
+ *result = buf;
+ return SVN_NO_ERROR;
+}
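+
+/* Illustration of the combination above (added for clarity): with
+ * RB->RS_LIST = { RS_new, RS_mid } and a PLAIN rep in RB->SRC_STATE, the
+ * first loop reads window CHUNK_INDEX of RS_new and RS_mid; the second
+ * loop then applies RS_mid's window to the data read from the PLAIN rep
+ * and finally RS_new's window to that intermediate result, yielding chunk
+ * CHUNK_INDEX of the fully expanded text. */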
+
+/* Return whether or not the expanded fulltext of the file is cachable
+ * based on its size SIZE.  The decision depends on the fulltext cache in FFD.
+ */
+static svn_boolean_t
+fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size)
+{
+ return (size < APR_SIZE_MAX)
+ && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size);
+}
+
+/* Close method used on streams returned by read_representation().
+ */
+static svn_error_t *
+rep_read_contents_close(void *baton)
+{
+ struct rep_read_baton *rb = baton;
+
+ svn_pool_destroy(rb->pool);
+ svn_pool_destroy(rb->filehandle_pool);
+
+ return SVN_NO_ERROR;
+}
+
+/* Return the next *LEN bytes of the rep from our plain / delta windows
+ and store them in *BUF. */
+static svn_error_t *
+get_contents_from_windows(struct rep_read_baton *rb,
+ char *buf,
+ apr_size_t *len)
+{
+ apr_size_t copy_len, remaining = *len;
+ char *cur = buf;
+ rep_state_t *rs;
+
+ /* Special case for when there are no delta reps, only a plain
+ text. */
+ if (rb->rs_list->nelts == 0)
+ {
+ copy_len = remaining;
+ rs = rb->src_state;
+
+ if (rb->base_window != NULL)
+ {
+ /* We got the desired rep directly from the cache.
+ This is where we need the pseudo rep_state created
+ by build_rep_list(). */
+ apr_size_t offset = (apr_size_t)rs->current;
+ if (copy_len + offset > rb->base_window->len)
+ copy_len = offset < rb->base_window->len
+ ? rb->base_window->len - offset
+ : 0ul;
+
+ memcpy (cur, rb->base_window->data + offset, copy_len);
+ }
+ else
+ {
+ apr_off_t offset;
+ if (((apr_off_t) copy_len) > rs->size - rs->current)
+ copy_len = (apr_size_t) (rs->size - rs->current);
+
+ SVN_ERR(auto_open_shared_file(rs->sfile));
+ SVN_ERR(auto_set_start_offset(rs, rb->pool));
+
+ offset = rs->start + rs->current;
+ SVN_ERR(rs_aligned_seek(rs, NULL, offset, rb->pool));
+ SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, cur,
+ copy_len, NULL, NULL, rb->pool));
+ }
+
+ rs->current += copy_len;
+ *len = copy_len;
+ return SVN_NO_ERROR;
+ }
+
+ while (remaining > 0)
+ {
+ /* If we have buffered data from a previous chunk, use that. */
+ if (rb->buf)
+ {
+ /* Determine how much to copy from the buffer. */
+ copy_len = rb->buf_len - rb->buf_pos;
+ if (copy_len > remaining)
+ copy_len = remaining;
+
+ /* Actually copy the data. */
+ memcpy(cur, rb->buf + rb->buf_pos, copy_len);
+ rb->buf_pos += copy_len;
+ cur += copy_len;
+ remaining -= copy_len;
+
+ /* If the buffer is all used up, clear it and empty the
+ local pool. */
+ if (rb->buf_pos == rb->buf_len)
+ {
+ svn_pool_clear(rb->pool);
+ rb->buf = NULL;
+ }
+ }
+ else
+ {
+ svn_stringbuf_t *sbuf = NULL;
+
+ rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
+ if (rs->current == rs->size)
+ break;
+
+ /* Get more buffered data by evaluating a chunk. */
+ SVN_ERR(get_combined_window(&sbuf, rb));
+
+ rb->chunk_index++;
+ rb->buf_len = sbuf->len;
+ rb->buf = sbuf->data;
+ rb->buf_pos = 0;
+ }
+ }
+
+ *len = cur - buf;
+
+ return SVN_NO_ERROR;
+}
+
+/* Baton type for get_fulltext_partial. */
+typedef struct fulltext_baton_t
+{
+ /* Target buffer to write to; of at least LEN bytes. */
+ char *buffer;
+
+ /* Offset within the respective fulltext at which we shall start to
+ copy data into BUFFER. */
+ apr_size_t start;
+
+ /* Number of bytes to copy. The actual amount may be less in case
+ the fulltext is short(er). */
+ apr_size_t len;
+
+ /* Number of bytes actually copied into BUFFER. */
+ apr_size_t read;
+} fulltext_baton_t;
+
+/* Implement svn_cache__partial_getter_func_t for fulltext caches.
+ * From the fulltext in DATA, we copy the range specified by the
+ * fulltext_baton_t* BATON into the buffer provided by that baton.
+ * OUT and RESULT_POOL are not used.
+ */
+static svn_error_t *
+get_fulltext_partial(void **out,
+ const void *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *result_pool)
+{
+ fulltext_baton_t *fulltext_baton = baton;
+
+  /* We cached the fulltext with a NUL appended to it. */
+ apr_size_t fulltext_len = data_len - 1;
+
+ /* Clip the copy range to what the fulltext size allows. */
+ apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
+ fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
+
+ /* Copy the data to the output buffer and be done. */
+ memcpy(fulltext_baton->buffer, (const char *)data + start,
+ fulltext_baton->read);
+
+ return SVN_NO_ERROR;
+}
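+
+/* Worked example (numbers invented): if the cached fulltext is 100 bytes
+ * long (DATA_LEN == 101 including the appended NUL) and the baton asks for
+ * LEN == 20 bytes starting at START == 90, the clipping above yields
+ * start = 90 and read = MIN(100 - 90, 20) = 10, i.e. only the 10 bytes
+ * that actually exist get copied. */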
+
+/* Find the fulltext specified in BATON in the fulltext cache given
+ * as well by BATON. If that succeeds, set *CACHED to TRUE and copy
+ * up to the next *LEN bytes into BUFFER. Set *LEN to the actual
+ * number of bytes copied.
+ */
+static svn_error_t *
+get_contents_from_fulltext(svn_boolean_t *cached,
+ struct rep_read_baton *baton,
+ char *buffer,
+ apr_size_t *len)
+{
+ void *dummy;
+ fulltext_baton_t fulltext_baton;
+
+ SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
+ == baton->fulltext_delivered);
+ fulltext_baton.buffer = buffer;
+ fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
+ fulltext_baton.len = *len;
+ fulltext_baton.read = 0;
+
+ SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
+ &baton->fulltext_cache_key,
+ get_fulltext_partial, &fulltext_baton,
+ baton->pool));
+
+ if (*cached)
+ {
+ baton->fulltext_delivered += fulltext_baton.read;
+ *len = fulltext_baton.read;
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Determine the optimal size of a string buf that shall receive a
+ * (full-) text of NEEDED bytes.
+ *
+ * The critical point is that those buffers may be very large and
+ * can cause memory fragmentation. We apply simple heuristics to
+ * make fragmentation less likely.
+ */
+static apr_size_t
+optimimal_allocation_size(apr_size_t needed)
+{
+ /* For all allocations, assume some overhead that is shared between
+   * OS memory management, APR memory management and svn_stringbuf_t. */
+ const apr_size_t overhead = 0x400;
+ apr_size_t optimal;
+
+  /* If an allocation size is safe for other ephemeral buffers, it should
+ * be safe for ours. */
+ if (needed <= SVN__STREAM_CHUNK_SIZE)
+ return needed;
+
+ /* Paranoia edge case:
+   * Skip our heuristics if they would cause arithmetic overflow.
+   * Note that this test must also work for NEEDED = APR_SIZE_MAX. */
+ if (needed >= APR_SIZE_MAX / 2 - overhead)
+ return needed;
+
+ /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
+ * Since we know NEEDED to be larger than that, use it as the
+ * starting point.
+ *
+ * Heuristics: Allocate a power-of-two number of bytes that fit
+ * NEEDED plus some OVERHEAD. The APR allocator
+ * will round it up to the next full page size.
+ */
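+  /* Illustrative example (the concrete chunk size is an assumption here):
+   * with SVN__STREAM_CHUNK_SIZE = 16384 and NEEDED = 100000, OPTIMAL
+   * doubles 16384 -> 32768 -> 65536 -> 131072, and we return
+   * 131072 - 0x400 = 130048 >= NEEDED. */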
+ optimal = SVN__STREAM_CHUNK_SIZE;
+ while (optimal - overhead < needed)
+ optimal *= 2;
+
+ /* This is above or equal to NEEDED. */
+ return optimal - overhead;
+}
+
+/* After a fulltext cache lookup failure, we will continue to read from
+ * combined delta or plain windows.  However, we must first make the data
+ * stream in BATON catch up to the position LEN already delivered from the
+ * fulltext cache. Also, we need to store the reconstructed fulltext if we
+ * want to cache it at the end.
+ */
+static svn_error_t *
+skip_contents(struct rep_read_baton *baton,
+ svn_filesize_t len)
+{
+ svn_error_t *err = SVN_NO_ERROR;
+
+ /* Do we want to cache the reconstructed fulltext? */
+ if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
+ {
+ char *buffer;
+ svn_filesize_t to_alloc = MAX(len, baton->len);
+
+      /* This should only happen if BATON->LEN and LEN are
+ * cacheable, implying they fit into memory. */
+ SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
+
+ /* Allocate the fulltext buffer. */
+ baton->current_fulltext = svn_stringbuf_create_ensure(
+                          optimal_allocation_size((apr_size_t)to_alloc),
+ baton->filehandle_pool);
+
+ /* Read LEN bytes from the window stream and store the data
+ * in the fulltext buffer (will be filled by further reads later). */
+ baton->current_fulltext->len = (apr_size_t)len;
+ baton->current_fulltext->data[(apr_size_t)len] = 0;
+
+ buffer = baton->current_fulltext->data;
+ while (len > 0 && !err)
+ {
+ apr_size_t to_read = (apr_size_t)len;
+ err = get_contents_from_windows(baton, buffer, &to_read);
+ len -= to_read;
+ buffer += to_read;
+ }
+ }
+ else if (len > 0)
+ {
+ /* Simply drain LEN bytes from the window stream. */
+      apr_pool_t *subpool = svn_pool_create(baton->pool);
+ char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
+
+ while (len > 0 && !err)
+ {
+ apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
+ ? SVN__STREAM_CHUNK_SIZE
+ : (apr_size_t)len;
+
+ err = get_contents_from_windows(baton, buffer, &to_read);
+ len -= to_read;
+ }
+
+ svn_pool_destroy(subpool);
+ }
+
+ return svn_error_trace(err);
+}
+
+/* BATON is of type `rep_read_baton'; read up to *LEN bytes of the
+   representation into BUF and set *LEN to the number of bytes actually
+   read.  Update the MD5 checksum as we go and verify it once the last
+   byte of the representation has been delivered. */
+static svn_error_t *
+rep_read_contents(void *baton,
+ char *buf,
+ apr_size_t *len)
+{
+ struct rep_read_baton *rb = baton;
+
+ /* Get data from the fulltext cache for as long as we can. */
+ if (rb->fulltext_cache)
+ {
+ svn_boolean_t cached;
+ SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
+ if (cached)
+ return SVN_NO_ERROR;
+
+      /* Cache miss.  From now on, we will no longer read from the fulltext
+       * cache for this representation. */
+ rb->fulltext_cache = NULL;
+ }
+
+ /* No fulltext cache to help us. We must read from the window stream. */
+ if (!rb->rs_list)
+ {
+ /* Window stream not initialized, yet. Do it now. */
+ SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
+ &rb->src_state, &rb->len, rb->fs, &rb->rep,
+ rb->filehandle_pool));
+
+ /* In case we did read from the fulltext cache before, make the
+ * window stream catch up. Also, initialize the fulltext buffer
+ * if we want to cache the fulltext at the end. */
+ SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
+ }
+
+ /* Get the next block of data.
+ * Keep in mind that the representation might be empty and leave us
+ * already positioned at the end of the rep. */
+ if (rb->off == rb->len)
+ *len = 0;
+ else
+ SVN_ERR(get_contents_from_windows(rb, buf, len));
+
+ if (rb->current_fulltext)
+ svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
+
+ /* Perform checksumming. We want to check the checksum as soon as
+ the last byte of data is read, in case the caller never performs
+ a short read, but we don't want to finalize the MD5 context
+ twice. */
+ if (!rb->checksum_finalized)
+ {
+ SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
+ rb->off += *len;
+ if (rb->off == rb->len)
+ {
+ svn_checksum_t *md5_checksum;
+ svn_checksum_t expected;
+ expected.kind = svn_checksum_md5;
+ expected.digest = rb->md5_digest;
+
+ rb->checksum_finalized = TRUE;
+ SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
+ rb->pool));
+ if (!svn_checksum_match(md5_checksum, &expected))
+ return svn_error_create(SVN_ERR_FS_CORRUPT,
+ svn_checksum_mismatch_err(&expected, md5_checksum,
+ rb->pool,
+ _("Checksum mismatch while reading representation")),
+ NULL);
+ }
+ }
+
+ if (rb->off == rb->len && rb->current_fulltext)
+ {
+ fs_fs_data_t *ffd = rb->fs->fsap_data;
+ SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
+ rb->current_fulltext, rb->pool));
+ rb->current_fulltext = NULL;
+ }
+
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__get_contents(svn_stream_t **contents_p,
+ svn_fs_t *fs,
+ representation_t *rep,
+ svn_boolean_t cache_fulltext,
+ apr_pool_t *pool)
+{
+ if (! rep)
+ {
+ *contents_p = svn_stream_empty(pool);
+ }
+ else
+ {
+ fs_fs_data_t *ffd = fs->fsap_data;
+ svn_filesize_t len = rep->expanded_size ? rep->expanded_size : rep->size;
+ struct rep_read_baton *rb;
+
+ pair_cache_key_t fulltext_cache_key = { 0 };
+ fulltext_cache_key.revision = rep->revision;
+ fulltext_cache_key.second = rep->item_index;
+
+      /* Initialize the reader baton.  Some members may be added lazily
+       * while reading from the stream. */
+ SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
+
+ /* Make the stream attempt fulltext cache lookups if the fulltext
+ * is cacheable. If it is not, then also don't try to buffer and
+ * cache it. */
+ if (ffd->fulltext_cache && cache_fulltext
+ && SVN_IS_VALID_REVNUM(rep->revision)
+ && fulltext_size_is_cachable(ffd, len))
+ {
+ rb->fulltext_cache = ffd->fulltext_cache;
+ }
+ else
+ {
+ /* This will also prevent the reconstructed fulltext from being
+ put into the cache. */
+ rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
+ }
+
+ *contents_p = svn_stream_create(rb, pool);
+ svn_stream_set_read2(*contents_p, NULL /* only full read support */,
+ rep_read_contents);
+ svn_stream_set_close(*contents_p, rep_read_contents_close);
+ }
+
+ return SVN_NO_ERROR;
+}
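+
+/* A minimal caller-side sketch for svn_fs_fs__get_contents() above
+ * (assuming FS, REP and POOL already exist; the 1024-byte length hint is
+ * arbitrary):
+ *
+ *   svn_stream_t *contents;
+ *   svn_stringbuf_t *text;
+ *   SVN_ERR(svn_fs_fs__get_contents(&contents, fs, rep, TRUE, pool));
+ *   SVN_ERR(svn_stringbuf_from_stream(&text, contents, 1024, pool));
+ *   SVN_ERR(svn_stream_close(contents));
+ */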
+
+svn_error_t *
+svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p,
+ svn_fs_t *fs,
+ representation_t *rep,
+ apr_file_t *file,
+ apr_off_t offset,
+ apr_pool_t *pool)
+{
+ struct rep_read_baton *rb;
+ pair_cache_key_t fulltext_cache_key = { SVN_INVALID_REVNUM, 0 };
+ rep_state_t *rs = apr_pcalloc(pool, sizeof(*rs));
+ svn_fs_fs__rep_header_t *rh;
+
+  /* Initialize the reader baton.  Some members may be added lazily
+ * while reading from the stream. */
+ SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
+
+ /* Continue constructing RS. Leave caches as NULL. */
+ rs->size = rep->size;
+ rs->revision = SVN_INVALID_REVNUM;
+ rs->item_index = 0;
+ rs->ver = -1;
+ rs->start = -1;
+
+ /* Provide just enough file access info to allow for a basic read from
+   * FILE but leave all index / footer info with empty values because FILE
+   * is probably not a complete revision file. */
+ rs->sfile = apr_pcalloc(pool, sizeof(*rs->sfile));
+ rs->sfile->revision = rep->revision;
+ rs->sfile->pool = pool;
+ rs->sfile->fs = fs;
+ rs->sfile->rfile = apr_pcalloc(pool, sizeof(*rs->sfile->rfile));
+ rs->sfile->rfile->start_revision = SVN_INVALID_REVNUM;
+ rs->sfile->rfile->file = file;
+ rs->sfile->rfile->stream = svn_stream_from_aprfile2(file, TRUE, pool);
+
+ /* Read the rep header. */
+ SVN_ERR(aligned_seek(fs, file, NULL, offset, pool));
+ SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
+ pool, pool));
+ SVN_ERR(get_file_offset(&rs->start, rs, pool));
+ rs->header_size = rh->header_size;
+
+ /* Log the access. */
+ SVN_ERR(dbg_log_access(fs, SVN_INVALID_REVNUM, 0, rh,
+ SVN_FS_FS__ITEM_TYPE_ANY_REP, pool));
+
+ /* Build the representation list (delta chain). */
+ if (rh->type == svn_fs_fs__rep_plain)
+ {
+ rb->rs_list = apr_array_make(pool, 0, sizeof(rep_state_t *));
+ rb->src_state = rs;
+ }
+ else if (rh->type == svn_fs_fs__rep_self_delta)
+ {
+ rb->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
+ APR_ARRAY_PUSH(rb->rs_list, rep_state_t *) = rs;
+ rb->src_state = NULL;
+ }
+ else
+ {
+ representation_t next_rep = { 0 };
+
+ /* skip "SVNx" diff marker */
+ rs->current = 4;
+
+ /* REP's base rep is inside a proper revision.
+ * It can be reconstructed in the usual way. */
+ next_rep.revision = rh->base_revision;
+ next_rep.item_index = rh->base_item_index;
+ next_rep.size = rh->base_length;
+ svn_fs_fs__id_txn_reset(&next_rep.txn_id);
+
+ SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
+ &rb->src_state, &rb->len, rb->fs, &next_rep,
+ rb->filehandle_pool));
+
+ /* Insert the access to REP as the first element of the delta chain. */
+ svn_sort__array_insert(rb->rs_list, &rs, 0);
+ }
+
+ /* Now, the baton is complete and we can assemble the stream around it. */
+ *contents_p = svn_stream_create(rb, pool);
+ svn_stream_set_read2(*contents_p, NULL /* only full read support */,
+ rep_read_contents);
+ svn_stream_set_close(*contents_p, rep_read_contents_close);
+
+ return SVN_NO_ERROR;
+}
+
+/* Baton for cache_access_wrapper. Wraps the original parameters of
+ * svn_fs_fs__try_process_file_contents().
+ */
+typedef struct cache_access_wrapper_baton_t
+{
+ svn_fs_process_contents_func_t func;
+ void* baton;
+} cache_access_wrapper_baton_t;
+
+/* Wrapper to translate between svn_fs_process_contents_func_t and
+ * svn_cache__partial_getter_func_t.
+ */
+static svn_error_t *
+cache_access_wrapper(void **out,
+ const void *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *pool)
+{
+ cache_access_wrapper_baton_t *wrapper_baton = baton;
+
+ SVN_ERR(wrapper_baton->func((const unsigned char *)data,
+ data_len - 1, /* cache adds terminating 0 */
+ wrapper_baton->baton,
+ pool));
+
+ /* non-NULL value to signal the calling cache that all went well */
+ *out = baton;
+
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__try_process_file_contents(svn_boolean_t *success,
+ svn_fs_t *fs,
+ node_revision_t *noderev,
+ svn_fs_process_contents_func_t processor,
+ void* baton,
+ apr_pool_t *pool)
+{
+ representation_t *rep = noderev->data_rep;
+ if (rep)
+ {
+ fs_fs_data_t *ffd = fs->fsap_data;
+ pair_cache_key_t fulltext_cache_key = { 0 };
+
+ fulltext_cache_key.revision = rep->revision;
+ fulltext_cache_key.second = rep->item_index;
+ if (ffd->fulltext_cache && SVN_IS_VALID_REVNUM(rep->revision)
+ && fulltext_size_is_cachable(ffd, rep->expanded_size))
+ {
+ cache_access_wrapper_baton_t wrapper_baton;
+ void *dummy = NULL;
+
+ wrapper_baton.func = processor;
+ wrapper_baton.baton = baton;
+ return svn_cache__get_partial(&dummy, success,
+ ffd->fulltext_cache,
+ &fulltext_cache_key,
+ cache_access_wrapper,
+ &wrapper_baton,
+ pool);
+ }
+ }
+
+ *success = FALSE;
+ return SVN_NO_ERROR;
+}
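+
+/* Hypothetical processor callback for svn_fs_fs__try_process_file_contents()
+ * above, matching the way cache_access_wrapper invokes the callback; BATON
+ * is assumed to point to an apr_size_t accumulator:
+ *
+ *   static svn_error_t *
+ *   count_bytes(const unsigned char *contents, apr_size_t len,
+ *               void *baton, apr_pool_t *scratch_pool)
+ *   {
+ *     *(apr_size_t *)baton += len;
+ *     return SVN_NO_ERROR;
+ *   }
+ */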
+
+
+/* Baton used when reading delta windows. */
+struct delta_read_baton
+{
+ rep_state_t *rs;
+ unsigned char md5_digest[APR_MD5_DIGESTSIZE];
+};
+
+/* This implements the svn_txdelta_next_window_fn_t interface. */
+static svn_error_t *
+delta_read_next_window(svn_txdelta_window_t **window, void *baton,
+ apr_pool_t *pool)
+{
+ struct delta_read_baton *drb = baton;
+ apr_pool_t *scratch_pool = svn_pool_create(pool);
+
+ *window = NULL;
+ if (drb->rs->current < drb->rs->size)
+ {
+ SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool,
+ scratch_pool));
+ drb->rs->chunk_index++;
+ }
+
+ svn_pool_destroy(scratch_pool);
+
+ return SVN_NO_ERROR;
+}
+
+/* This implements the svn_txdelta_md5_digest_fn_t interface. */
+static const unsigned char *
+delta_read_md5_digest(void *baton)
+{
+ struct delta_read_baton *drb = baton;
+ return drb->md5_digest;
+}
+
+/* Return a txdelta stream for on-disk representation REP_STATE
+ * of TARGET. Allocate the result in POOL.
+ */
+static svn_txdelta_stream_t *
+get_stored_delta_stream(rep_state_t *rep_state,
+ node_revision_t *target,
+ apr_pool_t *pool)
+{
+ /* Create the delta read baton. */
+ struct delta_read_baton *drb = apr_pcalloc(pool, sizeof(*drb));
+ drb->rs = rep_state;
+ memcpy(drb->md5_digest, target->data_rep->md5_digest,
+ sizeof(drb->md5_digest));
+ return svn_txdelta_stream_create(drb, delta_read_next_window,
+ delta_read_md5_digest, pool);
+}
+
+svn_error_t *
+svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
+ svn_fs_t *fs,
+ node_revision_t *source,
+ node_revision_t *target,
+ apr_pool_t *pool)
+{
+ svn_stream_t *source_stream, *target_stream;
+ rep_state_t *rep_state;
+ svn_fs_fs__rep_header_t *rep_header;
+ fs_fs_data_t *ffd = fs->fsap_data;
+
+ /* Try a shortcut: if the target is stored as a delta against the source,
+ then just use that delta. However, prefer using the fulltext cache
+ whenever that is available. */
+ if (target->data_rep && (source || ! ffd->fulltext_cache))
+ {
+ /* Read target's base rep if any. */
+ SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
+ target->data_rep, fs, pool, pool));
+
+ if (source && source->data_rep && target->data_rep)
+ {
+ /* If that matches source, then use this delta as is.
+ Note that we want an actual delta here. E.g. a self-delta would
+ not be good enough. */
+ if (rep_header->type == svn_fs_fs__rep_delta
+ && rep_header->base_revision == source->data_rep->revision
+ && rep_header->base_item_index == source->data_rep->item_index)
+ {
+              *stream_p = get_stored_delta_stream(rep_state, target, pool);
+ return SVN_NO_ERROR;
+ }
+ }
+ else if (!source)
+ {
+ /* We want a self-delta. There is a fair chance that TARGET got
+ added in this revision and is already stored in the requested
+ format. */
+ if (rep_header->type == svn_fs_fs__rep_self_delta)
+ {
+              *stream_p = get_stored_delta_stream(rep_state, target, pool);
+ return SVN_NO_ERROR;
+ }
+ }
+
+ /* Don't keep file handles open for longer than necessary. */
+ if (rep_state->sfile->rfile)
+ {
+ SVN_ERR(svn_fs_fs__close_revision_file(rep_state->sfile->rfile));
+ rep_state->sfile->rfile = NULL;
+ }
+ }
+
+ /* Read both fulltexts and construct a delta. */
+ if (source)
+ SVN_ERR(svn_fs_fs__get_contents(&source_stream, fs, source->data_rep,
+ TRUE, pool));
+ else
+ source_stream = svn_stream_empty(pool);
+ SVN_ERR(svn_fs_fs__get_contents(&target_stream, fs, target->data_rep,
+ TRUE, pool));
+
+  /* Because the source and target streams already verify their content,
+   * there is no need to do it again here, in particular when the stream
+   * content is fetched from cache. */
+ svn_txdelta2(stream_p, source_stream, target_stream, FALSE, pool);
+
+ return SVN_NO_ERROR;
+}
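+
+/* Consumption sketch for the txdelta stream produced above (STREAM_P and
+ * POOL as in svn_fs_fs__get_file_delta_stream(); purely illustrative, each
+ * non-NULL WINDOW would be applied or forwarded here):
+ *
+ *   svn_txdelta_window_t *window;
+ *   do
+ *     SVN_ERR(svn_txdelta_next_window(&window, *stream_p, pool));
+ *   while (window != NULL);
+ */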
+
+/* Return TRUE when all svn_fs_dirent_t* in ENTRIES are already sorted
+ by their respective name. */
+static svn_boolean_t
+sorted(apr_array_header_t *entries)
+{
+ int i;
+
+ const svn_fs_dirent_t * const *dirents = (const void *)entries->elts;
+ for (i = 0; i < entries->nelts-1; ++i)
+ if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
+ return FALSE;
+
+ return TRUE;
+}
+
+/* Compare the names of the two dirents given in **A and **B. */
+static int
+compare_dirents(const void *a, const void *b)
+{
+ const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
+ const svn_fs_dirent_t *rhs = *((const svn_fs_dirent_t * const *) b);
+
+ return strcmp(lhs->name, rhs->name);
+}
+
+/* Compare the name of the dirents given in **A with the C string in *B. */
+static int
+compare_dirent_name(const void *a, const void *b)
+{
+ const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
+ const char *rhs = b;
+
+ return strcmp(lhs->name, rhs);
+}
+
+/* Into ENTRIES, read all directory entries from the key-value text in
+ * STREAM.  If INCREMENTAL is TRUE, read until the end of STREAM and apply
+ * the updates and deletions it describes.  ID is provided for nicer error
+ * messages.
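+ *
+ * The text uses the standard svn hash dump format.  A hypothetical,
+ * purely illustrative entry (name, lengths and node-rev id are made up)
+ * would look like:
+ *
+ *   K 4
+ *   iota
+ *   V 14
+ *   file 0.0.r1/17
+ *
+ * followed by the "END" terminator in non-incremental mode.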
+ */
+static svn_error_t *
+read_dir_entries(apr_array_header_t *entries,
+ svn_stream_t *stream,
+ svn_boolean_t incremental,
+ const svn_fs_id_t *id,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+ apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL;
+ const char *terminator = SVN_HASH_TERMINATOR;
+
+ /* Read until the terminator (non-incremental) or the end of STREAM
+ (incremental mode). In the latter mode, we use a temporary HASH
+ to make updating and removing entries cheaper. */
+ while (1)
+ {
+ svn_hash__entry_t entry;
+ svn_fs_dirent_t *dirent;
+ char *str;
+
+ svn_pool_clear(iterpool);
+ SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
+ incremental, iterpool));
+
+ /* End of directory? */
+ if (entry.key == NULL)
+ {
+ /* In incremental mode, we skip the terminator and read the
+ increments following it until the end of the stream. */
+ if (incremental && terminator)
+ terminator = NULL;
+ else
+ break;
+ }
+
+ /* Deleted entry? */
+ if (entry.val == NULL)
+ {
+ /* We must be in incremental mode */
+ assert(hash);
+ apr_hash_set(hash, entry.key, entry.keylen, NULL);
+ continue;
+ }
+
+ /* Add a new directory entry. */
+ dirent = apr_pcalloc(result_pool, sizeof(*dirent));
+ dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
+
+ str = svn_cstring_tokenize(" ", &entry.val);
+ if (str == NULL)
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+ _("Directory entry corrupt in '%s'"),
+ svn_fs_fs__id_unparse(id, scratch_pool)->data);
+
+ if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
+ {
+ dirent->kind = svn_node_file;
+ }
+ else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
+ {
+ dirent->kind = svn_node_dir;
+ }
+ else
+ {
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+ _("Directory entry corrupt in '%s'"),
+ svn_fs_fs__id_unparse(id, scratch_pool)->data);
+ }
+
+ str = svn_cstring_tokenize(" ", &entry.val);
+ if (str == NULL)
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+ _("Directory entry corrupt in '%s'"),
+ svn_fs_fs__id_unparse(id, scratch_pool)->data);
+
+ SVN_ERR(svn_fs_fs__id_parse(&dirent->id, str, result_pool));
+
+ /* In incremental mode, update the hash; otherwise, write to the
+ * final array. Be sure to use hash keys that survive this iteration.
+ */
+ if (incremental)
+ apr_hash_set(hash, dirent->name, entry.keylen, dirent);
+ else
+ APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent;
+ }
+
+ /* Convert container to a sorted array. */
+ if (incremental)
+ {
+ apr_hash_index_t *hi;
+ for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
+ APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi);
+ }
+
+ if (!sorted(entries))
+ svn_sort__array(entries, compare_dirents);
+
+ svn_pool_destroy(iterpool);
+
+ return SVN_NO_ERROR;
+}
+
+/* Fetch the contents of the directory NODEREV in FS and return them in
+   *ENTRIES as an array of svn_fs_dirent_t *, allocated in RESULT_POOL
+   and sorted by entry name. */
+static svn_error_t *
+get_dir_contents(apr_array_header_t **entries,
+ svn_fs_t *fs,
+ node_revision_t *noderev,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ svn_stream_t *contents;
+
+ *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
+ if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
+ {
+ const char *filename
+ = svn_fs_fs__path_txn_node_children(fs, noderev->id, scratch_pool);
+
+ /* The representation is mutable. Read the old directory
+ contents from the mutable children file, followed by the
+ changes we've made in this transaction. */
+ SVN_ERR(svn_stream_open_readonly(&contents, filename, scratch_pool,
+ scratch_pool));
+ SVN_ERR(read_dir_entries(*entries, contents, TRUE, noderev->id,
+ result_pool, scratch_pool));
+ SVN_ERR(svn_stream_close(contents));
+ }
+ else if (noderev->data_rep)
+ {
+ /* Undeltify content before parsing it. Otherwise, we could only
+ * parse it byte-by-byte.
+ */
+ apr_size_t len = noderev->data_rep->expanded_size
+ ? (apr_size_t)noderev->data_rep->expanded_size
+ : (apr_size_t)noderev->data_rep->size;
+ svn_stringbuf_t *text;
+
+ /* The representation is immutable. Read it normally. */
+ SVN_ERR(svn_fs_fs__get_contents(&contents, fs, noderev->data_rep,
+ FALSE, scratch_pool));
+ SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool));
+ SVN_ERR(svn_stream_close(contents));
+
+ /* de-serialize hash */
+ contents = svn_stream_from_stringbuf(text, scratch_pool);
+ SVN_ERR(read_dir_entries(*entries, contents, FALSE, noderev->id,
+ result_pool, scratch_pool));
+ }
+
+ return SVN_NO_ERROR;
+}
+
+
+/* Return the cache object in FS responsible for storing the directory
+ * identified by NODEREV and set *KEY to the corresponding cache key.
+ * If no cache exists, return NULL.  PAIR_KEY must point to some key
+ * struct, which does not need to be initialized; we use it to avoid a
+ * dynamic allocation.
+ */
+static svn_cache__t *
+locate_dir_cache(svn_fs_t *fs,
+ const void **key,
+ pair_cache_key_t *pair_key,
+ node_revision_t *noderev,
+ apr_pool_t *pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ if (svn_fs_fs__id_is_txn(noderev->id))
+ {
+ /* data in txns requires the expensive fs_id-based addressing mode */
+ *key = svn_fs_fs__id_unparse(noderev->id, pool)->data;
+ return ffd->txn_dir_cache;
+ }
+ else
+ {
+ /* committed data can use simple rev,item pairs */
+ if (noderev->data_rep)
+ {
+ pair_key->revision = noderev->data_rep->revision;
+ pair_key->second = noderev->data_rep->item_index;
+ *key = pair_key;
+ }
+ else
+ {
+ /* no data rep -> empty directory.
+ A NULL key causes a cache miss. */
+ *key = NULL;
+ }
+
+ return ffd->dir_cache;
+ }
+}
+
+svn_error_t *
+svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p,
+ svn_fs_t *fs,
+ node_revision_t *noderev,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ pair_cache_key_t pair_key = { 0 };
+ const void *key;
+
+ /* find the cache we may use */
+ svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
+ scratch_pool);
+ if (cache)
+ {
+ svn_boolean_t found;
+
+ SVN_ERR(svn_cache__get((void **)entries_p, &found, cache, key,
+ result_pool));
+ if (found)
+ return SVN_NO_ERROR;
+ }
+
+ /* Read in the directory contents. */
+ SVN_ERR(get_dir_contents(entries_p, fs, noderev, result_pool,
+ scratch_pool));
+
+ /* Update the cache, if we are to use one.
+ *
+ * Don't even attempt to serialize very large directories; it would cause
+ * an unnecessary memory allocation peak. 150 bytes/entry is about right.
+ */
+ if (cache && svn_cache__is_cachable(cache, 150 * (*entries_p)->nelts))
+ SVN_ERR(svn_cache__set(cache, key, *entries_p, scratch_pool));
+
+ return SVN_NO_ERROR;
+}
+
+svn_fs_dirent_t *
+svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
+ const char *name,
+ int *hint)
+{
+ svn_fs_dirent_t **result
+ = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
+ return result ? *result : NULL;
+}
+
+svn_error_t *
+svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
+ svn_fs_t *fs,
+ node_revision_t *noderev,
+ const char *name,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ svn_boolean_t found = FALSE;
+
+ /* find the cache we may use */
+ pair_cache_key_t pair_key = { 0 };
+ const void *key;
+ svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
+ scratch_pool);
+ if (cache)
+ {
+ /* Cache lookup. */
+ SVN_ERR(svn_cache__get_partial((void **)dirent,
+ &found,
+ cache,
+ key,
+ svn_fs_fs__extract_dir_entry,
+ (void*)name,
+ result_pool));
+ }
+
+ /* fetch data from disk if we did not find it in the cache */
+ if (! found)
+ {
+ apr_array_header_t *entries;
+ svn_fs_dirent_t *entry;
+ svn_fs_dirent_t *entry_copy = NULL;
+
+      /* Read the directory from the file system.  It will probably be
+         put into the cache to speed up future lookups. */
+ SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev,
+ scratch_pool, scratch_pool));
+
+      /* Find the desired entry and return a copy in RESULT_POOL, if found. */
+ entry = svn_fs_fs__find_dir_entry(entries, name, NULL);
+ if (entry)
+ {
+ entry_copy = apr_palloc(result_pool, sizeof(*entry_copy));
+ entry_copy->name = apr_pstrdup(result_pool, entry->name);
+ entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool);
+ entry_copy->kind = entry->kind;
+ }
+
+ *dirent = entry_copy;
+ }
+
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
+ svn_fs_t *fs,
+ node_revision_t *noderev,
+ apr_pool_t *pool)
+{
+ apr_hash_t *proplist;
+ svn_stream_t *stream;
+
+ if (noderev->prop_rep && svn_fs_fs__id_txn_used(&noderev->prop_rep->txn_id))
+ {
+ svn_error_t *err;
+ const char *filename
+ = svn_fs_fs__path_txn_node_props(fs, noderev->id, pool);
+ proplist = apr_hash_make(pool);
+
+ SVN_ERR(svn_stream_open_readonly(&stream, filename, pool, pool));
+ err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
+ if (err)
+ {
+ svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
+
+ svn_error_clear(svn_stream_close(stream));
+ return svn_error_quick_wrapf(err,
+ _("malformed property list for node-revision '%s' in '%s'"),
+ id_str->data, filename);
+ }
+ SVN_ERR(svn_stream_close(stream));
+ }
+ else if (noderev->prop_rep)
+ {
+ svn_error_t *err;
+ fs_fs_data_t *ffd = fs->fsap_data;
+ representation_t *rep = noderev->prop_rep;
+ pair_cache_key_t key = { 0 };
+
+ key.revision = rep->revision;
+ key.second = rep->item_index;
+ if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
+ {
+ svn_boolean_t is_cached;
+ SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
+ ffd->properties_cache, &key, pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+ }
+
+ proplist = apr_hash_make(pool);
+ SVN_ERR(svn_fs_fs__get_contents(&stream, fs, noderev->prop_rep, FALSE,
+ pool));
+ err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
+ if (err)
+ {
+ svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
+
+ svn_error_clear(svn_stream_close(stream));
+ return svn_error_quick_wrapf(err,
+ _("malformed property list for node-revision '%s'"),
+ id_str->data);
+ }
+ SVN_ERR(svn_stream_close(stream));
+
+ if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
+ SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist, pool));
+ }
+ else
+ {
+ /* return an empty prop list if the node doesn't have any props */
+ proplist = apr_hash_make(pool);
+ }
+
+ *proplist_p = proplist;
+
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__get_changes(apr_array_header_t **changes,
+ svn_fs_t *fs,
+ svn_revnum_t rev,
+ apr_pool_t *result_pool)
+{
+ apr_off_t changes_offset = SVN_FS_FS__ITEM_INDEX_CHANGES;
+ svn_fs_fs__revision_file_t *revision_file;
+ svn_boolean_t found;
+ fs_fs_data_t *ffd = fs->fsap_data;
+ apr_pool_t *scratch_pool = svn_pool_create(result_pool);
+
+ /* try cache lookup first */
+
+ if (ffd->changes_cache)
+ {
+ SVN_ERR(svn_cache__get((void **) changes, &found, ffd->changes_cache,
+ &rev, result_pool));
+ }
+ else
+ {
+ found = FALSE;
+ }
+
+ if (!found)
+ {
+ /* read changes from revision file */
+
+ SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
+ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
+ scratch_pool, scratch_pool));
+
+ if (use_block_read(fs))
+ {
+ /* 'block-read' will also provide us with the desired data */
+ SVN_ERR(block_read((void **)changes, fs,
+ rev, SVN_FS_FS__ITEM_INDEX_CHANGES,
+ revision_file, result_pool, scratch_pool));
+ }
+ else
+ {
+ /* Addressing is very different for old formats
+ * (needs to read the revision trailer). */
+ if (svn_fs_fs__use_log_addressing(fs))
+ SVN_ERR(svn_fs_fs__item_offset(&changes_offset, fs,
+ revision_file, rev, NULL,
+ SVN_FS_FS__ITEM_INDEX_CHANGES,
+ scratch_pool));
+ else
+ SVN_ERR(get_root_changes_offset(NULL, &changes_offset,
+ revision_file, fs, rev,
+ scratch_pool));
+
+ /* Actual reading and parsing are the same, though. */
+ SVN_ERR(aligned_seek(fs, revision_file->file, NULL, changes_offset,
+ scratch_pool));
+ SVN_ERR(svn_fs_fs__read_changes(changes, revision_file->stream,
+ result_pool, scratch_pool));
+
+ /* cache for future reference */
+
+ if (ffd->changes_cache)
+ {
+ /* Guesstimate for the size of the in-cache representation. */
+ apr_size_t estimated_size = (apr_size_t)250 * (*changes)->nelts;
+
+ /* Don't even serialize data that probably won't fit into the
+ * cache. This often implies that either CHANGES is very
+ * large, memory is scarce or both. Having a huge temporary
+ * copy would not be a good thing in either case. */
+ if (svn_cache__is_cachable(ffd->changes_cache, estimated_size))
+ SVN_ERR(svn_cache__set(ffd->changes_cache, &rev, *changes,
+ scratch_pool));
+ }
+ }
+
+ SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
+ }
+
+ SVN_ERR(dbg_log_access(fs, rev, changes_offset, *changes,
+ SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool));
+
+ svn_pool_destroy(scratch_pool);
+ return SVN_NO_ERROR;
+}
+
+/* Initialize the representation read state RS for the given REP_HEADER and
+ * p2l index ENTRY. If not NULL, assign FILE and STREAM to RS.
+ * Use RESULT_POOL for allocations.
+ */
+static svn_error_t *
+init_rep_state(rep_state_t *rs,
+ svn_fs_fs__rep_header_t *rep_header,
+ svn_fs_t *fs,
+ svn_fs_fs__revision_file_t *file,
+ svn_fs_fs__p2l_entry_t* entry,
+ apr_pool_t *result_pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file));
+
+ /* this function does not apply to representation containers */
+ SVN_ERR_ASSERT(entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP
+ && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS);
+
+ shared_file->rfile = file;
+ shared_file->fs = fs;
+ shared_file->revision = entry->item.revision;
+ shared_file->pool = result_pool;
+
+ rs->sfile = shared_file;
+ rs->revision = entry->item.revision;
+ rs->item_index = entry->item.number;
+ rs->header_size = rep_header->header_size;
+ rs->start = entry->offset + rs->header_size;
+ rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4;
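+  /* ENTRY->SIZE covers the rep header, the rep data and the 7 bytes of the
+   * "ENDREP\n" trailer; keep only the data portion in RS->SIZE. */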
+ rs->size = entry->size - rep_header->header_size - 7;
+ rs->ver = 1;
+ rs->chunk_index = 0;
+ rs->raw_window_cache = ffd->raw_window_cache;
+ rs->window_cache = ffd->txdelta_window_cache;
+ rs->combined_cache = ffd->combined_window_cache;
+
+ return SVN_NO_ERROR;
+}
+
+/* Implement svn_cache__partial_getter_func_t for txdelta windows.
+ * Instead of the whole window data, return only END_OFFSET member.
+ */
+static svn_error_t *
+get_txdelta_window_end(void **out,
+ const void *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *result_pool)
+{
+ const svn_fs_fs__txdelta_cached_window_t *window
+ = (const svn_fs_fs__txdelta_cached_window_t *)data;
+ *(apr_off_t*)out = window->end_offset;
+
+ return SVN_NO_ERROR;
+}
+
+/* Implement svn_cache__partial_getter_func_t for raw windows.
+ * Instead of the whole window data, return only END_OFFSET member.
+ */
+static svn_error_t *
+get_raw_window_end(void **out,
+ const void *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *result_pool)
+{
+ const svn_fs_fs__raw_cached_window_t *window
+ = (const svn_fs_fs__raw_cached_window_t *)data;
+ *(apr_off_t*)out = window->end_offset;
+
+ return SVN_NO_ERROR;
+}
+
+/* Walk through all windows in the representation addressed by RS in FS
+ * (excluding the delta bases) and put those not already cached into the
+ * window caches. If MAX_OFFSET is not -1, don't read windows that start
+ * at or beyond that offset. Use POOL for temporary allocations.
+ *
+ * This function requires RS->RAW_WINDOW_CACHE and RS->WINDOW_CACHE to
+ * be non-NULL.
+ */
+static svn_error_t *
+cache_windows(svn_fs_t *fs,
+ rep_state_t *rs,
+ apr_off_t max_offset,
+ apr_pool_t *pool)
+{
+ apr_pool_t *iterpool = svn_pool_create(pool);
+ while (rs->current < rs->size)
+ {
+ apr_off_t end_offset;
+ svn_boolean_t found = FALSE;
+ window_cache_key_t key = { 0 };
+
+ svn_pool_clear(iterpool);
+
+ if (max_offset != -1 && rs->start + rs->current >= max_offset)
+ {
+ svn_pool_destroy(iterpool);
+ return SVN_NO_ERROR;
+ }
+
+ /* We don't need to read the data again if it is already in cache.
+ * It might be cached as either raw or parsed window.
+ */
+ SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
+ rs->raw_window_cache,
+ get_window_key(&key, rs),
+ get_raw_window_end, NULL,
+ iterpool));
+ if (! found)
+ SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
+ rs->window_cache, &key,
+ get_txdelta_window_end, NULL,
+ iterpool));
+
+ if (found)
+ {
+ rs->current = end_offset;
+ }
+ else
+ {
+ /* Read, decode and cache the window. */
+ svn_fs_fs__raw_cached_window_t window;
+ apr_off_t start_offset = rs->start + rs->current;
+ apr_size_t window_len;
+ char *buf;
+
+ /* navigate to the current window */
+ SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
+ SVN_ERR(svn_txdelta__read_raw_window_len(&window_len,
+ rs->sfile->rfile->stream,
+ iterpool));
+
+ /* Read the raw window. */
+ buf = apr_palloc(iterpool, window_len + 1);
+ SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
+ SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
+ window_len, NULL, NULL, iterpool));
+ buf[window_len] = 0;
+
+ /* update relative offset in representation */
+ rs->current += window_len;
+
+ /* Construct the cachable raw window object. */
+ window.end_offset = rs->current;
+ window.window.len = window_len;
+ window.window.data = buf;
+
+ /* cache the window now */
+ SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &window,
+ iterpool));
+ }
+
+ if (rs->current > rs->size)
+ return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+ _("Reading one svndiff window read beyond "
+ "the end of the representation"));
+
+ rs->chunk_index++;
+ }
+
+ svn_pool_destroy(iterpool);
+ return SVN_NO_ERROR;
+}
+
+/* Read all txdelta / plain windows following REP_HEADER in FS as described
+ * by ENTRY.  Read the data from the already open REV_FILE.  If MAX_OFFSET
+ * is not -1, don't read windows that start
+ * at or beyond that offset. Use SCRATCH_POOL for temporary allocations.
+ * If caching is not enabled, this is a no-op.
+ */
+static svn_error_t *
+block_read_windows(svn_fs_fs__rep_header_t *rep_header,
+ svn_fs_t *fs,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_fs_fs__p2l_entry_t* entry,
+ apr_off_t max_offset,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ rep_state_t rs = { 0 };
+ apr_off_t offset;
+ window_cache_key_t key = { 0 };
+
+ if ( (rep_header->type != svn_fs_fs__rep_plain
+ && (!ffd->txdelta_window_cache || !ffd->raw_window_cache))
+ || (rep_header->type == svn_fs_fs__rep_plain
+ && !ffd->combined_window_cache))
+ return SVN_NO_ERROR;
+
+ SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
+ result_pool));
+
+ /* RS->FILE may be shared between RS instances -> make sure we point
+ * to the right data. */
+ offset = rs.start + rs.current;
+ if (rep_header->type == svn_fs_fs__rep_plain)
+ {
+ svn_stringbuf_t *plaintext;
+ svn_boolean_t is_cached;
+
+ /* already in cache? */
+ SVN_ERR(svn_cache__has_key(&is_cached, rs.combined_cache,
+ get_window_key(&key, &rs),
+ scratch_pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+
+ /* for larger reps, the header may have crossed a block boundary.
+ * make sure we still read blocks properly aligned, i.e. don't use
+ * plain seek here. */
+ SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool));
+
+ plaintext = svn_stringbuf_create_ensure(rs.size, result_pool);
+ SVN_ERR(svn_io_file_read_full2(rev_file->file, plaintext->data,
+ rs.size, &plaintext->len, NULL,
+ result_pool));
+ plaintext->data[plaintext->len] = 0;
+ rs.current += rs.size;
+
+ SVN_ERR(set_cached_combined_window(plaintext, &rs, scratch_pool));
+ }
+ else
+ {
+ SVN_ERR(cache_windows(fs, &rs, max_offset, scratch_pool));
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Try to get the representation header identified by KEY from FS's cache.
+ * If it has not been cached, read it from the current position in STREAM
+ * and put it into the cache (if caching has been enabled for rep headers).
+ * Return the result in *REP_HEADER. Use POOL for allocations.
+ */
+static svn_error_t *
+read_rep_header(svn_fs_fs__rep_header_t **rep_header,
+ svn_fs_t *fs,
+ svn_stream_t *stream,
+ pair_cache_key_t *key,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ svn_boolean_t is_cached = FALSE;
+
+ if (ffd->rep_header_cache)
+ {
+ SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
+ ffd->rep_header_cache, key,
+ result_pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+ }
+
+ SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool,
+ scratch_pool));
+
+ if (ffd->rep_header_cache)
+ SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header,
+ scratch_pool));
+
+ return SVN_NO_ERROR;
+}
+
+/* Fetch the representation data (header, txdelta / plain windows)
+ * addressed by ENTRY->ITEM in FS and cache it if caches are enabled.
+ * Read the data from the already open REV_FILE.  If MAX_OFFSET is not -1,
+ * don't read windows that start
+ * at or beyond that offset.
+ * Use SCRATCH_POOL for temporary allocations.
+ */
+static svn_error_t *
+block_read_contents(svn_fs_t *fs,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_fs_fs__p2l_entry_t* entry,
+ apr_off_t max_offset,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ pair_cache_key_t header_key = { 0 };
+ svn_fs_fs__rep_header_t *rep_header;
+
+ header_key.revision = (apr_int32_t)entry->item.revision;
+ header_key.second = entry->item.number;
+
+ SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key,
+ result_pool, scratch_pool));
+ SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset,
+ result_pool, scratch_pool));
+
+ return SVN_NO_ERROR;
+}
+
+/* For the given REV_FILE in FS, in *STREAM return a stream covering the
+ * item specified by ENTRY. Also, verify the item's content by low-level
+ * checksum. Allocate the result in POOL.
+ */
+static svn_error_t *
+read_item(svn_stream_t **stream,
+ svn_fs_t *fs,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_fs_fs__p2l_entry_t* entry,
+ apr_pool_t *pool)
+{
+ apr_uint32_t digest;
+ svn_checksum_t *expected, *actual;
+ apr_uint32_t plain_digest;
+
+ /* Read item into string buffer. */
+ svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool);
+ text->len = entry->size;
+ text->data[text->len] = 0;
+ SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len,
+ NULL, NULL, pool));
+
+ /* Return (construct, calculate) stream and checksum. */
+ *stream = svn_stream_from_stringbuf(text, pool);
+ digest = svn__fnv1a_32x4(text->data, text->len);
+
+ /* Checksums will match most of the time. */
+ if (entry->fnv1_checksum == digest)
+ return SVN_NO_ERROR;
+
+ /* Construct proper checksum objects from their digests to allow for
+ * nice error messages. */
+ plain_digest = htonl(entry->fnv1_checksum);
+ expected = svn_checksum__from_digest_fnv1a_32x4(
+ (const unsigned char *)&plain_digest, pool);
+ plain_digest = htonl(digest);
+ actual = svn_checksum__from_digest_fnv1a_32x4(
+ (const unsigned char *)&plain_digest, pool);
+
+ /* Construct the full error message with all the info we have. */
+ return svn_checksum_mismatch_err(expected, actual, pool,
+ _("Low-level checksum mismatch while reading\n"
+ "%s bytes of meta data at offset %s "
+ "for item %s in revision %ld"),
+ apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->size),
+ apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->offset),
+ apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number),
+ entry->item.revision);
+}
+
+/* If not already cached or if MUST_READ is set, read the changed paths
+ * list addressed by ENTRY in FS and return it in *CHANGES.  Cache the
+ * result if caching is enabled.  Read the data from the already open
+ * REV_FILE.  Allocate *CHANGES in RESULT_POOL and use SCRATCH_POOL for
+ * temporary allocations.
+ */
+static svn_error_t *
+block_read_changes(apr_array_header_t **changes,
+ svn_fs_t *fs,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_fs_fs__p2l_entry_t *entry,
+ svn_boolean_t must_read,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ svn_stream_t *stream;
+ if (!must_read && !ffd->changes_cache)
+ return SVN_NO_ERROR;
+
+ /* already in cache? */
+ if (!must_read && ffd->changes_cache)
+ {
+ svn_boolean_t is_cached;
+ SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache,
+ &entry->item.revision,
+ scratch_pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+ }
+
+ SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
+
+ /* read changes from revision file */
+ SVN_ERR(svn_fs_fs__read_changes(changes, stream, result_pool,
+ scratch_pool));
+
+ /* cache for future reference */
+ if (ffd->changes_cache)
+ SVN_ERR(svn_cache__set(ffd->changes_cache, &entry->item.revision,
+ *changes, scratch_pool));
+
+ return SVN_NO_ERROR;
+}
+
+/* If not already cached or if MUST_READ is set, read the node revision
+ * addressed by ENTRY in FS and return it in *NODEREV_P.  Cache the
+ * result if caching is enabled.  Read the data from the already open
+ * REV_FILE.  Allocate *NODEREV_P in RESULT_POOL and use SCRATCH_POOL for
+ * temporary allocations.
+ */
+static svn_error_t *
+block_read_noderev(node_revision_t **noderev_p,
+ svn_fs_t *fs,
+ svn_fs_fs__revision_file_t *rev_file,
+ svn_fs_fs__p2l_entry_t *entry,
+ svn_boolean_t must_read,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ svn_stream_t *stream;
+
+ pair_cache_key_t key = { 0 };
+ key.revision = entry->item.revision;
+ key.second = entry->item.number;
+
+ if (!must_read && !ffd->node_revision_cache)
+ return SVN_NO_ERROR;
+
+ /* already in cache? */
+ if (!must_read && ffd->node_revision_cache)
+ {
+ svn_boolean_t is_cached;
+ SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache,
+ &key, scratch_pool));
+ if (is_cached)
+ return SVN_NO_ERROR;
+ }
+
+ SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
+
+ /* read node rev from revision file */
+ SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream,
+ result_pool, scratch_pool));
+
+ /* Workaround issue #4031: is-fresh-txn-root in revision files. */
+ (*noderev_p)->is_fresh_txn_root = FALSE;
+
+ if (ffd->node_revision_cache)
+ SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p,
+ scratch_pool));
+
+ return SVN_NO_ERROR;
+}
+
+/* Read the whole (e.g. 64kB) block containing ITEM_INDEX of REVISION in FS
+ * and put all data into cache. If necessary and depending on heuristics,
+ * neighboring blocks may also get read. The data is being read from
+ * already open REVISION_FILE, which must be the correct rev / pack file
+ * w.r.t. REVISION.
+ *
+ * For noderevs and changed path lists, the item fetched can be allocated
+ * in RESULT_POOL and returned in *RESULT.  Otherwise, RESULT must be NULL.
+ */
+static svn_error_t *
+block_read(void **result,
+ svn_fs_t *fs,
+ svn_revnum_t revision,
+ apr_uint64_t item_index,
+ svn_fs_fs__revision_file_t *revision_file,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ apr_off_t offset, wanted_offset = 0;
+ apr_off_t block_start = 0;
+ apr_array_header_t *entries;
+ int run_count = 0;
+ int i;
+ apr_pool_t *iterpool;
+
+ /* Block read is an optional feature. If the caller does not want anything
+ * specific we may not have to read anything. */
+ if (!result)
+ return SVN_NO_ERROR;
+
+ iterpool = svn_pool_create(scratch_pool);
+
+ /* don't try this on transaction protorev files */
+ SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
+
+ /* index lookup: find the OFFSET of the item we *must* read plus (in the
+ * "do-while" block) the list of items in the same block. */
+ SVN_ERR(svn_fs_fs__item_offset(&wanted_offset, fs, revision_file,
+ revision, NULL, item_index, iterpool));
+
+ offset = wanted_offset;
+
+ /* Heuristics:
+ *
+ * Read this block. If the last item crosses the block boundary, read
+ * the next block but stop there. Because cross-boundary items cause
+   * blocks to be read twice, this heuristic will limit the effect to
+ * approx. 50% of blocks, probably less, while providing a sensible
+ * amount of read-ahead.
+ */
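+  /* Illustrative numbers (assuming a 64kB block size, the usual default):
+   * an item at offset 70000 falls into the block starting at 65536; if the
+   * last item read ends beyond 131072, RUN_COUNT trips and the loop makes
+   * exactly one more pass over the following block before it stops. */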
+ do
+ {
+ /* fetch list of items in the block surrounding OFFSET */
+ block_start = offset - (offset % ffd->block_size);
+ SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, revision_file,
+ revision, block_start,
+ ffd->block_size, scratch_pool,
+ scratch_pool));
+
+ SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, offset,
+ iterpool));
+
+ /* read all items from the block */
+ for (i = 0; i < entries->nelts; ++i)
+ {
+ svn_boolean_t is_result, is_wanted;
+ apr_pool_t *pool;
+ svn_fs_fs__p2l_entry_t* entry;
+
+ svn_pool_clear(iterpool);
+
+ /* skip empty sections */
+ entry = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
+ if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
+ continue;
+
+ /* the item / container we were looking for? */
+ is_wanted = entry->offset == wanted_offset
+ && entry->item.revision == revision
+ && entry->item.number == item_index;
+ is_result = result && is_wanted;
+
+ /* select the pool that we want the item to be allocated in */
+ pool = is_result ? result_pool : iterpool;
+
+ /* handle all items that start within this block and are relatively
+ * small (i.e. < block size). Always read the item we need to return.
+ */
+ if (is_result || ( entry->offset >= block_start
+ && entry->size < ffd->block_size))
+ {
+ void *item = NULL;
+ SVN_ERR(svn_io_file_seek(revision_file->file, APR_SET,
+ &entry->offset, iterpool));
+ switch (entry->type)
+ {
+ case SVN_FS_FS__ITEM_TYPE_FILE_REP:
+ case SVN_FS_FS__ITEM_TYPE_DIR_REP:
+ case SVN_FS_FS__ITEM_TYPE_FILE_PROPS:
+ case SVN_FS_FS__ITEM_TYPE_DIR_PROPS:
+ SVN_ERR(block_read_contents(fs, revision_file, entry,
+ is_wanted
+ ? -1
+ : block_start + ffd->block_size,
+ pool, iterpool));
+ break;
+
+ case SVN_FS_FS__ITEM_TYPE_NODEREV:
+ if (ffd->node_revision_cache || is_result)
+ SVN_ERR(block_read_noderev((node_revision_t **)&item,
+ fs, revision_file,
+ entry, is_result, pool,
+ iterpool));
+ break;
+
+ case SVN_FS_FS__ITEM_TYPE_CHANGES:
+ SVN_ERR(block_read_changes((apr_array_header_t **)&item,
+ fs, revision_file,
+ entry, is_result,
+ pool, iterpool));
+ break;
+
+ default:
+ break;
+ }
+
+ if (is_result)
+ *result = item;
+
+ /* if we crossed a block boundary, read the remainder of
+ * the last block as well */
+ offset = entry->offset + entry->size;
+ if (offset > block_start + ffd->block_size)
+ ++run_count;
+ }
+ }
+
+ }
+ while(run_count++ == 1); /* can only be true once and only if a block
+ * boundary got crossed */
+
+ /* if the caller requested a result, we must have provided one by now */
+ assert(!result || *result);
+ svn_pool_destroy(iterpool);
+
+ return SVN_NO_ERROR;
+}