summaryrefslogtreecommitdiff
path: root/subversion/libsvn_fs_fs/recovery.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/libsvn_fs_fs/recovery.c')
-rw-r--r--subversion/libsvn_fs_fs/recovery.c509
1 files changed, 509 insertions, 0 deletions
diff --git a/subversion/libsvn_fs_fs/recovery.c b/subversion/libsvn_fs_fs/recovery.c
new file mode 100644
index 0000000..125d47a
--- /dev/null
+++ b/subversion/libsvn_fs_fs/recovery.c
@@ -0,0 +1,509 @@
+/* recovery.c --- FSFS recovery functionality
+*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#include "recovery.h"
+
+#include "svn_hash.h"
+#include "svn_pools.h"
+#include "private/svn_string_private.h"
+
+#include "index.h"
+#include "low_level.h"
+#include "rep-cache.h"
+#include "revprops.h"
+#include "util.h"
+#include "cached_data.h"
+
+#include "../libsvn_fs/fs-loader.h"
+
+#include "svn_private_config.h"
+
+/* Part of the recovery procedure. Return the largest revision *REV in
+ filesystem FS. Use POOL for temporary allocation. */
+static svn_error_t *
+recover_get_largest_revision(svn_fs_t *fs, svn_revnum_t *rev, apr_pool_t *pool)
+{
+ /* Discovering the largest revision in the filesystem would be an
+ expensive operation if we did a readdir() or searched linearly,
+ so we'll do a form of binary search. left is a revision that we
+ know exists, right a revision that we know does not exist. */
+ apr_pool_t *iterpool;
+ svn_revnum_t left, right = 1;
+
+ iterpool = svn_pool_create(pool);
+ /* Keep doubling right, until we find a revision that doesn't exist. */
+ while (1)
+ {
+ svn_error_t *err;
+ svn_fs_fs__revision_file_t *file;
+ svn_pool_clear(iterpool);
+
+ err = svn_fs_fs__open_pack_or_rev_file(&file, fs, right, iterpool,
+ iterpool);
+ if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION)
+ {
+ svn_error_clear(err);
+ break;
+ }
+ else
+ SVN_ERR(err);
+
+ right <<= 1;
+ }
+
+ left = right >> 1;
+
+ /* We know that left exists and right doesn't. Do a normal bsearch to find
+ the last revision. */
+ while (left + 1 < right)
+ {
+ svn_revnum_t probe = left + ((right - left) / 2);
+ svn_error_t *err;
+ svn_fs_fs__revision_file_t *file;
+ svn_pool_clear(iterpool);
+
+ err = svn_fs_fs__open_pack_or_rev_file(&file, fs, probe, iterpool,
+ iterpool);
+ if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION)
+ {
+ svn_error_clear(err);
+ right = probe;
+ }
+ else
+ {
+ SVN_ERR(err);
+ left = probe;
+ }
+ }
+
+ svn_pool_destroy(iterpool);
+
+ /* left is now the largest revision that exists. */
+ *rev = left;
+ return SVN_NO_ERROR;
+}
+
+/* A baton for reading a fixed amount from an open file. For
+ recover_find_max_ids() below. */
+struct recover_read_from_file_baton
+{
+ svn_stream_t *stream;
+ apr_pool_t *pool;
+ apr_off_t remaining;
+};
+
+/* A stream read handler used by recover_find_max_ids() below.
+ Read and return at most BATON->REMAINING bytes from the stream,
+ returning nothing after that to indicate EOF. */
+static svn_error_t *
+read_handler_recover(void *baton, char *buffer, apr_size_t *len)
+{
+ struct recover_read_from_file_baton *b = baton;
+ apr_size_t bytes_to_read = *len;
+
+ if (b->remaining == 0)
+ {
+ /* Return a successful read of zero bytes to signal EOF. */
+ *len = 0;
+ return SVN_NO_ERROR;
+ }
+
+ if ((apr_int64_t)bytes_to_read > (apr_int64_t)b->remaining)
+ bytes_to_read = (apr_size_t)b->remaining;
+ b->remaining -= bytes_to_read;
+
+ return svn_stream_read_full(b->stream, buffer, &bytes_to_read);
+}
+
+/* Part of the recovery procedure. Read the directory noderev at offset
+ OFFSET of file REV_FILE (the revision file of revision REV of
+ filesystem FS), and set MAX_NODE_ID and MAX_COPY_ID to be the node-id
+ and copy-id of that node, if greater than the current value stored
+ in either. Recurse into any child directories that were modified in
+ this revision.
+
+ MAX_NODE_ID and MAX_COPY_ID must be arrays of at least MAX_KEY_SIZE.
+
+ Perform temporary allocation in POOL. */
+static svn_error_t *
+recover_find_max_ids(svn_fs_t *fs,
+ svn_revnum_t rev,
+ svn_fs_fs__revision_file_t *rev_file,
+ apr_off_t offset,
+ apr_uint64_t *max_node_id,
+ apr_uint64_t *max_copy_id,
+ apr_pool_t *pool)
+{
+ svn_fs_fs__rep_header_t *header;
+ struct recover_read_from_file_baton baton;
+ svn_stream_t *stream;
+ apr_hash_t *entries;
+ apr_hash_index_t *hi;
+ apr_pool_t *iterpool;
+ node_revision_t *noderev;
+ svn_error_t *err;
+
+ baton.stream = rev_file->stream;
+ SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset, pool));
+ SVN_ERR(svn_fs_fs__read_noderev(&noderev, baton.stream, pool, pool));
+
+ /* Check that this is a directory. It should be. */
+ if (noderev->kind != svn_node_dir)
+ return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+ _("Recovery encountered a non-directory node"));
+
+ /* Get the data location. No data location indicates an empty directory. */
+ if (!noderev->data_rep)
+ return SVN_NO_ERROR;
+
+ /* If the directory's data representation wasn't changed in this revision,
+ we've already scanned the directory's contents for noderevs, so we don't
+ need to again. This will occur if a property is changed on a directory
+ without changing the directory's contents. */
+ if (noderev->data_rep->revision != rev)
+ return SVN_NO_ERROR;
+
+ /* We could use get_dir_contents(), but this is much cheaper. It does
+ rely on directory entries being stored as PLAIN reps, though. */
+ SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL,
+ noderev->data_rep->item_index, pool));
+ SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset, pool));
+ SVN_ERR(svn_fs_fs__read_rep_header(&header, baton.stream, pool, pool));
+ if (header->type != svn_fs_fs__rep_plain)
+ return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+ _("Recovery encountered a deltified directory "
+ "representation"));
+
+ /* Now create a stream that's allowed to read only as much data as is
+ stored in the representation. Note that this is a directory, i.e.
+ represented using the hash format on disk and can never have 0 length. */
+ baton.pool = pool;
+ baton.remaining = noderev->data_rep->expanded_size
+ ? noderev->data_rep->expanded_size
+ : noderev->data_rep->size;
+ stream = svn_stream_create(&baton, pool);
+ svn_stream_set_read2(stream, NULL /* only full read support */,
+ read_handler_recover);
+
+ /* Now read the entries from that stream. */
+ entries = apr_hash_make(pool);
+ err = svn_hash_read2(entries, stream, SVN_HASH_TERMINATOR, pool);
+ if (err)
+ {
+ svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
+
+ svn_error_clear(svn_stream_close(stream));
+ return svn_error_quick_wrapf(err,
+ _("malformed representation for node-revision '%s'"),
+ id_str->data);
+ }
+ SVN_ERR(svn_stream_close(stream));
+
+ /* Now check each of the entries in our directory to find new node and
+ copy ids, and recurse into new subdirectories. */
+ iterpool = svn_pool_create(pool);
+ for (hi = apr_hash_first(pool, entries); hi; hi = apr_hash_next(hi))
+ {
+ char *str_val;
+ char *str;
+ svn_node_kind_t kind;
+ const svn_fs_id_t *id;
+ const svn_fs_fs__id_part_t *rev_item;
+ apr_uint64_t node_id, copy_id;
+ apr_off_t child_dir_offset;
+ const svn_string_t *path = apr_hash_this_val(hi);
+
+ svn_pool_clear(iterpool);
+
+ str_val = apr_pstrdup(iterpool, path->data);
+
+ str = svn_cstring_tokenize(" ", &str_val);
+ if (str == NULL)
+ return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+ _("Directory entry corrupt"));
+
+ if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
+ kind = svn_node_file;
+ else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
+ kind = svn_node_dir;
+ else
+ {
+ return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+ _("Directory entry corrupt"));
+ }
+
+ str = svn_cstring_tokenize(" ", &str_val);
+ if (str == NULL)
+ return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
+ _("Directory entry corrupt"));
+
+ SVN_ERR(svn_fs_fs__id_parse(&id, str, iterpool));
+
+ rev_item = svn_fs_fs__id_rev_item(id);
+ if (rev_item->revision != rev)
+ {
+ /* If the node wasn't modified in this revision, we've already
+ checked the node and copy id. */
+ continue;
+ }
+
+ node_id = svn_fs_fs__id_node_id(id)->number;
+ copy_id = svn_fs_fs__id_copy_id(id)->number;
+
+ if (node_id > *max_node_id)
+ *max_node_id = node_id;
+ if (copy_id > *max_copy_id)
+ *max_copy_id = copy_id;
+
+ if (kind == svn_node_file)
+ continue;
+
+ SVN_ERR(svn_fs_fs__item_offset(&child_dir_offset, fs,
+ rev_file, rev, NULL, rev_item->number,
+ iterpool));
+ SVN_ERR(recover_find_max_ids(fs, rev, rev_file, child_dir_offset,
+ max_node_id, max_copy_id, iterpool));
+ }
+ svn_pool_destroy(iterpool);
+
+ return SVN_NO_ERROR;
+}
+
+/* Part of the recovery procedure. Given an open non-packed revision file
+ REV_FILE for REV, locate the trailer that specifies the offset to the root
+ node-id and store this offset in *ROOT_OFFSET. Do temporary allocations in
+ POOL. */
+static svn_error_t *
+recover_get_root_offset(apr_off_t *root_offset,
+ svn_revnum_t rev,
+ svn_fs_fs__revision_file_t *rev_file,
+ apr_pool_t *pool)
+{
+ char buffer[64];
+ svn_stringbuf_t *trailer;
+ apr_off_t start;
+ apr_off_t end;
+ apr_size_t len;
+
+ SVN_ERR_ASSERT(!rev_file->is_packed);
+
+ /* We will assume that the last line containing the two offsets (to the root
+ node-id and to the changed path information) will never be longer than 64
+ characters. */
+ end = 0;
+ SVN_ERR(svn_io_file_seek(rev_file->file, APR_END, &end, pool));
+
+ if (end < sizeof(buffer))
+ {
+ len = (apr_size_t)end;
+ start = 0;
+ }
+ else
+ {
+ len = sizeof(buffer);
+ start = end - sizeof(buffer);
+ }
+
+ SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &start, pool));
+ SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, len,
+ NULL, NULL, pool));
+
+ trailer = svn_stringbuf_ncreate(buffer, len, pool);
+ SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset, NULL, trailer, rev));
+
+ return SVN_NO_ERROR;
+}
+
+/* Baton used for recover_body below. */
+struct recover_baton {
+ svn_fs_t *fs;
+ svn_cancel_func_t cancel_func;
+ void *cancel_baton;
+};
+
+/* The work-horse for svn_fs_fs__recover, called with the FS
+ write lock. This implements the svn_fs_fs__with_write_lock()
+ 'body' callback type. BATON is a 'struct recover_baton *'. */
+static svn_error_t *
+recover_body(void *baton, apr_pool_t *pool)
+{
+ struct recover_baton *b = baton;
+ svn_fs_t *fs = b->fs;
+ fs_fs_data_t *ffd = fs->fsap_data;
+ svn_revnum_t max_rev;
+ apr_uint64_t next_node_id = 0;
+ apr_uint64_t next_copy_id = 0;
+ svn_revnum_t youngest_rev;
+ svn_node_kind_t youngest_revprops_kind;
+
+ /* The admin may have created a plain copy of this repo before attempting
+ to recover it (hotcopy may or may not work with corrupted repos).
+ Bump the instance ID. */
+ SVN_ERR(svn_fs_fs__set_uuid(fs, fs->uuid, NULL, pool));
+
+ /* We need to know the largest revision in the filesystem. */
+ SVN_ERR(recover_get_largest_revision(fs, &max_rev, pool));
+
+ /* Get the expected youngest revision */
+ SVN_ERR(svn_fs_fs__youngest_rev(&youngest_rev, fs, pool));
+
+ /* Policy note:
+
+ Since the revprops file is written after the revs file, the true
+ maximum available revision is the youngest one for which both are
+ present. That's probably the same as the max_rev we just found,
+ but if it's not, we could, in theory, repeatedly decrement
+ max_rev until we find a revision that has both a revs and
+ revprops file, then write db/current with that.
+
+ But we choose not to. If a repository is so corrupt that it's
+ missing at least one revprops file, we shouldn't assume that the
+ youngest revision for which both the revs and revprops files are
+ present is healthy. In other words, we're willing to recover
+ from a missing or out-of-date db/current file, because db/current
+ is truly redundant -- it's basically a cache so we don't have to
+ find max_rev each time, albeit a cache with unusual semantics,
+ since it also officially defines when a revision goes live. But
+ if we're missing more than the cache, it's time to back out and
+ let the admin reconstruct things by hand: correctness at that
+ point may depend on external things like checking a commit email
+ list, looking in particular working copies, etc.
+
+ This policy matches well with a typical naive backup scenario.
+ Say you're rsyncing your FSFS repository nightly to the same
+ location. Once revs and revprops are written, you've got the
+ maximum rev; if the backup should bomb before db/current is
+ written, then db/current could stay arbitrarily out-of-date, but
+ we can still recover. It's a small window, but we might as well
+ do what we can. */
+
+ /* Even if db/current were missing, it would be created with 0 by
+ get_youngest(), so this conditional remains valid. */
+ if (youngest_rev > max_rev)
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+ _("Expected current rev to be <= %ld "
+ "but found %ld"), max_rev, youngest_rev);
+
+ /* We only need to search for maximum IDs for old FS formats which
+ se global ID counters. */
+ if (ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT)
+ {
+ /* Next we need to find the maximum node id and copy id in use across the
+ filesystem. Unfortunately, the only way we can get this information
+ is to scan all the noderevs of all the revisions and keep track as
+ we go along. */
+ svn_revnum_t rev;
+ apr_pool_t *iterpool = svn_pool_create(pool);
+
+ for (rev = 0; rev <= max_rev; rev++)
+ {
+ svn_fs_fs__revision_file_t *rev_file;
+ apr_off_t root_offset;
+
+ svn_pool_clear(iterpool);
+
+ if (b->cancel_func)
+ SVN_ERR(b->cancel_func(b->cancel_baton));
+
+ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, pool,
+ iterpool));
+ SVN_ERR(recover_get_root_offset(&root_offset, rev, rev_file, pool));
+ SVN_ERR(recover_find_max_ids(fs, rev, rev_file, root_offset,
+ &next_node_id, &next_copy_id, pool));
+ SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
+ }
+ svn_pool_destroy(iterpool);
+
+ /* Now that we finally have the maximum revision, node-id and copy-id, we
+ can bump the two ids to get the next of each. */
+ next_node_id++;
+ next_copy_id++;
+ }
+
+ /* Before setting current, verify that there is a revprops file
+ for the youngest revision. (Issue #2992) */
+ SVN_ERR(svn_io_check_path(svn_fs_fs__path_revprops(fs, max_rev, pool),
+ &youngest_revprops_kind, pool));
+ if (youngest_revprops_kind == svn_node_none)
+ {
+ svn_boolean_t missing = TRUE;
+ if (!svn_fs_fs__packed_revprop_available(&missing, fs, max_rev, pool))
+ {
+ if (missing)
+ {
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+ _("Revision %ld has a revs file but no "
+ "revprops file"),
+ max_rev);
+ }
+ else
+ {
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+ _("Revision %ld has a revs file but the "
+ "revprops file is inaccessible"),
+ max_rev);
+ }
+ }
+ }
+ else if (youngest_revprops_kind != svn_node_file)
+ {
+ return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+ _("Revision %ld has a non-file where its "
+ "revprops file should be"),
+ max_rev);
+ }
+
+ /* Prune younger-than-(newfound-youngest) revisions from the rep
+ cache if sharing is enabled taking care not to create the cache
+ if it does not exist. */
+ if (ffd->rep_sharing_allowed)
+ {
+ svn_boolean_t rep_cache_exists;
+
+ SVN_ERR(svn_fs_fs__exists_rep_cache(&rep_cache_exists, fs, pool));
+ if (rep_cache_exists)
+ SVN_ERR(svn_fs_fs__del_rep_reference(fs, max_rev, pool));
+ }
+
+ /* Now store the discovered youngest revision, and the next IDs if
+ relevant, in a new 'current' file. */
+ return svn_fs_fs__write_current(fs, max_rev, next_node_id, next_copy_id,
+ pool);
+}
+
+/* This implements the fs_library_vtable_t.recover() API. */
+svn_error_t *
+svn_fs_fs__recover(svn_fs_t *fs,
+ svn_cancel_func_t cancel_func, void *cancel_baton,
+ apr_pool_t *pool)
+{
+ struct recover_baton b;
+
+ /* We have no way to take out an exclusive lock in FSFS, so we're
+ restricted as to the types of recovery we can do. Luckily,
+ we just want to recreate the 'current' file, and we can do that just
+ by blocking other writers. */
+ b.fs = fs;
+ b.cancel_func = cancel_func;
+ b.cancel_baton = cancel_baton;
+ return svn_fs_fs__with_all_locks(fs, recover_body, &b, pool);
+}