diff options
Diffstat (limited to 'subversion/libsvn_fs_fs/recovery.c')
-rw-r--r-- | subversion/libsvn_fs_fs/recovery.c | 509 |
1 files changed, 509 insertions, 0 deletions
diff --git a/subversion/libsvn_fs_fs/recovery.c b/subversion/libsvn_fs_fs/recovery.c new file mode 100644 index 0000000..125d47a --- /dev/null +++ b/subversion/libsvn_fs_fs/recovery.c @@ -0,0 +1,509 @@ +/* recovery.c --- FSFS recovery functionality +* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include "recovery.h" + +#include "svn_hash.h" +#include "svn_pools.h" +#include "private/svn_string_private.h" + +#include "index.h" +#include "low_level.h" +#include "rep-cache.h" +#include "revprops.h" +#include "util.h" +#include "cached_data.h" + +#include "../libsvn_fs/fs-loader.h" + +#include "svn_private_config.h" + +/* Part of the recovery procedure. Return the largest revision *REV in + filesystem FS. Use POOL for temporary allocation. */ +static svn_error_t * +recover_get_largest_revision(svn_fs_t *fs, svn_revnum_t *rev, apr_pool_t *pool) +{ + /* Discovering the largest revision in the filesystem would be an + expensive operation if we did a readdir() or searched linearly, + so we'll do a form of binary search. left is a revision that we + know exists, right a revision that we know does not exist. */ + apr_pool_t *iterpool; + svn_revnum_t left, right = 1; + + iterpool = svn_pool_create(pool); + /* Keep doubling right, until we find a revision that doesn't exist. */ + while (1) + { + svn_error_t *err; + svn_fs_fs__revision_file_t *file; + svn_pool_clear(iterpool); + + err = svn_fs_fs__open_pack_or_rev_file(&file, fs, right, iterpool, + iterpool); + if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION) + { + svn_error_clear(err); + break; + } + else + SVN_ERR(err); + + right <<= 1; + } + + left = right >> 1; + + /* We know that left exists and right doesn't. Do a normal bsearch to find + the last revision. */ + while (left + 1 < right) + { + svn_revnum_t probe = left + ((right - left) / 2); + svn_error_t *err; + svn_fs_fs__revision_file_t *file; + svn_pool_clear(iterpool); + + err = svn_fs_fs__open_pack_or_rev_file(&file, fs, probe, iterpool, + iterpool); + if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION) + { + svn_error_clear(err); + right = probe; + } + else + { + SVN_ERR(err); + left = probe; + } + } + + svn_pool_destroy(iterpool); + + /* left is now the largest revision that exists. */ + *rev = left; + return SVN_NO_ERROR; +} + +/* A baton for reading a fixed amount from an open file. For + recover_find_max_ids() below. */ +struct recover_read_from_file_baton +{ + svn_stream_t *stream; + apr_pool_t *pool; + apr_off_t remaining; +}; + +/* A stream read handler used by recover_find_max_ids() below. + Read and return at most BATON->REMAINING bytes from the stream, + returning nothing after that to indicate EOF. */ +static svn_error_t * +read_handler_recover(void *baton, char *buffer, apr_size_t *len) +{ + struct recover_read_from_file_baton *b = baton; + apr_size_t bytes_to_read = *len; + + if (b->remaining == 0) + { + /* Return a successful read of zero bytes to signal EOF. */ + *len = 0; + return SVN_NO_ERROR; + } + + if ((apr_int64_t)bytes_to_read > (apr_int64_t)b->remaining) + bytes_to_read = (apr_size_t)b->remaining; + b->remaining -= bytes_to_read; + + return svn_stream_read_full(b->stream, buffer, &bytes_to_read); +} + +/* Part of the recovery procedure. Read the directory noderev at offset + OFFSET of file REV_FILE (the revision file of revision REV of + filesystem FS), and set MAX_NODE_ID and MAX_COPY_ID to be the node-id + and copy-id of that node, if greater than the current value stored + in either. Recurse into any child directories that were modified in + this revision. + + MAX_NODE_ID and MAX_COPY_ID must be arrays of at least MAX_KEY_SIZE. + + Perform temporary allocation in POOL. */ +static svn_error_t * +recover_find_max_ids(svn_fs_t *fs, + svn_revnum_t rev, + svn_fs_fs__revision_file_t *rev_file, + apr_off_t offset, + apr_uint64_t *max_node_id, + apr_uint64_t *max_copy_id, + apr_pool_t *pool) +{ + svn_fs_fs__rep_header_t *header; + struct recover_read_from_file_baton baton; + svn_stream_t *stream; + apr_hash_t *entries; + apr_hash_index_t *hi; + apr_pool_t *iterpool; + node_revision_t *noderev; + svn_error_t *err; + + baton.stream = rev_file->stream; + SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset, pool)); + SVN_ERR(svn_fs_fs__read_noderev(&noderev, baton.stream, pool, pool)); + + /* Check that this is a directory. It should be. */ + if (noderev->kind != svn_node_dir) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Recovery encountered a non-directory node")); + + /* Get the data location. No data location indicates an empty directory. */ + if (!noderev->data_rep) + return SVN_NO_ERROR; + + /* If the directory's data representation wasn't changed in this revision, + we've already scanned the directory's contents for noderevs, so we don't + need to again. This will occur if a property is changed on a directory + without changing the directory's contents. */ + if (noderev->data_rep->revision != rev) + return SVN_NO_ERROR; + + /* We could use get_dir_contents(), but this is much cheaper. It does + rely on directory entries being stored as PLAIN reps, though. */ + SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL, + noderev->data_rep->item_index, pool)); + SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset, pool)); + SVN_ERR(svn_fs_fs__read_rep_header(&header, baton.stream, pool, pool)); + if (header->type != svn_fs_fs__rep_plain) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Recovery encountered a deltified directory " + "representation")); + + /* Now create a stream that's allowed to read only as much data as is + stored in the representation. Note that this is a directory, i.e. + represented using the hash format on disk and can never have 0 length. */ + baton.pool = pool; + baton.remaining = noderev->data_rep->expanded_size + ? noderev->data_rep->expanded_size + : noderev->data_rep->size; + stream = svn_stream_create(&baton, pool); + svn_stream_set_read2(stream, NULL /* only full read support */, + read_handler_recover); + + /* Now read the entries from that stream. */ + entries = apr_hash_make(pool); + err = svn_hash_read2(entries, stream, SVN_HASH_TERMINATOR, pool); + if (err) + { + svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool); + + svn_error_clear(svn_stream_close(stream)); + return svn_error_quick_wrapf(err, + _("malformed representation for node-revision '%s'"), + id_str->data); + } + SVN_ERR(svn_stream_close(stream)); + + /* Now check each of the entries in our directory to find new node and + copy ids, and recurse into new subdirectories. */ + iterpool = svn_pool_create(pool); + for (hi = apr_hash_first(pool, entries); hi; hi = apr_hash_next(hi)) + { + char *str_val; + char *str; + svn_node_kind_t kind; + const svn_fs_id_t *id; + const svn_fs_fs__id_part_t *rev_item; + apr_uint64_t node_id, copy_id; + apr_off_t child_dir_offset; + const svn_string_t *path = apr_hash_this_val(hi); + + svn_pool_clear(iterpool); + + str_val = apr_pstrdup(iterpool, path->data); + + str = svn_cstring_tokenize(" ", &str_val); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Directory entry corrupt")); + + if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0) + kind = svn_node_file; + else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0) + kind = svn_node_dir; + else + { + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Directory entry corrupt")); + } + + str = svn_cstring_tokenize(" ", &str_val); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Directory entry corrupt")); + + SVN_ERR(svn_fs_fs__id_parse(&id, str, iterpool)); + + rev_item = svn_fs_fs__id_rev_item(id); + if (rev_item->revision != rev) + { + /* If the node wasn't modified in this revision, we've already + checked the node and copy id. */ + continue; + } + + node_id = svn_fs_fs__id_node_id(id)->number; + copy_id = svn_fs_fs__id_copy_id(id)->number; + + if (node_id > *max_node_id) + *max_node_id = node_id; + if (copy_id > *max_copy_id) + *max_copy_id = copy_id; + + if (kind == svn_node_file) + continue; + + SVN_ERR(svn_fs_fs__item_offset(&child_dir_offset, fs, + rev_file, rev, NULL, rev_item->number, + iterpool)); + SVN_ERR(recover_find_max_ids(fs, rev, rev_file, child_dir_offset, + max_node_id, max_copy_id, iterpool)); + } + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +/* Part of the recovery procedure. Given an open non-packed revision file + REV_FILE for REV, locate the trailer that specifies the offset to the root + node-id and store this offset in *ROOT_OFFSET. Do temporary allocations in + POOL. */ +static svn_error_t * +recover_get_root_offset(apr_off_t *root_offset, + svn_revnum_t rev, + svn_fs_fs__revision_file_t *rev_file, + apr_pool_t *pool) +{ + char buffer[64]; + svn_stringbuf_t *trailer; + apr_off_t start; + apr_off_t end; + apr_size_t len; + + SVN_ERR_ASSERT(!rev_file->is_packed); + + /* We will assume that the last line containing the two offsets (to the root + node-id and to the changed path information) will never be longer than 64 + characters. */ + end = 0; + SVN_ERR(svn_io_file_seek(rev_file->file, APR_END, &end, pool)); + + if (end < sizeof(buffer)) + { + len = (apr_size_t)end; + start = 0; + } + else + { + len = sizeof(buffer); + start = end - sizeof(buffer); + } + + SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &start, pool)); + SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, len, + NULL, NULL, pool)); + + trailer = svn_stringbuf_ncreate(buffer, len, pool); + SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset, NULL, trailer, rev)); + + return SVN_NO_ERROR; +} + +/* Baton used for recover_body below. */ +struct recover_baton { + svn_fs_t *fs; + svn_cancel_func_t cancel_func; + void *cancel_baton; +}; + +/* The work-horse for svn_fs_fs__recover, called with the FS + write lock. This implements the svn_fs_fs__with_write_lock() + 'body' callback type. BATON is a 'struct recover_baton *'. */ +static svn_error_t * +recover_body(void *baton, apr_pool_t *pool) +{ + struct recover_baton *b = baton; + svn_fs_t *fs = b->fs; + fs_fs_data_t *ffd = fs->fsap_data; + svn_revnum_t max_rev; + apr_uint64_t next_node_id = 0; + apr_uint64_t next_copy_id = 0; + svn_revnum_t youngest_rev; + svn_node_kind_t youngest_revprops_kind; + + /* The admin may have created a plain copy of this repo before attempting + to recover it (hotcopy may or may not work with corrupted repos). + Bump the instance ID. */ + SVN_ERR(svn_fs_fs__set_uuid(fs, fs->uuid, NULL, pool)); + + /* We need to know the largest revision in the filesystem. */ + SVN_ERR(recover_get_largest_revision(fs, &max_rev, pool)); + + /* Get the expected youngest revision */ + SVN_ERR(svn_fs_fs__youngest_rev(&youngest_rev, fs, pool)); + + /* Policy note: + + Since the revprops file is written after the revs file, the true + maximum available revision is the youngest one for which both are + present. That's probably the same as the max_rev we just found, + but if it's not, we could, in theory, repeatedly decrement + max_rev until we find a revision that has both a revs and + revprops file, then write db/current with that. + + But we choose not to. If a repository is so corrupt that it's + missing at least one revprops file, we shouldn't assume that the + youngest revision for which both the revs and revprops files are + present is healthy. In other words, we're willing to recover + from a missing or out-of-date db/current file, because db/current + is truly redundant -- it's basically a cache so we don't have to + find max_rev each time, albeit a cache with unusual semantics, + since it also officially defines when a revision goes live. But + if we're missing more than the cache, it's time to back out and + let the admin reconstruct things by hand: correctness at that + point may depend on external things like checking a commit email + list, looking in particular working copies, etc. + + This policy matches well with a typical naive backup scenario. + Say you're rsyncing your FSFS repository nightly to the same + location. Once revs and revprops are written, you've got the + maximum rev; if the backup should bomb before db/current is + written, then db/current could stay arbitrarily out-of-date, but + we can still recover. It's a small window, but we might as well + do what we can. */ + + /* Even if db/current were missing, it would be created with 0 by + get_youngest(), so this conditional remains valid. */ + if (youngest_rev > max_rev) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Expected current rev to be <= %ld " + "but found %ld"), max_rev, youngest_rev); + + /* We only need to search for maximum IDs for old FS formats which + se global ID counters. */ + if (ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT) + { + /* Next we need to find the maximum node id and copy id in use across the + filesystem. Unfortunately, the only way we can get this information + is to scan all the noderevs of all the revisions and keep track as + we go along. */ + svn_revnum_t rev; + apr_pool_t *iterpool = svn_pool_create(pool); + + for (rev = 0; rev <= max_rev; rev++) + { + svn_fs_fs__revision_file_t *rev_file; + apr_off_t root_offset; + + svn_pool_clear(iterpool); + + if (b->cancel_func) + SVN_ERR(b->cancel_func(b->cancel_baton)); + + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, pool, + iterpool)); + SVN_ERR(recover_get_root_offset(&root_offset, rev, rev_file, pool)); + SVN_ERR(recover_find_max_ids(fs, rev, rev_file, root_offset, + &next_node_id, &next_copy_id, pool)); + SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); + } + svn_pool_destroy(iterpool); + + /* Now that we finally have the maximum revision, node-id and copy-id, we + can bump the two ids to get the next of each. */ + next_node_id++; + next_copy_id++; + } + + /* Before setting current, verify that there is a revprops file + for the youngest revision. (Issue #2992) */ + SVN_ERR(svn_io_check_path(svn_fs_fs__path_revprops(fs, max_rev, pool), + &youngest_revprops_kind, pool)); + if (youngest_revprops_kind == svn_node_none) + { + svn_boolean_t missing = TRUE; + if (!svn_fs_fs__packed_revprop_available(&missing, fs, max_rev, pool)) + { + if (missing) + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Revision %ld has a revs file but no " + "revprops file"), + max_rev); + } + else + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Revision %ld has a revs file but the " + "revprops file is inaccessible"), + max_rev); + } + } + } + else if (youngest_revprops_kind != svn_node_file) + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Revision %ld has a non-file where its " + "revprops file should be"), + max_rev); + } + + /* Prune younger-than-(newfound-youngest) revisions from the rep + cache if sharing is enabled taking care not to create the cache + if it does not exist. */ + if (ffd->rep_sharing_allowed) + { + svn_boolean_t rep_cache_exists; + + SVN_ERR(svn_fs_fs__exists_rep_cache(&rep_cache_exists, fs, pool)); + if (rep_cache_exists) + SVN_ERR(svn_fs_fs__del_rep_reference(fs, max_rev, pool)); + } + + /* Now store the discovered youngest revision, and the next IDs if + relevant, in a new 'current' file. */ + return svn_fs_fs__write_current(fs, max_rev, next_node_id, next_copy_id, + pool); +} + +/* This implements the fs_library_vtable_t.recover() API. */ +svn_error_t * +svn_fs_fs__recover(svn_fs_t *fs, + svn_cancel_func_t cancel_func, void *cancel_baton, + apr_pool_t *pool) +{ + struct recover_baton b; + + /* We have no way to take out an exclusive lock in FSFS, so we're + restricted as to the types of recovery we can do. Luckily, + we just want to recreate the 'current' file, and we can do that just + by blocking other writers. */ + b.fs = fs; + b.cancel_func = cancel_func; + b.cancel_baton = cancel_baton; + return svn_fs_fs__with_all_locks(fs, recover_body, &b, pool); +} |