diff options
author | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 14:29:52 +0100 |
---|---|---|
committer | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 14:29:52 +0100 |
commit | f1bdf13786f0752c0846cf36f0d91e4fc6747929 (patch) | |
tree | 4223b2035bf2240d681a53822808b3c7f687b905 /tools/server-side | |
download | subversion-tarball-f1bdf13786f0752c0846cf36f0d91e4fc6747929.tar.gz |
Tarball conversion
Diffstat (limited to 'tools/server-side')
-rwxr-xr-x | tools/server-side/fsfs-reshard.py | 399 | ||||
-rw-r--r-- | tools/server-side/mod_dontdothat/README | 53 | ||||
-rw-r--r-- | tools/server-side/mod_dontdothat/mod_dontdothat.c | 661 | ||||
-rwxr-xr-x | tools/server-side/svn-backup-dumps.py | 692 | ||||
-rw-r--r-- | tools/server-side/svn-populate-node-origins-index.c | 193 | ||||
-rw-r--r-- | tools/server-side/svn-rep-sharing-stats.c | 535 | ||||
-rwxr-xr-x | tools/server-side/svn_server_log_parse.py | 460 | ||||
-rw-r--r-- | tools/server-side/svnauthz-validate.c | 76 | ||||
-rwxr-xr-x | tools/server-side/test_svn_server_log_parse.py | 611 |
9 files changed, 3680 insertions, 0 deletions
diff --git a/tools/server-side/fsfs-reshard.py b/tools/server-side/fsfs-reshard.py new file mode 100755 index 0000000..d039885 --- /dev/null +++ b/tools/server-side/fsfs-reshard.py @@ -0,0 +1,399 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# fsfs-reshard.py REPOS_PATH MAX_FILES_PER_SHARD +# +# Perform an offline conversion of an FSFS repository between linear (format +# 2, usable by Subversion 1.4+) and sharded (format 3, usable by Subversion +# 1.5+) layouts. +# +# The MAX_FILES_PER_SHARD argument specifies the maximum number of files +# that will be stored in each shard (directory), or zero to specify a linear +# layout. Subversion 1.5 uses a default value of 1000 files per shard. +# +# As the repository will not be valid while the conversion is in progress, +# the repository administrator must ensure that access to the repository is +# blocked for the duration of the conversion. +# +# In the event that the conversion is interrupted, the repository will be in +# an inconsistent state. The repository administrator should then re-run +# this tool to completion. +# +# +# Note that, currently, resharding from one sharded layout to another is +# likely to be an extremely slow process. To reshard, we convert from a +# sharded to linear layout and then to the new sharded layout. The problem +# is that the initial conversion to the linear layout triggers exactly the +# same 'large number of files in a directory' problem that sharding is +# intended to solve. +# +# ==================================================================== +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ==================================================================== +# +# $HeadURL: http://svn.apache.org/repos/asf/subversion/branches/1.7.x/tools/server-side/fsfs-reshard.py $ +# $LastChangedDate: 2009-11-16 19:07:17 +0000 (Mon, 16 Nov 2009) $ +# $LastChangedBy: hwright $ +# $LastChangedRevision: 880911 $ + +import os, stat, sys + +from errno import EEXIST + +def usage(): + """Print a usage message and exit.""" + print("""usage: %s REPOS_PATH MAX_FILES_PER_SHARD [START END] + +Perform an offline conversion of an FSFS repository between linear +(readable by Subversion 1.4 or later) and sharded (readable by +Subversion 1.5 or later) layouts. + +The MAX_FILES_PER_SHARD argument specifies the maximum number of +files that will be stored in each shard (directory), or zero to +specify a linear layout. Subversion 1.5 uses a default value of +1000 files per shard. + +Convert revisions START through END inclusive if specified, or all +revisions if unspecified. +""" % sys.argv[0]) + sys.exit(1) + +def incompatible_repos_format(repos_path, format): + """Print an error saying that REPOS_PATH is a repository with an + incompatible repository format FORMAT, then exit.""" + sys.stderr.write("""error: unable to convert repository '%s'. + +This repository is not compatible with this tool. Valid +repository formats are '3' or '5'; this repository is +format '%s'. 
+ +""" % (repos_path, format)) + sys.stderr.flush() + sys.exit(1) + +def incompatible_fs_format(repos_path, format): + """Print an error saying that REPOS_PATH is a repository with an + incompatible filesystem format FORMAT, then exit.""" + sys.stderr.write("""error: unable to convert repository '%s'. + +This repository contains a filesystem that is not compatible with +this tool. Valid filesystem formats are '1', '2', or '3'; this +repository contains a filesystem with format '%s'. + +""" % (repos_path, format)) + sys.stderr.flush() + sys.exit(1) + +def unexpected_fs_format_options(repos_path): + """Print an error saying that REPOS_PATH is a repository with + unexpected filesystem format options, then exit.""" + sys.stderr.write("""error: unable to convert repository '%s'. + +This repository contains a filesystem that appears to be invalid - +there is unexpected data after the filesystem format number. + +""" % repos_path) + sys.stderr.flush() + sys.exit(1) + +def incompatible_fs_format_option(repos_path, option): + """Print an error saying that REPOS_PATH is a repository with an + incompatible filesystem format option OPTION, then exit.""" + sys.stderr.write("""error: unable to convert repository '%s'. + +This repository contains a filesystem that is not compatible with +this tool. This tool recognises the 'layout' option but the +filesystem uses the '%s' option. + +""" % (repos_path, option)) + sys.stderr.flush() + sys.exit(1) + +def warn_about_fs_format_1(repos_path, format_path): + """Print a warning saying that REPOS_PATH contains a format 1 FSFS + filesystem that we can't reconstruct, then exit.""" + sys.stderr.write("""warning: conversion of '%s' will be one-way. + +This repository is currently readable by Subversion 1.1 or later. 
+This tool can convert this repository to one that is readable by +either Subversion 1.4 (or later) or Subversion 1.5 (or later), +but it is not able to convert it back to the original format - a +separate dump/load step would be required. + +If you would like to upgrade this repository anyway, delete the +file '%s' and re-run this tool. + +""" % (repos_path, format_path)) + sys.stderr.flush() + sys.exit(1) + +def check_repos_format(repos_path): + """Check that REPOS_PATH contains a repository with a suitable format; + print a message and exit if not.""" + format_path = os.path.join(repos_path, 'format') + try: + format_file = open(format_path) + format = format_file.readline() + if not format.endswith('\n'): + incompatible_repos_format(repos_path, format + ' <missing newline>') + format = format.rstrip('\n') + if format == '3' or format == '5': + pass + else: + incompatible_repos_format(repos_path, format) + except IOError: + # In all likelihood, the file doesn't exist. + incompatible_repos_format(repos_path, '<unreadable>') + +def check_fs_format(repos_path): + """Check that REPOS_PATH contains a filesystem with a suitable format, + or that it contains no format file; print a message and exit if neither + is true. Return bool whether the filesystem is sharded.""" + sharded = False + db_path = os.path.join(repos_path, 'db') + format_path = os.path.join(db_path, 'format') + try: + format_file = open(format_path) + format = format_file.readline() + if not format.endswith('\n'): + incompatible_fs_format(repos_path, format + ' <missing newline>') + format = format.rstrip('\n') + if format == '1': + # This is a format 1 (svndiff0 only) filesystem. We can upgrade it, + # but we can't downgrade again (since we can't uncompress any of the + # svndiff1 deltas that may have been written). Warn the user and exit. 
+ warn_about_fs_format_1(repos_path, format_path) + if format == '2': + pass + elif format == '3': + pass + else: + incompatible_fs_format(repos_path, format) + + for line in format_file: + if format == '2': + unexpected_fs_format_options(repos_path) + + line = line.rstrip('\n') + if line == 'layout linear': + pass + elif line.startswith('layout sharded '): + sharded = True + else: + incompatible_fs_format_option(repos_path, line) + + format_file.close() + except IOError: + # The format file might not exist if we've previously been interrupted, + # or if the user is following our advice about upgrading a format 1 + # repository. In both cases, we'll just assume the format was + # compatible. + pass + + return sharded + +def current_file(repos_path): + """Return triple of (revision, next_node_id, next_copy_id) from + REPOS_PATH/db/current .""" + return open(os.path.join(repos_path, 'db', 'current')).readline().split() + +def remove_fs_format(repos_path): + """Remove the filesystem format file for repository REPOS_PATH. + Do not raise an error if the file is already missing.""" + format_path = os.path.join(repos_path, 'db', 'format') + try: + statinfo = os.stat(format_path) + except OSError: + # The file probably doesn't exist. + return + + # On Windows, we need to ensure the file is writable before we can + # remove it. + os.chmod(format_path, statinfo.st_mode | stat.S_IWUSR) + os.remove(format_path) + +def write_fs_format(repos_path, contents): + """Write a new filesystem format file for repository REPOS_PATH containing + CONTENTS.""" + format_path = os.path.join(repos_path, 'db', 'format') + f = open(format_path, 'wb') + f.write(contents) + f.close() + os.chmod(format_path, stat.S_IRUSR | stat.S_IRGRP) + +def linearise(path): + """Move all the files in subdirectories of PATH into PATH, and remove the + subdirectories. 
Handle conflicts between subdirectory names and files + contained in subdirectories by ensuring subdirectories have a '.shard' + suffix prior to moving (the files are assumed not to have this suffix. + Abort if a subdirectory is found to contain another subdirectory.""" + # First enumerate all subdirectories of DIR and rename where necessary + # to include a .shard suffix. + for name in os.listdir(path): + if name.endswith('.shard'): + continue + subdir_path = os.path.join(path, name) + if not os.path.isdir(subdir_path): + continue + os.rename(subdir_path, subdir_path + '.shard') + + # Now move all the subdirectory contents into the parent and remove + # the subdirectories. + for root_path, dirnames, filenames in os.walk(path): + if root_path == path: + continue + if len(dirnames) > 0: + sys.stderr.write("error: directory '%s' contains other unexpected directories.\n" \ + % root_path) + sys.stderr.flush() + sys.exit(1) + for name in filenames: + from_path = os.path.join(root_path, name) + to_path = os.path.join(path, name) + os.rename(from_path, to_path) + os.rmdir(root_path) + +def shard(path, max_files_per_shard, start, end): + """Move the files for revisions START to END inclusive in PATH into + subdirectories of PATH named such that subdirectory '0' contains at most + MAX_FILES_PER_SHARD files, those named [0, MAX_FILES_PER_SHARD). Abort if + PATH is found to contain any entries with non-numeric names.""" + + tmp = path + '.reshard' + try: + os.mkdir(tmp) + except OSError, e: + if e.errno != EEXIST: + raise + + # Move all entries into shards named N.shard. + for rev in range(start, end + 1): + name = str(rev) + shard = rev // max_files_per_shard + shard_name = str(shard) + '.shard' + + from_path = os.path.join(path, name) + to_path = os.path.join(tmp, shard_name, name) + try: + os.rename(from_path, to_path) + except OSError: + # The most likely explanation is that the shard directory doesn't + # exist. Let's create it and retry the rename. 
+ os.mkdir(os.path.join(tmp, shard_name)) + os.rename(from_path, to_path) + + # Now rename all the shards to remove the suffix. + skipped = 0 + for name in os.listdir(tmp): + if not name.endswith('.shard'): + sys.stderr.write("warning: ignoring unexpected subdirectory '%s'.\n" \ + % os.path.join(tmp, name)) + sys.stderr.flush() + skipped += 1 + continue + from_path = os.path.join(tmp, name) + to_path = os.path.join(path, os.path.basename(from_path)[:-6]) + os.rename(from_path, to_path) + skipped == 0 and os.rmdir(tmp) + +def main(): + if len(sys.argv) < 3: + usage() + + repos_path = sys.argv[1] + max_files_per_shard = sys.argv[2] + try: + start = int(sys.argv[3]) + end = int(sys.argv[4]) + except IndexError: + start = 0 + end = int(current_file(repos_path)[0]) + + # Validate the command-line arguments. + db_path = os.path.join(repos_path, 'db') + current_path = os.path.join(db_path, 'current') + if not os.path.exists(current_path): + sys.stderr.write("error: '%s' doesn't appear to be a Subversion FSFS repository.\n" \ + % repos_path) + sys.stderr.flush() + sys.exit(1) + + try: + max_files_per_shard = int(max_files_per_shard) + except ValueError, OverflowError: + sys.stderr.write("error: maximum files per shard ('%s') is not a valid number.\n" \ + % max_files_per_shard) + sys.stderr.flush() + sys.exit(1) + + if max_files_per_shard < 0: + sys.stderr.write("error: maximum files per shard ('%d') must not be negative.\n" \ + % max_files_per_shard) + sys.stderr.flush() + sys.exit(1) + + # Check the format of the repository. + check_repos_format(repos_path) + sharded = check_fs_format(repos_path) + + # Let the user know what's going on. 
+ if max_files_per_shard > 0: + print("Converting '%s' to a sharded structure with %d files per directory" \ + % (repos_path, max_files_per_shard)) + if sharded: + print('(will convert to a linear structure first)') + else: + print("Converting '%s' to a linear structure" % repos_path) + + # Prevent access to the repository for the duration of the conversion. + # There's no clean way to do this, but since the format of the repository + # is indeterminate, let's remove the format file while we're converting. + print('- marking the repository as invalid') + remove_fs_format(repos_path) + + # First, convert to a linear scheme (this makes recovery easier because + # it's easier to reason about the behaviour on restart). + if sharded: + print('- linearising db/revs') + linearise(os.path.join(repos_path, 'db', 'revs')) + print('- linearising db/revprops') + linearise(os.path.join(repos_path, 'db', 'revprops')) + + if max_files_per_shard == 0: + # We're done. Stamp the filesystem with a format 2 db/format file. + print('- marking the repository as a valid linear repository') + write_fs_format(repos_path, '2\n') + else: + print('- sharding db/revs') + shard(os.path.join(repos_path, 'db', 'revs'), max_files_per_shard, + start, end) + print('- sharding db/revprops') + shard(os.path.join(repos_path, 'db', 'revprops'), max_files_per_shard, + start, end) + + # We're done. Stamp the filesystem with a format 3 db/format file. + print('- marking the repository as a valid sharded repository') + write_fs_format(repos_path, '3\nlayout sharded %d\n' % max_files_per_shard) + + print('- done.') + sys.exit(0) + +if __name__ == '__main__': + raise Exception("""This script is unfinished and not ready to be used on live data. 
+ Trust us.""") + main() diff --git a/tools/server-side/mod_dontdothat/README b/tools/server-side/mod_dontdothat/README new file mode 100644 index 0000000..7d4fe36 --- /dev/null +++ b/tools/server-side/mod_dontdothat/README @@ -0,0 +1,53 @@ +mod_dontdothat is an Apache module that allows you to block specific types +of Subversion requests. Specifically, it's designed to keep users from doing +things that are particularly hard on the server, like checking out the root +of the tree, or the tags or branches directories. It works by sticking an +input filter in front of all REPORT requests and looking for dangerous types +of requests. If it finds any, it returns a 403 Forbidden error. + +You can compile and install it via apxs: + +$ apxs -c \ + -I$PREFIX/include/subversion-1 \ + -L$PREFIX/lib -lsvn_subr-1 \ + mod_dontdothat.c + +$ apxs -i -n dontdothat mod_dontdothat.la + +It is enabled via a single httpd.conf directive, DontDoThatConfigFile: + +<Location /svn> + DAV svn + SVNParentPath /path/to/repositories + DontDoThatConfigFile /path/to/config.file + DontDoThatDisallowReplay off +</Location> + +The file you give to DontDoThatConfigFile is a Subversion configuration file +that contains the following section. + +[recursive-actions] +/*/trunk = allow +/ = deny +/* = deny +/*/tags = deny +/*/branches = deny +/*/* = deny +/*/*/tags = deny +/*/*/branches = deny + +As you might guess, this defines a set of patterns that control what the +user is not allowed to do. Anything with a 'deny' after it is denied, and +as a fallback mechanism anything with an 'allow' after it is special cased +to be allowed, even if it matches something that is denied. + +Note that the wildcard portions of a rule only swallow a single directory, +so /* will match /foo, but not /foo/bar. They also must be at the end of +a directory segment, so /foo* or /* are valid, but /*foo is not. 
+ +These rules are applied to any recursive action, which basically means any +Subversion command that goes through the update-report, like update, diff, +checkout, merge, etc. + +The DontDoThatDisallowReplay option makes mod_dontdothat disallow +replay requests, which is on by default. diff --git a/tools/server-side/mod_dontdothat/mod_dontdothat.c b/tools/server-side/mod_dontdothat/mod_dontdothat.c new file mode 100644 index 0000000..c7c6613 --- /dev/null +++ b/tools/server-side/mod_dontdothat/mod_dontdothat.c @@ -0,0 +1,661 @@ +/* + * mod_dontdothat.c: an Apache filter that allows you to return arbitrary + * errors for various types of Subversion requests. + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + +#include <httpd.h> +#include <http_config.h> +#include <http_protocol.h> +#include <http_request.h> +#include <http_log.h> +#include <util_filter.h> +#include <ap_config.h> +#include <apr_strings.h> + +#include <expat.h> + +#include "mod_dav_svn.h" +#include "svn_string.h" +#include "svn_config.h" + +module AP_MODULE_DECLARE_DATA dontdothat_module; + +typedef struct dontdothat_config_rec { + const char *config_file; + const char *base_path; + int no_replay; +} dontdothat_config_rec; + +static void *create_dontdothat_dir_config(apr_pool_t *pool, char *dir) +{ + dontdothat_config_rec *cfg = apr_pcalloc(pool, sizeof(*cfg)); + + cfg->base_path = dir; + cfg->no_replay = 1; + + return cfg; +} + +static const command_rec dontdothat_cmds[] = +{ + AP_INIT_TAKE1("DontDoThatConfigFile", ap_set_file_slot, + (void *) APR_OFFSETOF(dontdothat_config_rec, config_file), + OR_ALL, + "Text file containing actions to take for specific requests"), + AP_INIT_FLAG("DontDoThatDisallowReplay", ap_set_flag_slot, + (void *) APR_OFFSETOF(dontdothat_config_rec, no_replay), + OR_ALL, "Disallow replay requests as if they are other recursive requests."), + { NULL } +}; + +typedef enum parse_state_t { + STATE_BEGINNING, + STATE_IN_UPDATE, + STATE_IN_SRC_PATH, + STATE_IN_DST_PATH, + STATE_IN_RECURSIVE +} parse_state_t; + +typedef struct dontdothat_filter_ctx { + /* Set to TRUE when we determine that the request is safe and should be + * allowed to continue. */ + svn_boolean_t let_it_go; + + /* Set to TRUE when we determine that the request is unsafe and should be + * stopped in its tracks. */ + svn_boolean_t no_soup_for_you; + + XML_Parser xmlp; + + /* The current location in the REPORT body. */ + parse_state_t state; + + /* A buffer to hold CDATA we encounter. */ + svn_stringbuf_t *buffer; + + dontdothat_config_rec *cfg; + + /* An array of wildcards that are special cased to be allowed. 
*/ + apr_array_header_t *allow_recursive_ops; + + /* An array of wildcards where recursive operations are not allowed. */ + apr_array_header_t *no_recursive_ops; + + /* TRUE if a path has failed a test already. */ + svn_boolean_t path_failed; + + /* An error for when we're using this as a baton while parsing config + * files. */ + svn_error_t *err; + + /* The current request. */ + request_rec *r; +} dontdothat_filter_ctx; + +/* Return TRUE if wildcard WC matches path P, FALSE otherwise. */ +static svn_boolean_t +matches(const char *wc, const char *p) +{ + for (;;) + { + switch (*wc) + { + case '*': + if (wc[1] != '/' && wc[1] != '\0') + abort(); /* This was checked for during parsing of the config. */ + + /* It's a wild card, so eat up until the next / in p. */ + while (*p && p[1] != '/') + ++p; + + /* If we ran out of p and we're out of wc then it matched. */ + if (! *p) + { + if (wc[1] == '\0') + return TRUE; + else + return FALSE; + } + break; + + case '\0': + if (*p != '\0') + /* This means we hit the end of wc without running out of p. */ + return FALSE; + else + /* Or they were exactly the same length, so it's not lower. */ + return TRUE; + + default: + if (*wc != *p) + return FALSE; /* If we don't match, then move on to the next + * case. */ + else + break; + } + + ++wc; + ++p; + + if (! *p && *wc) + return FALSE; + } +} + +static svn_boolean_t +is_this_legal(dontdothat_filter_ctx *ctx, const char *uri) +{ + const char *relative_path; + const char *cleaned_uri; + const char *repos_name; + int trailing_slash; + dav_error *derr; + + /* Ok, so we need to skip past the scheme, host, etc. */ + uri = ap_strstr_c(uri, "://"); + if (uri) + uri = ap_strchr_c(uri + 3, '/'); + + if (uri) + { + const char *repos_path; + + derr = dav_svn_split_uri(ctx->r, + uri, + ctx->cfg->base_path, + &cleaned_uri, + &trailing_slash, + &repos_name, + &relative_path, + &repos_path); + if (! derr) + { + int idx; + + if (! 
repos_path) + repos_path = ""; + + repos_path = apr_psprintf(ctx->r->pool, "/%s", repos_path); + + /* First check the special cases that are always legal... */ + for (idx = 0; idx < ctx->allow_recursive_ops->nelts; ++idx) + { + const char *wc = APR_ARRAY_IDX(ctx->allow_recursive_ops, + idx, + const char *); + + if (matches(wc, repos_path)) + { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->r, + "mod_dontdothat: rule %s allows %s", + wc, repos_path); + return TRUE; + } + } + + /* Then look for stuff we explicitly don't allow. */ + for (idx = 0; idx < ctx->no_recursive_ops->nelts; ++idx) + { + const char *wc = APR_ARRAY_IDX(ctx->no_recursive_ops, + idx, + const char *); + + if (matches(wc, repos_path)) + { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->r, + "mod_dontdothat: rule %s forbids %s", + wc, repos_path); + return FALSE; + } + } + } + } + + return TRUE; +} + +static apr_status_t +dontdothat_filter(ap_filter_t *f, + apr_bucket_brigade *bb, + ap_input_mode_t mode, + apr_read_type_e block, + apr_off_t readbytes) +{ + dontdothat_filter_ctx *ctx = f->ctx; + apr_status_t rv; + apr_bucket *e; + + if (mode != AP_MODE_READBYTES) + return ap_get_brigade(f->next, bb, mode, block, readbytes); + + rv = ap_get_brigade(f->next, bb, mode, block, readbytes); + if (rv) + return rv; + + for (e = APR_BRIGADE_FIRST(bb); + e != APR_BRIGADE_SENTINEL(bb); + e = APR_BUCKET_NEXT(e)) + { + svn_boolean_t last = APR_BUCKET_IS_EOS(e); + const char *str; + apr_size_t len; + + if (last) + { + str = ""; + len = 0; + } + else + { + rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ); + if (rv) + return rv; + } + + if (! XML_Parse(ctx->xmlp, str, len, last)) + { + /* let_it_go so we clean up our parser, no_soup_for_you so that we + * bail out before bothering to parse this stuff a second time. 
*/ + ctx->let_it_go = TRUE; + ctx->no_soup_for_you = TRUE; + } + + /* If we found something that isn't allowed, set the correct status + * and return an error so it'll bail out before it gets anywhere it + * can do real damage. */ + if (ctx->no_soup_for_you) + { + /* XXX maybe set up the SVN-ACTION env var so that it'll show up + * in the Subversion operational logs? */ + + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, + "mod_dontdothat: client broke the rules, " + "returning error"); + + /* Ok, pass an error bucket and an eos bucket back to the client. + * + * NOTE: The custom error string passed here doesn't seem to be + * used anywhere by httpd. This is quite possibly a bug. + * + * TODO: Try and pass back a custom document body containing a + * serialized svn_error_t so the client displays a better + * error message. */ + bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); + e = ap_bucket_error_create(403, "No Soup For You!", + f->r->pool, f->c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(bb, e); + e = apr_bucket_eos_create(f->c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(bb, e); + + /* Don't forget to remove us, otherwise recursion blows the stack. */ + ap_remove_input_filter(f); + + return ap_pass_brigade(f->r->output_filters, bb); + } + else if (ctx->let_it_go || last) + { + ap_remove_input_filter(f); + + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, + "mod_dontdothat: letting request go through"); + + return rv; + } + } + + return rv; +} + +static void +cdata(void *baton, const char *data, int len) +{ + dontdothat_filter_ctx *ctx = baton; + + if (ctx->no_soup_for_you || ctx->let_it_go) + return; + + switch (ctx->state) + { + case STATE_IN_SRC_PATH: + /* FALLTHROUGH */ + + case STATE_IN_DST_PATH: + /* FALLTHROUGH */ + + case STATE_IN_RECURSIVE: + if (! 
ctx->buffer) + ctx->buffer = svn_stringbuf_ncreate(data, len, ctx->r->pool); + else + svn_stringbuf_appendbytes(ctx->buffer, data, len); + break; + + default: + break; + } +} + +static void +start_element(void *baton, const char *name, const char **attrs) +{ + dontdothat_filter_ctx *ctx = baton; + const char *sep; + + if (ctx->no_soup_for_you || ctx->let_it_go) + return; + + /* XXX Hack. We should be doing real namespace support, but for now we + * just skip ahead of any namespace prefix. If someone's sending us + * an update-report element outside of the SVN namespace they'll get + * what they deserve... */ + sep = ap_strchr_c(name, ':'); + if (sep) + name = sep + 1; + + switch (ctx->state) + { + case STATE_BEGINNING: + if (strcmp(name, "update-report") == 0) + ctx->state = STATE_IN_UPDATE; + else if (strcmp(name, "replay-report") == 0 && ctx->cfg->no_replay) + { + /* XXX it would be useful if there was a way to override this + * on a per-user basis... */ + if (! is_this_legal(ctx, ctx->r->unparsed_uri)) + ctx->no_soup_for_you = TRUE; + else + ctx->let_it_go = TRUE; + } + else + ctx->let_it_go = TRUE; + break; + + case STATE_IN_UPDATE: + if (strcmp(name, "src-path") == 0) + { + ctx->state = STATE_IN_SRC_PATH; + if (ctx->buffer) + ctx->buffer->len = 0; + } + else if (strcmp(name, "dst-path") == 0) + { + ctx->state = STATE_IN_DST_PATH; + if (ctx->buffer) + ctx->buffer->len = 0; + } + else if (strcmp(name, "recursive") == 0) + { + ctx->state = STATE_IN_RECURSIVE; + if (ctx->buffer) + ctx->buffer->len = 0; + } + else + ; /* XXX Figure out what else we need to deal with... Switch + * has that link-path thing we probably need to look out + * for... */ + break; + + default: + break; + } +} + +static void +end_element(void *baton, const char *name) +{ + dontdothat_filter_ctx *ctx = baton; + const char *sep; + + if (ctx->no_soup_for_you || ctx->let_it_go) + return; + + /* XXX Hack. 
We should be doing real namespace support, but for now we + * just skip ahead of any namespace prefix. If someone's sending us + * an update-report element outside of the SVN namespace they'll get + * what they deserve... */ + sep = ap_strchr_c(name, ':'); + if (sep) + name = sep + 1; + + switch (ctx->state) + { + case STATE_IN_SRC_PATH: + ctx->state = STATE_IN_UPDATE; + + svn_stringbuf_strip_whitespace(ctx->buffer); + + if (! ctx->path_failed && ! is_this_legal(ctx, ctx->buffer->data)) + ctx->path_failed = TRUE; + break; + + case STATE_IN_DST_PATH: + ctx->state = STATE_IN_UPDATE; + + svn_stringbuf_strip_whitespace(ctx->buffer); + + if (! ctx->path_failed && ! is_this_legal(ctx, ctx->buffer->data)) + ctx->path_failed = TRUE; + break; + + case STATE_IN_RECURSIVE: + ctx->state = STATE_IN_UPDATE; + + svn_stringbuf_strip_whitespace(ctx->buffer); + + /* If this isn't recursive we let it go. */ + if (strcmp(ctx->buffer->data, "no") == 0) + { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->r, + "mod_dontdothat: letting nonrecursive request go"); + ctx->let_it_go = TRUE; + } + break; + + case STATE_IN_UPDATE: + if (strcmp(name, "update-report") == 0) + { + /* If we made it here without figuring out that this is + * nonrecursive, then the path check is our final word + * on the subject. */ + + if (ctx->path_failed) + ctx->no_soup_for_you = TRUE; + else + ctx->let_it_go = TRUE; + } + else + ; /* XXX Is there other stuff we care about? 
*/ + break; + + default: + abort(); + } +} + +static svn_boolean_t +is_valid_wildcard(const char *wc) +{ + while (*wc) + { + if (*wc == '*') + { + if (wc[1] && wc[1] != '/') + return FALSE; + } + + ++wc; + } + + return TRUE; +} + +static svn_boolean_t +config_enumerator(const char *wildcard, + const char *action, + void *baton, + apr_pool_t *pool) +{ + dontdothat_filter_ctx *ctx = baton; + + if (strcmp(action, "deny") == 0) + { + if (is_valid_wildcard(wildcard)) + APR_ARRAY_PUSH(ctx->no_recursive_ops, const char *) = wildcard; + else + ctx->err = svn_error_createf(APR_EINVAL, + NULL, + "'%s' is an invalid wildcard", + wildcard); + } + else if (strcmp(action, "allow") == 0) + { + if (is_valid_wildcard(wildcard)) + APR_ARRAY_PUSH(ctx->allow_recursive_ops, const char *) = wildcard; + else + ctx->err = svn_error_createf(APR_EINVAL, + NULL, + "'%s' is an invalid wildcard", + wildcard); + } + else + { + ctx->err = svn_error_createf(APR_EINVAL, + NULL, + "'%s' is not a valid action", + action); + } + + if (ctx->err) + return FALSE; + else + return TRUE; +} + +static apr_status_t +clean_up_parser(void *baton) +{ + XML_Parser xmlp = baton; + + XML_ParserFree(xmlp); + + return APR_SUCCESS; +} + +static void +dontdothat_insert_filters(request_rec *r) +{ + dontdothat_config_rec *cfg = ap_get_module_config(r->per_dir_config, + &dontdothat_module); + + if (! cfg->config_file) + return; + + if (strcmp("REPORT", r->method) == 0) + { + dontdothat_filter_ctx *ctx = apr_pcalloc(r->pool, sizeof(*ctx)); + svn_config_t *config; + svn_error_t *err; + + ctx->r = r; + + ctx->cfg = cfg; + + ctx->allow_recursive_ops = apr_array_make(r->pool, 5, sizeof(char *)); + + ctx->no_recursive_ops = apr_array_make(r->pool, 5, sizeof(char *)); + + /* XXX is there a way to error out from this point? Would be nice... 
*/ + + err = svn_config_read(&config, cfg->config_file, TRUE, r->pool); + if (err) + { + char buff[256]; + + ap_log_rerror(APLOG_MARK, APLOG_ERR, + ((err->apr_err >= APR_OS_START_USERERR && + err->apr_err < APR_OS_START_CANONERR) ? + 0 : err->apr_err), + r, "Failed to load DontDoThatConfigFile: %s", + svn_err_best_message(err, buff, sizeof(buff))); + + svn_error_clear(err); + + return; + } + + svn_config_enumerate2(config, + "recursive-actions", + config_enumerator, + ctx, + r->pool); + if (ctx->err) + { + char buff[256]; + + ap_log_rerror(APLOG_MARK, APLOG_ERR, + ((ctx->err->apr_err >= APR_OS_START_USERERR && + ctx->err->apr_err < APR_OS_START_CANONERR) ? + 0 : ctx->err->apr_err), + r, "Failed to parse DontDoThatConfigFile: %s", + svn_err_best_message(ctx->err, buff, sizeof(buff))); + + svn_error_clear(ctx->err); + + return; + } + + ctx->state = STATE_BEGINNING; + + ctx->xmlp = XML_ParserCreate(NULL); + + apr_pool_cleanup_register(r->pool, ctx->xmlp, + clean_up_parser, + apr_pool_cleanup_null); + + XML_SetUserData(ctx->xmlp, ctx); + XML_SetElementHandler(ctx->xmlp, start_element, end_element); + XML_SetCharacterDataHandler(ctx->xmlp, cdata); + + ap_add_input_filter("DONTDOTHAT_FILTER", ctx, r, r->connection); + } +} + +static void +dontdothat_register_hooks(apr_pool_t *pool) +{ + ap_hook_insert_filter(dontdothat_insert_filters, NULL, NULL, APR_HOOK_FIRST); + + ap_register_input_filter("DONTDOTHAT_FILTER", + dontdothat_filter, + NULL, + AP_FTYPE_RESOURCE); +} + +module AP_MODULE_DECLARE_DATA dontdothat_module = +{ + STANDARD20_MODULE_STUFF, + create_dontdothat_dir_config, + NULL, + NULL, + NULL, + dontdothat_cmds, + dontdothat_register_hooks +}; diff --git a/tools/server-side/svn-backup-dumps.py b/tools/server-side/svn-backup-dumps.py new file mode 100755 index 0000000..bb6b235 --- /dev/null +++ b/tools/server-side/svn-backup-dumps.py @@ -0,0 +1,692 @@ +#!/usr/bin/env python +# +# svn-backup-dumps.py -- Create dumpfiles to backup a subversion repository. 
+# +# ==================================================================== +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ==================================================================== +# +# This script creates dump files from a subversion repository. +# It is intended for use in cron jobs and post-commit hooks. +# +# The basic operation modes are: +# 1. Create a full dump (revisions 0 to HEAD). +# 2. Create incremental dumps containing at most N revisions. +# 3. Create incremental single revision dumps (for use in post-commit). +# 4. Create incremental dumps containing everything since last dump. +# +# All dump files are prefixed with the basename of the repository. All +# examples below assume that the repository '/srv/svn/repos/src' is +# dumped so all dumpfiles start with 'src'. +# +# Optional functionality: +# 5. Create gzipped dump files. +# 6. Create bzipped dump files. +# 7. Transfer the dumpfile to another host using ftp. +# 8. Transfer the dumpfile to another host using smb. +# +# See also 'svn-backup-dumps.py -h'. +# +# +# 1. Create a full dump (revisions 0 to HEAD). +# +# svn-backup-dumps.py <repos> <dumpdir> +# +# <repos> Path to the repository. +# <dumpdir> Directory for storing the dump file. 
+# +# This creates a dump file named 'src.000000-NNNNNN.svndmp.gz' +# where NNNNNN is the revision number of HEAD. +# +# +# 2. Create incremental dumps containing at most N revisions. +# +# svn-backup-dumps.py -c <count> <repos> <dumpdir> +# +# <count> Count of revisions per dump file. +# <repos> Path to the repository. +# <dumpdir> Directory for storing the dump file. +# +# When started the first time with a count of 1000 and if HEAD is +# at 2923 it creates the following files: +# +# src.000000-000999.svndmp.gz +# src.001000-001999.svndmp.gz +# src.002000-002923.svndmp.gz +# +# Say the next time HEAD is at 3045 it creates these two files: +# +# src.002000-002999.svndmp.gz +# src.003000-003045.svndmp.gz +# +# +# 3. Create incremental single revision dumps (for use in post-commit). +# +# svn-backup-dumps.py -r <revnr> <repos> <dumpdir> +# +# <revnr> A revision number. +# <repos> Path to the repository. +# <dumpdir> Directory for storing the dump file. +# +# This creates a dump file named 'src.NNNNNN.svndmp.gz' where +# NNNNNN is the revision number of HEAD. +# +# +# 4. Create incremental dumps relative to last dump +# +# svn-backup-dumps.py -i <repos> <dumpdir> +# +# <repos> Path to the repository. +# <dumpdir> Directory for storing the dump file. +# +# When if dumps are performed when HEAD is 2923, +# then when HEAD is 3045, is creates these files: +# +# src.000000-002923.svndmp.gz +# src.002924-003045.svndmp.gz +# +# +# 5. Create gzipped dump files. +# +# svn-backup-dumps.py -z ... +# +# ... More options, see 1-4, 7, 8. +# +# +# 6. Create bzipped dump files. +# +# svn-backup-dumps.py -b ... +# +# ... More options, see 1-4, 7, 8. +# +# +# 7. Transfer the dumpfile to another host using ftp. +# +# svn-backup-dumps.py -t ftp:<host>:<user>:<password>:<path> ... +# +# <host> Name of the FTP host. +# <user> Username on the remote host. +# <password> Password for the user. +# <path> Subdirectory on the remote host. +# ... More options, see 1-6. 
+# +# If <path> contains the string '%r' it is replaced by the +# repository name (basename of the repository path). +# +# +# 8. Transfer the dumpfile to another host using smb. +# +# svn-backup-dumps.py -t smb:<share>:<user>:<password>:<path> ... +# +# <share> Name of an SMB share in the form '//host/share'. +# <user> Username on the remote host. +# <password> Password for the user. +# <path> Subdirectory of the share. +# ... More options, see 1-6. +# +# If <path> contains the string '%r' it is replaced by the +# repository name (basename of the repository path). +# +# +# +# TODO: +# - find out how to report smbclient errors +# - improve documentation +# + +__version = "0.6" + +import sys +import os +if os.name != "nt": + import fcntl + import select +import gzip +import os.path +import re +from optparse import OptionParser +from ftplib import FTP +from subprocess import Popen, PIPE + +try: + import bz2 + have_bz2 = True +except ImportError: + have_bz2 = False + + +class SvnBackupOutput: + + def __init__(self, abspath, filename): + self.__filename = filename + self.__absfilename = os.path.join(abspath, filename) + + def open(self): + pass + + def write(self, data): + pass + + def close(self): + pass + + def get_filename(self): + return self.__filename + + def get_absfilename(self): + return self.__absfilename + + +class SvnBackupOutputPlain(SvnBackupOutput): + + def __init__(self, abspath, filename): + SvnBackupOutput.__init__(self, abspath, filename) + + def open(self): + self.__ofd = open(self.get_absfilename(), "wb") + + def write(self, data): + self.__ofd.write(data) + + def close(self): + self.__ofd.close() + + +class SvnBackupOutputGzip(SvnBackupOutput): + + def __init__(self, abspath, filename): + SvnBackupOutput.__init__(self, abspath, filename + ".gz") + + def open(self): + self.__compressor = gzip.GzipFile(filename=self.get_absfilename(), + mode="wb") + + def write(self, data): + self.__compressor.write(data) + + def close(self): + 
self.__compressor.flush() + self.__compressor.close() + + +class SvnBackupOutputBzip2(SvnBackupOutput): + + def __init__(self, abspath, filename): + SvnBackupOutput.__init__(self, abspath, filename + ".bz2") + + def open(self): + self.__compressor = bz2.BZ2Compressor() + self.__ofd = open(self.get_absfilename(), "wb") + + def write(self, data): + self.__ofd.write(self.__compressor.compress(data)) + + def close(self): + self.__ofd.write(self.__compressor.flush()) + self.__ofd.close() + +class SvnBackupOutputCommand(SvnBackupOutput): + + def __init__(self, abspath, filename, file_extension, cmd_path, + cmd_options): + SvnBackupOutput.__init__(self, abspath, filename + file_extension) + self.__cmd_path = cmd_path + self.__cmd_options = cmd_options + + def open(self): + cmd = [ self.__cmd_path, self.__cmd_options ] + + self.__ofd = open(self.get_absfilename(), "wb") + try: + proc = Popen(cmd, stdin=PIPE, stdout=self.__ofd, shell=False) + except: + print (256, "", "Popen failed (%s ...):\n %s" % (cmd[0], + str(sys.exc_info()[1]))) + sys.exit(256) + self.__proc = proc + self.__stdin = proc.stdin + + def write(self, data): + self.__stdin.write(data) + + def close(self): + self.__stdin.close() + rc = self.__proc.wait() + self.__ofd.close() + +class SvnBackupException(Exception): + + def __init__(self, errortext): + self.errortext = errortext + + def __str__(self): + return self.errortext + +class SvnBackup: + + def __init__(self, options, args): + # need 3 args: progname, reposname, dumpdir + if len(args) != 3: + if len(args) < 3: + raise SvnBackupException("too few arguments, specify" + " repospath and dumpdir.\nuse -h or" + " --help option to see help.") + else: + raise SvnBackupException("too many arguments, specify" + " repospath and dumpdir only.\nuse" + " -h or --help option to see help.") + self.__repospath = args[1] + self.__dumpdir = args[2] + # check repospath + rpathparts = os.path.split(self.__repospath) + if len(rpathparts[1]) == 0: + # repospath without 
trailing slash + self.__repospath = rpathparts[0] + if not os.path.exists(self.__repospath): + raise SvnBackupException("repos '%s' does not exist." % self.__repospath) + if not os.path.isdir(self.__repospath): + raise SvnBackupException("repos '%s' is not a directory." % self.__repospath) + for subdir in [ "db", "conf", "hooks" ]: + dir = os.path.join(self.__repospath, subdir) + if not os.path.isdir(dir): + raise SvnBackupException("repos '%s' is not a repository." % self.__repospath) + rpathparts = os.path.split(self.__repospath) + self.__reposname = rpathparts[1] + if self.__reposname in [ "", ".", ".." ]: + raise SvnBackupException("couldn't extract repos name from '%s'." % self.__repospath) + # check dumpdir + if not os.path.exists(self.__dumpdir): + raise SvnBackupException("dumpdir '%s' does not exist." % self.__dumpdir) + elif not os.path.isdir(self.__dumpdir): + raise SvnBackupException("dumpdir '%s' is not a directory." % self.__dumpdir) + # set options + self.__rev_nr = options.rev + self.__count = options.cnt + self.__quiet = options.quiet + self.__deltas = options.deltas + self.__relative_incremental = options.relative_incremental + + # svnadmin/svnlook path + self.__svnadmin_path = "svnadmin" + if options.svnadmin_path: + self.__svnadmin_path = options.svnadmin_path + self.__svnlook_path = "svnlook" + if options.svnlook_path: + self.__svnlook_path = options.svnlook_path + + # check compress option + self.__gzip_path = options.gzip_path + self.__bzip2_path = options.bzip2_path + self.__zip = None + compress_options = 0 + if options.gzip_path != None: + compress_options = compress_options + 1 + if options.bzip2_path != None: + compress_options = compress_options + 1 + if options.bzip2: + compress_options = compress_options + 1 + self.__zip = "bzip2" + if options.gzip: + compress_options = compress_options + 1 + self.__zip = "gzip" + if compress_options > 1: + raise SvnBackupException("--bzip2-path, --gzip-path, -b, -z are " + "mutually exclusive.") + + 
self.__overwrite = False + self.__overwrite_all = False + if options.overwrite > 0: + self.__overwrite = True + if options.overwrite > 1: + self.__overwrite_all = True + self.__transfer = None + if options.transfer != None: + self.__transfer = options.transfer.split(":") + if len(self.__transfer) != 5: + if len(self.__transfer) < 5: + raise SvnBackupException("too few fields for transfer '%s'." % self.__transfer) + else: + raise SvnBackupException("too many fields for transfer '%s'." % self.__transfer) + if self.__transfer[0] not in [ "ftp", "smb" ]: + raise SvnBackupException("unknown transfer method '%s'." % self.__transfer[0]) + + def set_nonblock(self, fileobj): + fd = fileobj.fileno() + n = fcntl.fcntl(fd, fcntl.F_GETFL) + fcntl.fcntl(fd, fcntl.F_SETFL, n|os.O_NONBLOCK) + + def exec_cmd(self, cmd, output=None, printerr=False): + if os.name == "nt": + return self.exec_cmd_nt(cmd, output, printerr) + else: + return self.exec_cmd_unix(cmd, output, printerr) + + def exec_cmd_unix(self, cmd, output=None, printerr=False): + try: + proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False) + except: + return (256, "", "Popen failed (%s ...):\n %s" % (cmd[0], + str(sys.exc_info()[1]))) + stdout = proc.stdout + stderr = proc.stderr + self.set_nonblock(stdout) + self.set_nonblock(stderr) + readfds = [ stdout, stderr ] + selres = select.select(readfds, [], []) + bufout = "" + buferr = "" + while len(selres[0]) > 0: + for fd in selres[0]: + buf = fd.read(16384) + if len(buf) == 0: + readfds.remove(fd) + elif fd == stdout: + if output: + output.write(buf) + else: + bufout += buf + else: + if printerr: + sys.stdout.write("%s " % buf) + else: + buferr += buf + if len(readfds) == 0: + break + selres = select.select(readfds, [], []) + rc = proc.wait() + if printerr: + print("") + return (rc, bufout, buferr) + + def exec_cmd_nt(self, cmd, output=None, printerr=False): + try: + proc = Popen(cmd, stdout=PIPE, stderr=None, shell=False) + except: + return (256, "", "Popen failed (%s 
...):\n %s" % (cmd[0], + str(sys.exc_info()[1]))) + stdout = proc.stdout + bufout = "" + buferr = "" + buf = stdout.read(16384) + while len(buf) > 0: + if output: + output.write(buf) + else: + bufout += buf + buf = stdout.read(16384) + rc = proc.wait() + return (rc, bufout, buferr) + + def get_head_rev(self): + cmd = [ self.__svnlook_path, "youngest", self.__repospath ] + r = self.exec_cmd(cmd) + if r[0] == 0 and len(r[2]) == 0: + return int(r[1].strip()) + else: + print(r[2]) + return -1 + + def get_last_dumped_rev(self): + filename_regex = re.compile("(.+)\.\d+-(\d+)\.svndmp.*") + # start with -1 so the next one will be rev 0 + highest_rev = -1 + + for filename in os.listdir(self.__dumpdir): + m = filename_regex.match( filename ) + if m and (m.group(1) == self.__reposname): + rev_end = int(m.group(2)) + + if rev_end > highest_rev: + # determine the latest revision dumped + highest_rev = rev_end + + return highest_rev + + def transfer_ftp(self, absfilename, filename): + rc = False + try: + host = self.__transfer[1] + user = self.__transfer[2] + passwd = self.__transfer[3] + destdir = self.__transfer[4].replace("%r", self.__reposname) + ftp = FTP(host, user, passwd) + ftp.cwd(destdir) + ifd = open(absfilename, "rb") + ftp.storbinary("STOR %s" % filename, ifd) + ftp.quit() + rc = len(ifd.read(1)) == 0 + ifd.close() + except Exception, e: + raise SvnBackupException("ftp transfer failed:\n file: '%s'\n error: %s" % \ + (absfilename, str(e))) + return rc + + def transfer_smb(self, absfilename, filename): + share = self.__transfer[1] + user = self.__transfer[2] + passwd = self.__transfer[3] + if passwd == "": + passwd = "-N" + destdir = self.__transfer[4].replace("%r", self.__reposname) + cmd = ("smbclient", share, "-U", user, passwd, "-D", destdir, + "-c", "put %s %s" % (absfilename, filename)) + r = self.exec_cmd(cmd) + rc = r[0] == 0 + if not rc: + print(r[2]) + return rc + + def transfer(self, absfilename, filename): + if self.__transfer == None: + return + elif 
self.__transfer[0] == "ftp": + self.transfer_ftp(absfilename, filename) + elif self.__transfer[0] == "smb": + self.transfer_smb(absfilename, filename) + else: + print("unknown transfer method '%s'." % self.__transfer[0]) + + def create_dump(self, checkonly, overwrite, fromrev, torev=None): + revparam = "%d" % fromrev + r = "%06d" % fromrev + if torev != None: + revparam += ":%d" % torev + r += "-%06d" % torev + filename = "%s.%s.svndmp" % (self.__reposname, r) + output = None + if self.__bzip2_path: + output = SvnBackupOutputCommand(self.__dumpdir, filename, ".bz2", + self.__bzip2_path, "-cz" ) + elif self.__gzip_path: + output = SvnBackupOutputCommand(self.__dumpdir, filename, ".gz", + self.__gzip_path, "-cf" ) + elif self.__zip: + if self.__zip == "gzip": + output = SvnBackupOutputGzip(self.__dumpdir, filename) + else: + output = SvnBackupOutputBzip2(self.__dumpdir, filename) + else: + output = SvnBackupOutputPlain(self.__dumpdir, filename) + absfilename = output.get_absfilename() + realfilename = output.get_filename() + if checkonly: + return os.path.exists(absfilename) + elif os.path.exists(absfilename): + if overwrite: + print("overwriting " + absfilename) + else: + print("%s already exists." 
% absfilename) + return True + else: + print("writing " + absfilename) + cmd = [ self.__svnadmin_path, "dump", + "--incremental", "-r", revparam, self.__repospath ] + if self.__quiet: + cmd[2:2] = [ "-q" ] + if self.__deltas: + cmd[2:2] = [ "--deltas" ] + output.open() + r = self.exec_cmd(cmd, output, True) + output.close() + rc = r[0] == 0 + if rc: + self.transfer(absfilename, realfilename) + return rc + + def export_single_rev(self): + return self.create_dump(False, self.__overwrite, self.__rev_nr) + + def export(self): + headrev = self.get_head_rev() + if headrev == -1: + return False + if self.__count is None: + return self.create_dump(False, self.__overwrite, 0, headrev) + baserev = headrev - (headrev % self.__count) + rc = True + cnt = self.__count + fromrev = baserev - cnt + torev = baserev - 1 + while fromrev >= 0 and rc: + if self.__overwrite_all or \ + not self.create_dump(True, False, fromrev, torev): + rc = self.create_dump(False, self.__overwrite_all, + fromrev, torev) + fromrev -= cnt + torev -= cnt + else: + fromrev = -1 + if rc: + rc = self.create_dump(False, self.__overwrite, baserev, headrev) + return rc + + def export_relative_incremental(self): + headrev = self.get_head_rev() + if headrev == -1: + return False + + last_dumped_rev = self.get_last_dumped_rev(); + if headrev < last_dumped_rev: + # that should not happen... 
+ return False + + if headrev == last_dumped_rev: + # already up-to-date + return True + + return self.create_dump(False, False, last_dumped_rev + 1, headrev) + + def execute(self): + if self.__rev_nr != None: + return self.export_single_rev() + elif self.__relative_incremental: + return self.export_relative_incremental() + else: + return self.export() + + +if __name__ == "__main__": + usage = "usage: svn-backup-dumps.py [options] repospath dumpdir" + parser = OptionParser(usage=usage, version="%prog "+__version) + if have_bz2: + parser.add_option("-b", + action="store_true", + dest="bzip2", default=False, + help="compress the dump using python bzip2 library.") + parser.add_option("-i", + action="store_true", + dest="relative_incremental", default=False, + help="perform incremental relative to last dump.") + parser.add_option("--deltas", + action="store_true", + dest="deltas", default=False, + help="pass --deltas to svnadmin dump.") + parser.add_option("-c", + action="store", type="int", + dest="cnt", default=None, + help="count of revisions per dumpfile.") + parser.add_option("-o", + action="store_const", const=1, + dest="overwrite", default=0, + help="overwrite files.") + parser.add_option("-O", + action="store_const", const=2, + dest="overwrite", default=0, + help="overwrite all files.") + parser.add_option("-q", + action="store_true", + dest="quiet", default=False, + help="quiet.") + parser.add_option("-r", + action="store", type="int", + dest="rev", default=None, + help="revision number for single rev dump.") + parser.add_option("-t", + action="store", type="string", + dest="transfer", default=None, + help="transfer dumps to another machine "+ + "(s.a. 
--help-transfer).") + parser.add_option("-z", + action="store_true", + dest="gzip", default=False, + help="compress the dump using python gzip library.") + parser.add_option("--bzip2-path", + action="store", type="string", + dest="bzip2_path", default=None, + help="compress the dump using bzip2 custom command.") + parser.add_option("--gzip-path", + action="store", type="string", + dest="gzip_path", default=None, + help="compress the dump using gzip custom command.") + parser.add_option("--svnadmin-path", + action="store", type="string", + dest="svnadmin_path", default=None, + help="svnadmin command path.") + parser.add_option("--svnlook-path", + action="store", type="string", + dest="svnlook_path", default=None, + help="svnlook command path.") + parser.add_option("--help-transfer", + action="store_true", + dest="help_transfer", default=False, + help="shows detailed help for the transfer option.") + (options, args) = parser.parse_args(sys.argv) + if options.help_transfer: + print("Transfer help:") + print("") + print(" FTP:") + print(" -t ftp:<host>:<user>:<password>:<dest-path>") + print("") + print(" SMB (using smbclient):") + print(" -t smb:<share>:<user>:<password>:<dest-path>") + print("") + sys.exit(0) + rc = False + try: + backup = SvnBackup(options, args) + rc = backup.execute() + except SvnBackupException, e: + print("svn-backup-dumps.py: %s" % e) + if rc: + print("Everything OK.") + sys.exit(0) + else: + print("An error occured!") + sys.exit(1) + +# vim:et:ts=4:sw=4 diff --git a/tools/server-side/svn-populate-node-origins-index.c b/tools/server-side/svn-populate-node-origins-index.c new file mode 100644 index 0000000..b9762c4 --- /dev/null +++ b/tools/server-side/svn-populate-node-origins-index.c @@ -0,0 +1,193 @@ +/* + * svn-populate-node-origins-index.c : Populate the repository's node + * origins index. 
+ * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include "svn_cmdline.h" +#include "svn_error.h" +#include "svn_fs.h" +#include "svn_path.h" +#include "svn_pools.h" +#include "svn_repos.h" +#include "svn_utf.h" + +/* Used to terminate lines in large multi-line string literals. */ +#define NL APR_EOL_STR + +static const char *usage_summary = + "Crawl the Subversion repository located at REPOS-PATH in an attempt to" NL + "populate that repository's index of node origins. " NL + "" NL + "The node origins index is new as of Subversion 1.5, and behaves as a" NL + "cache to vastly speed up certain history-querying operations. For" NL + "compatibility with repositories created with pre-1.5 versions of" NL + "Subversion, Subversion will gracefully handle cache misses by doing a" NL + "brute-force calculation of the query answer and lazily populating the" NL + "index with answers it calculates. Unfortunately, calculating that" NL + "information using the brute-force method (instead of having the" NL + "information appear in the index organically) can be very costly." 
NL + "" NL + "This tool triggers the lazy index population logic built into" NL + "Subversion in a fashion far more efficient than is likely to happen" NL + "during typical repository usage. It can be run while the repository" NL + "is online, too, without interrupting normal Subversion activities." NL; + +/* Print a usage message for this program (PROGNAME), possibly with an + error message ERR_MSG, if not NULL. */ +static void +usage_maybe_with_err(const char *progname, const char *err_msg) +{ + FILE *out; + + out = err_msg ? stderr : stdout; + fprintf(out, "Usage: %s REPOS-PATH\n\n%s", progname, usage_summary); + if (err_msg) + fprintf(out, "\nERROR: %s\n", err_msg); +} + +/* Build the node-origins index any newly added items introduced in + REVISION in FS. Set *COUNT to the number of new items found. */ +static svn_error_t * +index_revision_adds(int *count, svn_fs_t *fs, + svn_revnum_t revision, apr_pool_t *pool) +{ + svn_fs_root_t *root; + apr_hash_t *changes; + apr_hash_index_t *hi; + apr_pool_t *subpool; + + *count = 0; + SVN_ERR(svn_fs_revision_root(&root, fs, revision, pool)); + SVN_ERR(svn_fs_paths_changed2(&changes, root, pool)); + + /* No paths changed in this revision? Nothing to do. */ + if (apr_hash_count(changes) == 0) + return SVN_NO_ERROR; + + subpool = svn_pool_create(pool); + for (hi = apr_hash_first(pool, changes); hi; hi = apr_hash_next(hi)) + { + const void *path; + void *val; + svn_fs_path_change2_t *change; + + svn_pool_clear(subpool); + apr_hash_this(hi, &path, NULL, &val); + change = val; + if ((change->change_kind == svn_fs_path_change_add) + || (change->change_kind == svn_fs_path_change_replace)) + { + if (! (change->copyfrom_path + && SVN_IS_VALID_REVNUM(change->copyfrom_rev))) + { + svn_revnum_t origin; + SVN_ERR(svn_fs_node_origin_rev(&origin, root, path, subpool)); + (*count)++; + } + } + } + svn_pool_destroy(subpool); + + return SVN_NO_ERROR; +} + +/* Build the node-origins index for the repository located at REPOS_PATH. 
*/ +static svn_error_t * +build_index(const char *repos_path, apr_pool_t *pool) +{ + svn_repos_t *repos; + svn_fs_t *fs; + svn_revnum_t youngest_rev, i; + size_t slotsize; + const char *progress_fmt; + apr_pool_t *subpool; + + /* Open the repository. */ + SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, pool)); + + /* Get a filesystem object. */ + fs = svn_repos_fs(repos); + + /* Fetch the youngest revision of the repository. */ + SVN_ERR(svn_fs_youngest_rev(&youngest_rev, fs, pool)); + slotsize = strlen(apr_ltoa(pool, youngest_rev)); + progress_fmt = apr_psprintf + (pool, + "[%%%" APR_SIZE_T_FMT "ld" + "/%%%" APR_SIZE_T_FMT "ld] " + "Found %%d new lines of history." + "\n", slotsize, slotsize); + + /* Now, iterate over all the revisions, calling index_revision_adds(). */ + subpool = svn_pool_create(pool); + for (i = 0; i < youngest_rev; i++) + { + int count; + svn_pool_clear(subpool); + SVN_ERR(index_revision_adds(&count, fs, i + 1, subpool)); + printf(progress_fmt, i + 1, youngest_rev, count); + } + svn_pool_destroy(subpool); + + return SVN_NO_ERROR; +} + + +int +main(int argc, const char **argv) +{ + apr_pool_t *pool; + svn_error_t *err = SVN_NO_ERROR; + const char *repos_path; + + /* Initialize the app. Send all error messages to 'stderr'. */ + if (svn_cmdline_init(argv[0], stderr) == EXIT_FAILURE) + return EXIT_FAILURE; + + pool = svn_pool_create(NULL); + + if (argc <= 1) + { + usage_maybe_with_err(argv[0], "Not enough arguments."); + goto cleanup; + } + + /* Convert argv[1] into a UTF8, internal-format, canonicalized path. 
*/ + if ((err = svn_utf_cstring_to_utf8(&repos_path, argv[1], pool))) + goto cleanup; + repos_path = svn_dirent_internal_style(repos_path, pool); + repos_path = svn_dirent_canonicalize(repos_path, pool); + + if ((err = build_index(repos_path, pool))) + goto cleanup; + + cleanup: + svn_pool_destroy(pool); + + if (err) + { + svn_handle_error2(err, stderr, FALSE, + "svn-populate-node-origins-index: "); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/tools/server-side/svn-rep-sharing-stats.c b/tools/server-side/svn-rep-sharing-stats.c new file mode 100644 index 0000000..e57ff91 --- /dev/null +++ b/tools/server-side/svn-rep-sharing-stats.c @@ -0,0 +1,535 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + +#include <apr_signal.h> + +#include "svn_cmdline.h" +#include "svn_dirent_uri.h" +#include "svn_pools.h" +#include "svn_repos.h" +#include "svn_opt.h" +#include "svn_utf.h" +#include "svn_version.h" + +#include "../../subversion/libsvn_fs_fs/fs.h" +#include "../../subversion/libsvn_fs_fs/fs_fs.h" +/* for svn_fs_fs__id_* (used in assertions only) */ +#include "../../subversion/libsvn_fs_fs/id.h" + +#include "svn_private_config.h" + + +/** Help messages and version checking. **/ + +static svn_error_t * +version(apr_pool_t *pool) +{ + return svn_opt_print_help3(NULL, "svn-rep-sharing-stats", TRUE, FALSE, NULL, + NULL, NULL, NULL, NULL, NULL, pool); +} + +static void +usage(apr_pool_t *pool) +{ + svn_error_clear(svn_cmdline_fprintf + (stderr, pool, + _("Type 'svn-rep-sharing-stats --help' for usage.\n"))); +} + + +static void +help(const apr_getopt_option_t *options, apr_pool_t *pool) +{ + svn_error_clear + (svn_cmdline_fprintf + (stdout, pool, + _("usage: svn-rep-sharing-stats [OPTIONS] REPOS_PATH\n\n" + " Prints the reference count statistics for representations\n" + " in an FSFS repository.\n" + "\n" + " At least one of the options --data/--prop/--both must be specified.\n" + "\n" + "Valid options:\n"))); + while (options->description) + { + const char *optstr; + svn_opt_format_option(&optstr, options, TRUE, pool); + svn_error_clear(svn_cmdline_fprintf(stdout, pool, " %s\n", optstr)); + ++options; + } + svn_error_clear(svn_cmdline_fprintf(stdout, pool, "\n")); + exit(0); +} + + +/* Version compatibility check */ +static svn_error_t * +check_lib_versions(void) +{ + static const svn_version_checklist_t checklist[] = + { + /* ### check FSFS version */ + { "svn_subr", svn_subr_version }, + { "svn_fs", svn_fs_version }, + { NULL, NULL } + }; + + SVN_VERSION_DEFINE(my_version); + return svn_error_trace(svn_ver_check_list(&my_version, checklist)); +} + + + +/** Cancellation stuff, ### copied 
from subversion/svn/main.c */ + +/* A flag to see if we've been cancelled by the client or not. */ +static volatile sig_atomic_t cancelled = FALSE; + +/* A signal handler to support cancellation. */ +static void +signal_handler(int signum) +{ + apr_signal(signum, SIG_IGN); + cancelled = TRUE; +} + +/* Our cancellation callback. */ +static svn_error_t * +svn_cl__check_cancel(void *baton) +{ + if (cancelled) + return svn_error_create(SVN_ERR_CANCELLED, NULL, _("Caught signal")); + else + return SVN_NO_ERROR; +} + +static svn_cancel_func_t cancel_func = svn_cl__check_cancel; + +static void set_up_cancellation(void) +{ + /* Set up our cancellation support. */ + apr_signal(SIGINT, signal_handler); +#ifdef SIGBREAK + /* SIGBREAK is a Win32 specific signal generated by ctrl-break. */ + apr_signal(SIGBREAK, signal_handler); +#endif +#ifdef SIGHUP + apr_signal(SIGHUP, signal_handler); +#endif +#ifdef SIGTERM + apr_signal(SIGTERM, signal_handler); +#endif + +#ifdef SIGPIPE + /* Disable SIGPIPE generation for the platforms that have it. */ + apr_signal(SIGPIPE, SIG_IGN); +#endif + +#ifdef SIGXFSZ + /* Disable SIGXFSZ generation for the platforms that have it, otherwise + * working with large files when compiled against an APR that doesn't have + * large file support will crash the program, which is uncool. */ + apr_signal(SIGXFSZ, SIG_IGN); +#endif +} + + +/** Program-specific code. **/ +enum { + OPT_VERSION = SVN_OPT_FIRST_LONGOPT_ID, + OPT_DATA, + OPT_PROP, + OPT_BOTH +}; + +static svn_error_t *check_experimental(void) +{ + if (getenv("SVN_REP_SHARING_STATS_IS_EXPERIMENTAL")) + return SVN_NO_ERROR; + + return svn_error_create(APR_EGENERAL, NULL, + "This code is experimental and should not " + "be used on live data."); +} + +/* The parts of a rep that determine whether it's being shared. */ +struct key_t +{ + svn_revnum_t revision; + apr_off_t offset; +}; + +/* What we need to know about a rep. 
*/ +struct value_t +{ + svn_checksum_t *sha1_checksum; + apr_uint64_t refcount; +}; + +/* Increment records[rep] if both are non-NULL and REP contains a sha1. + * Allocate keys and values in RESULT_POOL. + */ +static svn_error_t *record(apr_hash_t *records, + representation_t *rep, + apr_pool_t *result_pool) +{ + struct key_t *key; + struct value_t *value; + + /* Skip if we ignore this particular kind of reps, or if the rep doesn't + * exist or doesn't have the checksum we are after. (The latter case + * often corresponds to node_rev->kind == svn_node_dir.) + */ + if (records == NULL || rep == NULL || rep->sha1_checksum == NULL) + return SVN_NO_ERROR; + + /* Construct the key. + * + * Must use calloc() because apr_hash_* pay attention to padding bytes too. + */ + key = apr_pcalloc(result_pool, sizeof(*key)); + key->revision = rep->revision; + key->offset = rep->offset; + + /* Update or create the value. */ + if ((value = apr_hash_get(records, key, sizeof(*key)))) + { + /* Paranoia. */ + SVN_ERR_ASSERT(value->sha1_checksum != NULL); + SVN_ERR_ASSERT(svn_checksum_match(value->sha1_checksum, + rep->sha1_checksum)); + /* Real work. */ + value->refcount++; + } + else + { + value = apr_palloc(result_pool, sizeof(*value)); + value->sha1_checksum = svn_checksum_dup(rep->sha1_checksum, result_pool); + value->refcount = 1; + } + + /* Store them. */ + apr_hash_set(records, key, sizeof(*key), value); + + return SVN_NO_ERROR; +} + +/* Inspect the data and/or prop reps of revision REVNUM in FS. Store + * reference count tallies in passed hashes (allocated in RESULT_POOL). + * + * If PROP_REPS or DATA_REPS is NULL, the respective kind of reps are not + * tallied. + * + * Print progress report to STDERR unless QUIET is true. + * + * Use SCRATCH_POOL for temporary allocations. 
+ */ +static svn_error_t * +process_one_revision(svn_fs_t *fs, + svn_revnum_t revnum, + svn_boolean_t quiet, + apr_hash_t *prop_reps, + apr_hash_t *data_reps, + apr_hash_t *both_reps, + apr_pool_t *result_pool, + apr_pool_t *scratch_pool) +{ + svn_fs_root_t *rev_root; + apr_hash_t *paths_changed; + apr_hash_index_t *hi; + + if (! quiet) + SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool, + "processing r%ld\n", revnum)); + + /* Get the changed paths. */ + SVN_ERR(svn_fs_revision_root(&rev_root, fs, revnum, scratch_pool)); + SVN_ERR(svn_fs_paths_changed2(&paths_changed, rev_root, scratch_pool)); + + /* Iterate them. */ + /* ### use iterpool? */ + for (hi = apr_hash_first(scratch_pool, paths_changed); + hi; hi = apr_hash_next(hi)) + { + const char *path; + const svn_fs_path_change2_t *change; + const svn_fs_id_t *node_rev_id1, *node_rev_id2; + const svn_fs_id_t *the_id; + + node_revision_t *node_rev; + + path = svn__apr_hash_index_key(hi); + change = svn__apr_hash_index_val(hi); + if (! quiet) + SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool, + "processing r%ld:%s\n", revnum, path)); + + if (change->change_kind == svn_fs_path_change_delete) + /* Can't ask for reps of PATH at REVNUM if the path no longer exists + * at that revision! */ + continue; + + /* Okay, we have two node_rev id's for this change: the txn one and + * the revision one. We'll use the latter. */ + node_rev_id1 = change->node_rev_id; + SVN_ERR(svn_fs_node_id(&node_rev_id2, rev_root, path, scratch_pool)); + + SVN_ERR_ASSERT(svn_fs_fs__id_txn_id(node_rev_id1) != NULL); + SVN_ERR_ASSERT(svn_fs_fs__id_rev(node_rev_id2) != SVN_INVALID_REVNUM); + + the_id = node_rev_id2; + + /* Get the node_rev using the chosen node_rev_id. */ + SVN_ERR(svn_fs_fs__get_node_revision(&node_rev, fs, the_id, scratch_pool)); + + /* Maybe record the sha1's. 
*/ + SVN_ERR(record(prop_reps, node_rev->prop_rep, result_pool)); + SVN_ERR(record(data_reps, node_rev->data_rep, result_pool)); + SVN_ERR(record(both_reps, node_rev->prop_rep, result_pool)); + SVN_ERR(record(both_reps, node_rev->data_rep, result_pool)); + } + + return SVN_NO_ERROR; +} + +/* Print REPS_REF_COUNT (a hash as for process_one_revision()) + * to stdout in "refcount => sha1" format. A sha1 may appear + * more than once if not all its instances are shared. Prepend + * each line by NAME. + * + * Use SCRATCH_POOL for temporary allocations. + */ +static svn_error_t * +pretty_print(const char *name, + apr_hash_t *reps_ref_counts, + apr_pool_t *scratch_pool) +{ + apr_hash_index_t *hi; + + if (reps_ref_counts == NULL) + return SVN_NO_ERROR; + + for (hi = apr_hash_first(scratch_pool, reps_ref_counts); + hi; hi = apr_hash_next(hi)) + { + struct value_t *value; + + SVN_ERR(cancel_func(NULL)); + + value = svn__apr_hash_index_val(hi); + SVN_ERR(svn_cmdline_printf(scratch_pool, "%s %" APR_UINT64_T_FMT " %s\n", + name, value->refcount, + svn_checksum_to_cstring_display( + value->sha1_checksum, + scratch_pool))); + } + + return SVN_NO_ERROR; +} + +/* Return an error unless FS is an fsfs fs. */ +static svn_error_t *is_fs_fsfs(svn_fs_t *fs, apr_pool_t *scratch_pool) +{ + const char *actual, *expected, *path; + + path = svn_fs_path(fs, scratch_pool); + + expected = SVN_FS_TYPE_FSFS; + SVN_ERR(svn_fs_type(&actual, path, scratch_pool)); + + if (strcmp(actual, expected) != 0) + return svn_error_createf(SVN_ERR_FS_UNKNOWN_FS_TYPE, NULL, + "Filesystem '%s' is not of type '%s'", + svn_dirent_local_style(path, scratch_pool), + actual); + + return SVN_NO_ERROR; +} + +/* The core logic. This function iterates the repository REPOS_PATH + * and sends all the (DATA and/or PROP) reps in each revision for counting + * by process_one_revision(). QUIET is passed to process_one_revision(). 
+ */ +static svn_error_t *process(const char *repos_path, + svn_boolean_t prop, + svn_boolean_t data, + svn_boolean_t quiet, + apr_pool_t *scratch_pool) +{ + apr_hash_t *prop_reps = NULL; + apr_hash_t *data_reps = NULL; + apr_hash_t *both_reps = NULL; + svn_revnum_t rev, youngest; + apr_pool_t *iterpool; + svn_repos_t *repos; + svn_fs_t *fs; + + if (prop) + prop_reps = apr_hash_make(scratch_pool); + if (data) + data_reps = apr_hash_make(scratch_pool); + if (prop && data) + both_reps = apr_hash_make(scratch_pool); + + /* Open the FS. */ + SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, scratch_pool)); + fs = svn_repos_fs(repos); + + SVN_ERR(is_fs_fsfs(fs, scratch_pool)); + + SVN_ERR(svn_fs_youngest_rev(&youngest, fs, scratch_pool)); + + /* Iterate the revisions. */ + iterpool = svn_pool_create(scratch_pool); + for (rev = 0; rev <= youngest; rev++) + { + svn_pool_clear(iterpool); + SVN_ERR(cancel_func(NULL)); + SVN_ERR(process_one_revision(fs, rev, quiet, + prop_reps, data_reps, both_reps, + scratch_pool, iterpool)); + } + svn_pool_destroy(iterpool); + + /* Print stats. */ + SVN_ERR(pretty_print("prop", prop_reps, scratch_pool)); + SVN_ERR(pretty_print("data", data_reps, scratch_pool)); + SVN_ERR(pretty_print("both", both_reps, scratch_pool)); + + return SVN_NO_ERROR; +} + +int +main(int argc, const char *argv[]) +{ + const char *repos_path; + apr_allocator_t *allocator; + apr_pool_t *pool; + svn_boolean_t prop = FALSE, data = FALSE; + svn_boolean_t quiet = FALSE; + svn_error_t *err; + apr_getopt_t *os; + const apr_getopt_option_t options[] = + { + {"data", OPT_DATA, 0, N_("display data reps stats")}, + {"prop", OPT_PROP, 0, N_("display prop reps stats")}, + {"both", OPT_BOTH, 0, N_("display combined (data+prop) reps stats")}, + {"quiet", 'q', 0, N_("no progress (only errors) to stderr")}, + {"help", 'h', 0, N_("display this help")}, + {"version", OPT_VERSION, 0, + N_("show program version information")}, + {0, 0, 0, 0} + }; + + /* Initialize the app. 
*/ + if (svn_cmdline_init("svn-rep-sharing-stats", stderr) != EXIT_SUCCESS) + return EXIT_FAILURE; + + /* Create our top-level pool. Use a separate mutexless allocator, + * given this application is single threaded. + */ + if (apr_allocator_create(&allocator)) + return EXIT_FAILURE; + + apr_allocator_max_free_set(allocator, SVN_ALLOCATOR_RECOMMENDED_MAX_FREE); + + pool = svn_pool_create_ex(NULL, allocator); + apr_allocator_owner_set(allocator, pool); + + /* Check library versions */ + err = check_lib_versions(); + if (err) + return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: "); + + err = svn_cmdline__getopt_init(&os, argc, argv, pool); + if (err) + return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: "); + + SVN_INT_ERR(check_experimental()); + + os->interleave = 1; + while (1) + { + int opt; + const char *arg; + apr_status_t status = apr_getopt_long(os, options, &opt, &arg); + if (APR_STATUS_IS_EOF(status)) + break; + if (status != APR_SUCCESS) + { + usage(pool); + return EXIT_FAILURE; + } + switch (opt) + { + case OPT_DATA: + data = TRUE; + break; + /* It seems we don't actually rep-share props yet. */ + case OPT_PROP: + prop = TRUE; + break; + case OPT_BOTH: + data = TRUE; + prop = TRUE; + break; + case 'q': + quiet = TRUE; + break; + case 'h': + help(options, pool); + break; + case OPT_VERSION: + SVN_INT_ERR(version(pool)); + exit(0); + break; + default: + usage(pool); + return EXIT_FAILURE; + } + } + + /* Exactly 1 non-option argument, + * and at least one of "--data"/"--prop"/"--both". + */ + if (os->ind + 1 != argc || (!data && !prop)) + { + usage(pool); + return EXIT_FAILURE; + } + + /* Grab REPOS_PATH from argv. */ + SVN_INT_ERR(svn_utf_cstring_to_utf8(&repos_path, os->argv[os->ind], pool)); + repos_path = svn_dirent_internal_style(repos_path, pool); + + set_up_cancellation(); + + /* Do something. */ + SVN_INT_ERR(process(repos_path, prop, data, quiet, pool)); + + /* We're done. 
*/ + + svn_pool_destroy(pool); + /* Flush stdout to make sure that the user will see any printing errors. */ + SVN_INT_ERR(svn_cmdline_fflush(stdout)); + + return EXIT_SUCCESS; +} diff --git a/tools/server-side/svn_server_log_parse.py b/tools/server-side/svn_server_log_parse.py new file mode 100755 index 0000000..5ecb104 --- /dev/null +++ b/tools/server-side/svn_server_log_parse.py @@ -0,0 +1,460 @@ +#!/usr/bin/python + +# ==================================================================== +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ==================================================================== + +# TODO: Teach parse_open about capabilities, rather than allowing any +# words at all. + +"""Parse subversion server operational logs. + +SVN-ACTION strings +------------------ + +Angle brackets denote a variable, e.g. 'commit r<N>' means you'll see +lines like 'commit r17' for this action. + +<N> and <M> are revision numbers. + +<PATH>, <FROM-PATH>, and <TO-PATH> mean a URI-encoded path relative to +the repository root, including a leading '/'. + +<REVPROP> means a revision property, e.g. 'svn:log'. + +<I> represents a svn_mergeinfo_inheritance_t value and is one of these +words: explicit inherited nearest-ancestor. 
+ +<D> represents a svn_depth_t value and is one of these words: empty +files immediates infinity. If the depth value for the operation was +svn_depth_unknown, the depth= portion is absent entirely. + +The get-mergeinfo and log actions use lists for paths and revprops. +The lists are enclosed in parentheses and each item is separated by a +space (spaces in paths are encoded as %20). + +The words will *always* be in this order, though some may be absent. + +General:: + + change-rev-prop r<N> <REVPROP> + commit r<N> + get-dir <PATH> r<N> text? props? + get-file <PATH> r<N> text? props? + lock (<PATH> ...) steal? + rev-proplist r<N> + unlock (<PATH> ...) break? + +Reports:: + + get-file-revs <PATH> r<N>:<M> include-merged-revisions? + get-mergeinfo (<PATH> ...) <I> include-descendants? + log (<PATH> ...) r<N>:<M> limit=<N>? discover-changed-paths? strict? include-merged-revisions? revprops=all|(<REVPROP> ...)? + replay <PATH> r<N> + +The update report:: + + checkout-or-export <PATH> r<N> depth=<D>? + diff <FROM-PATH>@<N> <TO-PATH>@<M> depth=<D>? ignore-ancestry? + diff <PATH> r<N>:<M> depth=<D>? ignore-ancestry? + status <PATH> r<N> depth=<D>? + switch <FROM-PATH> <TO-PATH>@<N> depth=<D>? + update <PATH> r<N> depth=<D>? send-copyfrom-args? +""" + + +import re +try: + # Python >=3.0 + from urllib.parse import unquote as urllib_parse_unquote +except ImportError: + # Python <3.0 + from urllib import unquote as urllib_parse_unquote + +import svn.core + +# +# Valid words for _parse_depth and _parse_mergeinfo_inheritance +# + +DEPTH_WORDS = ['empty', 'files', 'immediates', 'infinity'] +INHERITANCE_WORDS = { + 'explicit': svn.core.svn_mergeinfo_explicit, + 'inherited': svn.core.svn_mergeinfo_inherited, + 'nearest-ancestor': svn.core.svn_mergeinfo_nearest_ancestor, +} + +# +# Patterns for _match +# + +# <PATH> +pPATH = r'(/\S*)' +# (<PATH> ...) +pPATHS = r'\(([^)]*)\)' +# r<N> +pREVNUM = r'r(\d+)' +# (<N> ...) 
+pREVNUMS = r'\(((\d+\s*)*)\)' +# r<N>:<M> +pREVRANGE = r'r(-?\d+):(-?\d+)' +# <PATH>@<N> +pPATHREV = pPATH + r'@(\d+)' +pWORD = r'(\S+)' +pPROPERTY = pWORD +# depth=<D>? +pDEPTH = 'depth=' + pWORD + +# +# Exceptions +# + +class Error(Exception): pass +class BadDepthError(Error): + def __init__(self, value): + Error.__init__(self, 'bad svn_depth_t value ' + value) +class BadMergeinfoInheritanceError(Error): + def __init__(self, value): + Error.__init__(self, 'bad svn_mergeinfo_inheritance_t value ' + value) +class MatchError(Error): + def __init__(self, pattern, line): + Error.__init__(self, '/%s/ does not match log line:\n%s' + % (pattern, line)) + + +# +# Helper functions +# + +# TODO: Move to kitchensink.c like svn_depth_from_word? +try: + from svn.core import svn_inheritance_from_word +except ImportError: + def svn_inheritance_from_word(word): + try: + return INHERITANCE_WORDS[word] + except KeyError: + # XXX svn_inheritance_to_word uses explicit as default so... + return svn.core.svn_mergeinfo_explicit + +def _parse_depth(word): + if word is None: + return svn.core.svn_depth_unknown + if word not in DEPTH_WORDS: + raise BadDepthError(word) + return svn.core.svn_depth_from_word(word) + +def _parse_mergeinfo_inheritance(word): + if word not in INHERITANCE_WORDS: + raise BadMergeinfoInheritanceError(word) + return svn_inheritance_from_word(word) + +def _match(line, *patterns): + """Return a re.match object from matching patterns against line. + + All optional arguments must be strings suitable for ''.join()ing + into a single pattern string for re.match. The last optional + argument may instead be a list of such strings, which will be + joined into the final pattern as *optional* matches. + + Raises: + Error -- if re.match returns None (i.e. no match) + """ + if isinstance(patterns[-1], list): + optional = patterns[-1] + patterns = patterns[:-1] + else: + optional = [] + pattern = r'\s+'.join(patterns) + pattern += ''.join([r'(\s+' + x + ')?' 
for x in optional]) + m = re.match(pattern, line) + if m is None: + raise MatchError(pattern, line) + return m + + +class Parser(object): + """Subclass this and define the handle_ methods according to the + "SVN-ACTION strings" section of this module's documentation. For + example, "lock <PATH> steal?" => def handle_lock(self, path, steal) + where steal will be True if "steal" was present. + + See the end of test_svn_server_log_parse.py for a complete example. + """ + def parse(self, line): + """Parse line and call appropriate handle_ method. + + Returns one of: + - line remaining after the svn action, if one was parsed + - whatever your handle_unknown implementation returns + + Raises: + BadDepthError -- for bad svn_depth_t values + BadMergeinfoInheritanceError -- for bad svn_mergeinfo_inheritance_t + values + Error -- any other parse error + """ + self.line = line + words = self.split_line = line.split(' ') + try: + method = getattr(self, '_parse_' + words[0].replace('-', '_')) + except AttributeError: + return self.handle_unknown(self.line) + return method(' '.join(words[1:])) + + def _parse_commit(self, line): + m = _match(line, pREVNUM) + self.handle_commit(int(m.group(1))) + return line[m.end():] + + def _parse_open(self, line): + pINT = r'(\d+)' + pCAP = r'cap=\(([^)]*)\)' + pCLIENT = pWORD + m = _match(line, pINT, pCAP, pPATH, pCLIENT, pCLIENT) + protocol = int(m.group(1)) + if m.group(2) is None: + capabilities = [] + else: + capabilities = m.group(2).split() + path = m.group(3) + ra_client = urllib_parse_unquote(m.group(4)) + client = urllib_parse_unquote(m.group(5)) + self.handle_open(protocol, capabilities, path, ra_client, client) + return line[m.end():] + + def _parse_reparent(self, line): + m = _match(line, pPATH) + self.handle_reparent(urllib_parse_unquote(m.group(1))) + return line[m.end():] + + def _parse_get_latest_rev(self, line): + self.handle_get_latest_rev() + return line + + def _parse_get_dated_rev(self, line): + m = _match(line, pWORD) + 
self.handle_get_dated_rev(m.group(1)) + return line[m.end():] + + def _parse_get_dir(self, line): + m = _match(line, pPATH, pREVNUM, ['text', 'props']) + self.handle_get_dir(urllib_parse_unquote(m.group(1)), int(m.group(2)), + m.group(3) is not None, + m.group(4) is not None) + return line[m.end():] + + def _parse_get_file(self, line): + m = _match(line, pPATH, pREVNUM, ['text', 'props']) + self.handle_get_file(urllib_parse_unquote(m.group(1)), int(m.group(2)), + m.group(3) is not None, + m.group(4) is not None) + return line[m.end():] + + def _parse_lock(self, line): + m = _match(line, pPATHS, ['steal']) + paths = [urllib_parse_unquote(x) for x in m.group(1).split()] + self.handle_lock(paths, m.group(2) is not None) + return line[m.end():] + + def _parse_change_rev_prop(self, line): + m = _match(line, pREVNUM, pPROPERTY) + self.handle_change_rev_prop(int(m.group(1)), + urllib_parse_unquote(m.group(2))) + return line[m.end():] + + def _parse_rev_proplist(self, line): + m = _match(line, pREVNUM) + self.handle_rev_proplist(int(m.group(1))) + return line[m.end():] + + def _parse_rev_prop(self, line): + m = _match(line, pREVNUM, pPROPERTY) + self.handle_rev_prop(int(m.group(1)), urllib_parse_unquote(m.group(2))) + return line[m.end():] + + def _parse_unlock(self, line): + m = _match(line, pPATHS, ['break']) + paths = [urllib_parse_unquote(x) for x in m.group(1).split()] + self.handle_unlock(paths, m.group(2) is not None) + return line[m.end():] + + def _parse_get_lock(self, line): + m = _match(line, pPATH) + self.handle_get_lock(urllib_parse_unquote(m.group(1))) + return line[m.end():] + + def _parse_get_locks(self, line): + m = _match(line, pPATH) + self.handle_get_locks(urllib_parse_unquote(m.group(1))) + return line[m.end():] + + def _parse_get_locations(self, line): + m = _match(line, pPATH, pREVNUMS) + path = urllib_parse_unquote(m.group(1)) + revnums = [int(x) for x in m.group(2).split()] + self.handle_get_locations(path, revnums) + return line[m.end():] + + def 
_parse_get_location_segments(self, line): + m = _match(line, pPATHREV, pREVRANGE) + path = urllib_parse_unquote(m.group(1)) + peg = int(m.group(2)) + left = int(m.group(3)) + right = int(m.group(4)) + self.handle_get_location_segments(path, peg, left, right) + return line[m.end():] + + def _parse_get_file_revs(self, line): + m = _match(line, pPATH, pREVRANGE, ['include-merged-revisions']) + path = urllib_parse_unquote(m.group(1)) + left = int(m.group(2)) + right = int(m.group(3)) + include_merged_revisions = m.group(4) is not None + self.handle_get_file_revs(path, left, right, include_merged_revisions) + return line[m.end():] + + def _parse_get_mergeinfo(self, line): + # <I> + pMERGEINFO_INHERITANCE = pWORD + pINCLUDE_DESCENDANTS = pWORD + m = _match(line, + pPATHS, pMERGEINFO_INHERITANCE, ['include-descendants']) + paths = [urllib_parse_unquote(x) for x in m.group(1).split()] + inheritance = _parse_mergeinfo_inheritance(m.group(2)) + include_descendants = m.group(3) is not None + self.handle_get_mergeinfo(paths, inheritance, include_descendants) + return line[m.end():] + + def _parse_log(self, line): + # limit=<N>? + pLIMIT = r'limit=(\d+)' + # revprops=all|(<REVPROP> ...)? 
+ pREVPROPS = r'revprops=(all|\(([^)]+)\))' + m = _match(line, pPATHS, pREVRANGE, + [pLIMIT, 'discover-changed-paths', 'strict', + 'include-merged-revisions', pREVPROPS]) + paths = [urllib_parse_unquote(x) for x in m.group(1).split()] + left = int(m.group(2)) + right = int(m.group(3)) + if m.group(5) is None: + limit = 0 + else: + limit = int(m.group(5)) + discover_changed_paths = m.group(6) is not None + strict = m.group(7) is not None + include_merged_revisions = m.group(8) is not None + if m.group(10) == 'all': + revprops = None + else: + if m.group(11) is None: + revprops = [] + else: + revprops = [urllib_parse_unquote(x) for x in m.group(11).split()] + self.handle_log(paths, left, right, limit, discover_changed_paths, + strict, include_merged_revisions, revprops) + return line[m.end():] + + def _parse_check_path(self, line): + m = _match(line, pPATHREV) + path = urllib_parse_unquote(m.group(1)) + revnum = int(m.group(2)) + self.handle_check_path(path, revnum) + return line[m.end():] + + def _parse_stat(self, line): + m = _match(line, pPATHREV) + path = urllib_parse_unquote(m.group(1)) + revnum = int(m.group(2)) + self.handle_stat(path, revnum) + return line[m.end():] + + def _parse_replay(self, line): + m = _match(line, pPATH, pREVNUM) + path = urllib_parse_unquote(m.group(1)) + revision = int(m.group(2)) + self.handle_replay(path, revision) + return line[m.end():] + + # the update report + + def _parse_checkout_or_export(self, line): + m = _match(line, pPATH, pREVNUM, [pDEPTH]) + path = urllib_parse_unquote(m.group(1)) + revision = int(m.group(2)) + depth = _parse_depth(m.group(4)) + self.handle_checkout_or_export(path, revision, depth) + return line[m.end():] + + def _parse_diff(self, line): + # First, try 1-path form. + try: + m = _match(line, pPATH, pREVRANGE, [pDEPTH, 'ignore-ancestry']) + f = self._parse_diff_1path + except Error: + # OK, how about 2-path form? 
+ m = _match(line, pPATHREV, pPATHREV, [pDEPTH, 'ignore-ancestry']) + f = self._parse_diff_2paths + return f(line, m) + + def _parse_diff_1path(self, line, m): + path = urllib_parse_unquote(m.group(1)) + left = int(m.group(2)) + right = int(m.group(3)) + depth = _parse_depth(m.group(5)) + ignore_ancestry = m.group(6) is not None + self.handle_diff_1path(path, left, right, + depth, ignore_ancestry) + return line[m.end():] + + def _parse_diff_2paths(self, line, m): + from_path = urllib_parse_unquote(m.group(1)) + from_rev = int(m.group(2)) + to_path = urllib_parse_unquote(m.group(3)) + to_rev = int(m.group(4)) + depth = _parse_depth(m.group(6)) + ignore_ancestry = m.group(7) is not None + self.handle_diff_2paths(from_path, from_rev, to_path, to_rev, + depth, ignore_ancestry) + return line[m.end():] + + def _parse_status(self, line): + m = _match(line, pPATH, pREVNUM, [pDEPTH]) + path = urllib_parse_unquote(m.group(1)) + revision = int(m.group(2)) + depth = _parse_depth(m.group(4)) + self.handle_status(path, revision, depth) + return line[m.end():] + + def _parse_switch(self, line): + m = _match(line, pPATH, pPATHREV, [pDEPTH]) + from_path = urllib_parse_unquote(m.group(1)) + to_path = urllib_parse_unquote(m.group(2)) + to_rev = int(m.group(3)) + depth = _parse_depth(m.group(5)) + self.handle_switch(from_path, to_path, to_rev, depth) + return line[m.end():] + + def _parse_update(self, line): + m = _match(line, pPATH, pREVNUM, [pDEPTH, 'send-copyfrom-args']) + path = urllib_parse_unquote(m.group(1)) + revision = int(m.group(2)) + depth = _parse_depth(m.group(4)) + send_copyfrom_args = m.group(5) is not None + self.handle_update(path, revision, depth, send_copyfrom_args) + return line[m.end():] diff --git a/tools/server-side/svnauthz-validate.c b/tools/server-side/svnauthz-validate.c new file mode 100644 index 0000000..df7d541 --- /dev/null +++ b/tools/server-side/svnauthz-validate.c @@ -0,0 +1,76 @@ +/* + * svnauthz-validate.c : Load and validate an authz file. 
+ * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * + * svnauthz-validate.c : load and validate an authz file, returns + * value == 0 if syntax of authz file is correct + * value == 1 if syntax of authz file is invalid or file not found + * value == 2 in case of general error + * + */ + +#include "svn_pools.h" +#include "svn_repos.h" +#include "svn_cmdline.h" + +int +main(int argc, const char **argv) +{ + apr_pool_t *pool; + svn_error_t *err; + svn_authz_t *authz; + const char *authz_file; + + if (argc <= 1) + { + printf("Usage: %s PATH \n\n", argv[0]); + printf("Loads the authz file at PATH and validates its syntax. \n" + "Returns:\n" + " 0 when syntax is OK.\n" + " 1 when syntax is invalid.\n" + " 2 operational error\n"); + return 2; + } + + authz_file = argv[1]; + + /* Initialize the app. Send all error messages to 'stderr'. */ + if (svn_cmdline_init(argv[0], stderr) != EXIT_SUCCESS) + return 2; + + pool = svn_pool_create(NULL); + + /* Read the access file and validate it. 
#!/usr/bin/python

# ====================================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# ====================================================================

# Run this without arguments to run unit tests.
# Run with a path to a davautocheck ops log to test that it can parse that.

import os
import re
import sys
import tempfile
try:
    # Python >=3.0
    from urllib.parse import quote as urllib_parse_quote
except ImportError:
    # Python <3.0
    from urllib import quote as urllib_parse_quote
import unittest

import svn.core

import svn_server_log_parse

class TestCase(unittest.TestCase):
    """Unit tests for svn_server_log_parse.Parser.

    Each test feeds one SVN-ACTION string to parse() and checks both the
    returned leftover (unparsed trailing) text and the argument tuple that
    the corresponding handle_* callback received (captured in self.result).
    """

    def setUp(self):
        # Define a class to stuff everything passed to any handle_
        # method into self.result.
        class cls(svn_server_log_parse.Parser):
            def __getattr__(cls_self, attr):
                if attr.startswith('handle_'):
                    return lambda *a: setattr(self, 'result', a)
                raise AttributeError
        self.parse = cls().parse

    def test_unknown(self):
        line = 'unknown log line'
        self.parse(line)
        self.assertEqual(self.result, (line,))

    def test_open(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'open')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'open 2 cap / SVN/1.60. fooclient')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'open a cap=() / SVN/1.60. fooclient')
        self.assertEqual(self.parse('open 2 cap=() / SVN fooclient'), '')
        self.assertEqual(self.result, (2, [], '/', 'SVN', 'fooclient'))
        # TODO: Teach it about the capabilities, rather than allowing
        # any words at all.
        self.assertEqual(self.parse('open 2 cap=(foo) / SVN foo%20client'), '')
        self.assertEqual(self.result, (2, ['foo'], '/', 'SVN', 'foo client'))

    def test_reparent(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'reparent')
        self.assertEqual(self.parse('reparent /'), '')
        self.assertEqual(self.result, ('/',))

    def test_get_latest_rev(self):
        self.assertEqual(self.parse('get-latest-rev'), '')
        self.assertEqual(self.result, ())
        self.assertEqual(self.parse('get-latest-rev r3'), 'r3')
        self.assertEqual(self.result, ())

    def test_get_dated_rev(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse,
                          'get-dated-rev')
        self.assertEqual(self.parse('get-dated-rev 2008-04-15T20:41:24.000000Z'), '')
        self.assertEqual(self.result, ('2008-04-15T20:41:24.000000Z',))

    def test_commit(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'commit')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'commit 3')
        self.assertEqual(self.parse('commit r3'), '')
        self.assertEqual(self.result, (3,))
        self.assertEqual(self.parse('commit r3 leftover'), ' leftover')
        self.assertEqual(self.result, (3,))

    def test_get_dir(self):
        self.get_dir_or_file('get-dir')

    def test_get_file(self):
        self.get_dir_or_file('get-file')

    def get_dir_or_file(self, c):
        """Shared checks for the get-dir and get-file actions (command C)."""
        self.assertRaises(svn_server_log_parse.Error, self.parse, c)
        self.assertRaises(svn_server_log_parse.Error, self.parse, c + ' foo')
        self.assertRaises(svn_server_log_parse.Error, self.parse, c + ' foo 3')
        self.assertEqual(self.parse(c + ' /a/b/c r3 ...'), ' ...')
        self.assertEqual(self.result, ('/a/b/c', 3, False, False))
        self.assertEqual(self.parse(c + ' / r3'), '')
        self.assertEqual(self.result, ('/', 3, False, False))
        # path must be absolute
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, c + ' a/b/c r3')
        self.assertEqual(self.parse(c + ' /k r27 text'), '')
        self.assertEqual(self.result, ('/k', 27, True, False))
        self.assertEqual(self.parse(c + ' /k r27 props'), '')
        self.assertEqual(self.result, ('/k', 27, False, True))
        self.assertEqual(self.parse(c + ' /k r27 text props'), '')
        self.assertEqual(self.result, ('/k', 27, True, True))
        # out of order not accepted
        self.assertEqual(self.parse(c + ' /k r27 props text'), ' text')
        self.assertEqual(self.result, ('/k', 27, False, True))

    def test_lock(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'lock')
        self.parse('lock (/foo)')
        self.assertEqual(self.result, (['/foo'], False))
        self.assertEqual(self.parse('lock (/foo) steal ...'), ' ...')
        self.assertEqual(self.result, (['/foo'], True))
        # 'stear' is not the 'steal' keyword, so it is leftover text.
        self.assertEqual(self.parse('lock (/foo) stear'), ' stear')

    def test_change_rev_prop(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'change-rev-prop r3')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'change-rev-prop r svn:log')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'change-rev-prop rX svn:log')
        self.assertEqual(self.parse('change-rev-prop r3 svn:log ...'), ' ...')
        self.assertEqual(self.result, (3, 'svn:log'))

    def test_rev_proplist(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'rev-proplist')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'rev-proplist r')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'rev-proplist rX')
        self.assertEqual(self.parse('rev-proplist r3 ...'), ' ...')
        self.assertEqual(self.result, (3,))

    def test_rev_prop(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'rev-prop')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'rev-prop r')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'rev-prop rX')
        self.assertEqual(self.parse('rev-prop r3 foo ...'), ' ...')
        self.assertEqual(self.result, (3, 'foo'))

    def test_unlock(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'unlock')
        self.parse('unlock (/foo)')
        self.assertEqual(self.result, (['/foo'], False))
        self.assertEqual(self.parse('unlock (/foo) break ...'), ' ...')
        self.assertEqual(self.result, (['/foo'], True))
        # 'bear' is not the 'break' keyword, so it is leftover text.
        self.assertEqual(self.parse('unlock (/foo) bear'), ' bear')

    def test_get_lock(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'get-lock')
        self.parse('get-lock /foo')
        self.assertEqual(self.result, ('/foo',))

    def test_get_locks(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'get-locks')
        self.parse('get-locks /foo')
        self.assertEqual(self.result, ('/foo',))

    def test_get_locations(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse,
                          'get-locations')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-locations /foo 3')
        self.assertEqual(self.parse('get-locations /foo (3 4) ...'), ' ...')
        self.assertEqual(self.result, ('/foo', [3, 4]))
        self.assertEqual(self.parse('get-locations /foo (3)'), '')
        self.assertEqual(self.result, ('/foo', [3]))

    def test_get_location_segments(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse,
                          'get-location-segments')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-location-segments /foo 3')
        self.assertEqual(self.parse('get-location-segments /foo@2 r3:4'), '')
        self.assertEqual(self.result, ('/foo', 2, 3, 4))

    def test_get_file_revs(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'get-file-revs')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-file-revs /foo 3')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-file-revs /foo 3:a')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-file-revs /foo r3:a')
        self.assertEqual(self.parse('get-file-revs /foo r3:4 ...'), ' ...')
        self.assertEqual(self.result, ('/foo', 3, 4, False))
        self.assertEqual(self.parse('get-file-revs /foo r3:4'
                                    ' include-merged-revisions ...'), ' ...')
        self.assertEqual(self.result, ('/foo', 3, 4, True))

    def test_get_mergeinfo(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo /foo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo (/foo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo (/foo /bar')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo (/foo)')
        self.assertRaises(svn_server_log_parse.BadMergeinfoInheritanceError,
                          self.parse, 'get-mergeinfo (/foo) bork')
        self.assertEqual(self.parse('get-mergeinfo (/foo) explicit'), '')
        self.assertEqual(self.result, (['/foo'],
                                       svn.core.svn_mergeinfo_explicit, False))
        self.assertEqual(self.parse('get-mergeinfo (/foo /bar) inherited ...'),
                         ' ...')
        # (A second, byte-identical copy of this assertion used to follow;
        # the redundant duplicate has been removed.)
        self.assertEqual(self.result, (['/foo', '/bar'],
                                       svn.core.svn_mergeinfo_inherited, False))

    def test_log(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'log')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'log /foo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'log (/foo)')
        self.assertEqual(self.parse('log (/foo) r3:4'
                                    ' include-merged-revisions'), '')
        self.assertEqual(self.result,
                         (['/foo'], 3, 4, 0, False, False, True, []))
        self.assertEqual(self.parse('log (/foo /bar) r3:4 revprops=all ...'),
                         ' ...')
        self.assertEqual(self.result,
                         (['/foo', '/bar'], 3, 4, 0, False, False, False, None))
        self.assertEqual(self.parse('log (/foo) r3:4 revprops=(a b) ...'),
                         ' ...')
        self.assertEqual(self.result,
                         (['/foo'], 3, 4, 0, False, False, False, ['a', 'b']))
        self.assertEqual(self.parse('log (/foo) r8:1 limit=3'), '')
        self.assertEqual(self.result,
                         (['/foo'], 8, 1, 3, False, False, False, []))

    def test_check_path(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'check-path')
        self.assertEqual(self.parse('check-path /foo@9'), '')
        self.assertEqual(self.result, ('/foo', 9))

    def test_stat(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'stat')
        self.assertEqual(self.parse('stat /foo@9'), '')
        self.assertEqual(self.result, ('/foo', 9))

    def test_replay(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'replay')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'replay /foo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'replay (/foo) r9')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'replay (/foo) r9:10')
        self.assertEqual(self.parse('replay /foo r9'), '')
        self.assertEqual(self.result, ('/foo', 9))

    def test_checkout_or_export(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'checkout-or-export')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'checkout-or-export /foo')
        self.assertEqual(self.parse('checkout-or-export /foo r9'), '')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown))
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'checkout-or-export /foo r9 depth=INVALID-DEPTH')
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'checkout-or-export /foo r9 depth=bork')
        self.assertEqual(self.parse('checkout-or-export /foo r9 depth=files .'),
                         ' .')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_files))

    def test_diff_1path(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'diff')
        self.assertEqual(self.parse('diff /foo r9:10'), '')
        self.assertEqual(self.result, ('/foo', 9, 10,
                                       svn.core.svn_depth_unknown, False))
        self.assertEqual(self.parse('diff /foo r9:10'
                                    ' ignore-ancestry ...'), ' ...')
        self.assertEqual(self.result, ('/foo', 9, 10,
                                       svn.core.svn_depth_unknown, True))
        self.assertEqual(self.parse('diff /foo r9:10 depth=files'), '')
        self.assertEqual(self.result, ('/foo', 9, 10,
                                       svn.core.svn_depth_files, False))

    def test_diff_2paths(self):
        self.assertEqual(self.parse('diff /foo@9 /bar@10'), '')
        self.assertEqual(self.result, ('/foo', 9, '/bar', 10,
                                       svn.core.svn_depth_unknown, False))
        self.assertEqual(self.parse('diff /foo@9 /bar@10'
                                    ' ignore-ancestry ...'), ' ...')
        self.assertEqual(self.result, ('/foo', 9, '/bar', 10,
                                       svn.core.svn_depth_unknown, True))
        self.assertEqual(self.parse('diff /foo@9 /bar@10'
                                    ' depth=files ignore-ancestry'), '')
        self.assertEqual(self.result, ('/foo', 9, '/bar', 10,
                                       svn.core.svn_depth_files, True))

    def test_status(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'status')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'status /foo')
        self.assertEqual(self.parse('status /foo r9'), '')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown))
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'status /foo r9 depth=INVALID-DEPTH')
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'status /foo r9 depth=bork')
        self.assertEqual(self.parse('status /foo r9 depth=files .'),
                         ' .')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_files))

    def test_switch(self):
        self.assertEqual(self.parse('switch /foo /bar@10 ...'), ' ...')
        self.assertEqual(self.result, ('/foo', '/bar', 10,
                                       svn.core.svn_depth_unknown))
        self.assertEqual(self.parse('switch /foo /bar@10'
                                    ' depth=files'), '')
        self.assertEqual(self.result, ('/foo', '/bar', 10,
                                       svn.core.svn_depth_files))

    def test_update(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'update')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'update /foo')
        self.assertEqual(self.parse('update /foo r9'), '')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown,
                                       False))
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'update /foo r9 depth=INVALID-DEPTH')
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'update /foo r9 depth=bork')
        self.assertEqual(self.parse('update /foo r9 depth=files .'), ' .')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_files,
                                       False))
        self.assertEqual(self.parse('update /foo r9 send-copyfrom-args .'),
                         ' .')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown,
                                       True))

if __name__ == '__main__':
    if len(sys.argv) == 1:
        # No arguments so run the unit tests.
        unittest.main()
        sys.stderr.write('unittest.main failed to exit\n')
        sys.exit(2)

    # Use the argument as the path to a log file to test against.

    def uri_encode(s):
        # urllib.parse.quote encodes :&@ characters, svn does not.
        return urllib_parse_quote(s, safe='/:&@')

    # Define a class to reconstruct the SVN-ACTION string.
+ class Test(svn_server_log_parse.Parser): + def handle_unknown(self, line): + sys.stderr.write('unknown log line at %d:\n%s\n' % (self.linenum, + line)) + sys.exit(2) + + def handle_open(self, protocol, capabilities, path, ra_client, client): + capabilities = ' '.join(capabilities) + if ra_client is None: + ra_client = '-' + if client is None: + client = '-' + path = uri_encode(path) + self.action = ('open %d cap=(%s) %s %s %s' + % (protocol, capabilities, path, ra_client, client)) + + def handle_reparent(self, path): + path = uri_encode(path) + self.action = 'reparent ' + path + + def handle_get_latest_rev(self): + self.action = 'get-latest-rev' + + def handle_get_dated_rev(self, date): + self.action = 'get-dated-rev ' + date + + def handle_commit(self, revision): + self.action = 'commit r%d' % (revision,) + + def handle_get_dir(self, path, revision, text, props): + path = uri_encode(path) + self.action = 'get-dir %s r%d' % (path, revision) + if text: + self.action += ' text' + if props: + self.action += ' props' + + def handle_get_file(self, path, revision, text, props): + path = uri_encode(path) + self.action = 'get-file %s r%d' % (path, revision) + if text: + self.action += ' text' + if props: + self.action += ' props' + + def handle_lock(self, paths, steal): + paths = [uri_encode(x) for x in paths] + self.action = 'lock (%s)' % (' '.join(paths),) + if steal: + self.action += ' steal' + + def handle_change_rev_prop(self, revision, revprop): + revprop = uri_encode(revprop) + self.action = 'change-rev-prop r%d %s' % (revision, revprop) + + def handle_rev_prop(self, revision, revprop): + revprop = uri_encode(revprop) + self.action = 'rev-prop r%d %s' % (revision, revprop) + + def handle_rev_proplist(self, revision): + self.action = 'rev-proplist r%d' % (revision,) + + def handle_unlock(self, paths, break_lock): + paths = [uri_encode(x) for x in paths] + self.action = 'unlock (%s)' % (' '.join(paths),) + if break_lock: + self.action += ' break' + + def 
handle_get_lock(self, path): + path = uri_encode(path) + self.action = 'get-lock ' + path + + def handle_get_locks(self, path): + self.action = 'get-locks ' + path + path = uri_encode(path) + + def handle_get_locations(self, path, revisions): + path = uri_encode(path) + self.action = ('get-locations %s (%s)' + % (path, ' '.join([str(x) for x in revisions]))) + + def handle_get_location_segments(self, path, peg, left, right): + path = uri_encode(path) + self.action = 'get-location-segments %s@%d r%d:%d' % (path, peg, + left, right) + + def handle_get_file_revs(self, path, left, right, + include_merged_revisions): + path = uri_encode(path) + self.action = 'get-file-revs %s r%d:%d' % (path, left, right) + if include_merged_revisions: + self.action += ' include-merged-revisions' + + def handle_get_mergeinfo(self, paths, inheritance, include_descendants): + paths = [uri_encode(x) for x in paths] + self.action = ('get-mergeinfo (%s) %s' + % (' '.join(paths), + svn.core.svn_inheritance_to_word(inheritance))) + if include_descendants: + self.action += ' include-descendants' + + def handle_log(self, paths, left, right, limit, discover_changed_paths, + strict, include_merged_revisions, revprops): + paths = [uri_encode(x) for x in paths] + self.action = 'log (%s) r%d:%d' % (' '.join(paths), + left, right) + if limit != 0: + self.action += ' limit=%d' % (limit,) + if discover_changed_paths: + self.action += ' discover-changed-paths' + if strict: + self.action += ' strict' + if include_merged_revisions: + self.action += ' include-merged-revisions' + if revprops is None: + self.action += ' revprops=all' + elif len(revprops) > 0: + revprops = [uri_encode(x) for x in revprops] + self.action += ' revprops=(%s)' % (' '.join(revprops),) + + def handle_check_path(self, path, revision): + path = uri_encode(path) + self.action = 'check-path %s@%d' % (path, revision) + + def handle_stat(self, path, revision): + path = uri_encode(path) + self.action = 'stat %s@%d' % (path, revision) + + 
def handle_replay(self, path, revision): + path = uri_encode(path) + self.action = 'replay %s r%d' % (path, revision) + + def maybe_depth(self, depth): + if depth != svn.core.svn_depth_unknown: + self.action += ' depth=%s' % ( + svn.core.svn_depth_to_word(depth),) + + def handle_checkout_or_export(self, path, revision, depth): + path = uri_encode(path) + self.action = 'checkout-or-export %s r%d' % (path, revision) + self.maybe_depth(depth) + + def handle_diff_1path(self, path, left, right, + depth, ignore_ancestry): + path = uri_encode(path) + self.action = 'diff %s r%d:%d' % (path, left, right) + self.maybe_depth(depth) + if ignore_ancestry: + self.action += ' ignore-ancestry' + + def handle_diff_2paths(self, from_path, from_rev, + to_path, to_rev, + depth, ignore_ancestry): + from_path = uri_encode(from_path) + to_path = uri_encode(to_path) + self.action = ('diff %s@%d %s@%d' + % (from_path, from_rev, to_path, to_rev)) + self.maybe_depth(depth) + if ignore_ancestry: + self.action += ' ignore-ancestry' + + def handle_status(self, path, revision, depth): + path = uri_encode(path) + self.action = 'status %s r%d' % (path, revision) + self.maybe_depth(depth) + + def handle_switch(self, from_path, to_path, to_rev, depth): + from_path = uri_encode(from_path) + to_path = uri_encode(to_path) + self.action = ('switch %s %s@%d' + % (from_path, to_path, to_rev)) + self.maybe_depth(depth) + + def handle_update(self, path, revision, depth, send_copyfrom_args): + path = uri_encode(path) + self.action = 'update %s r%d' % (path, revision) + self.maybe_depth(depth) + if send_copyfrom_args: + self.action += ' send-copyfrom-args' + + tmp = tempfile.mktemp() + try: + fp = open(tmp, 'w') + parser = Test() + parser.linenum = 0 + log_file = sys.argv[1] + log_type = None + for line in open(log_file): + if log_type is None: + # Figure out which log type we have. 
+ if re.match(r'\d+ \d\d\d\d-', line): + log_type = 'svnserve' + elif re.match(r'\[\d\d/', line): + log_type = 'mod_dav_svn' + else: + sys.stderr.write("unknown log format in '%s'" + % (log_file,)) + sys.exit(3) + sys.stderr.write('parsing %s log...\n' % (log_type,)) + sys.stderr.flush() + + words = line.split() + if log_type == 'svnserve': + # Skip over PID, date, client address, username, and repos. + if words[5].startswith('ERR'): + # Skip error lines. + fp.write(line) + continue + leading = ' '.join(words[:5]) + action = ' '.join(words[5:]) + else: + # Find the SVN-ACTION string from the CustomLog format + # davautocheck.sh uses. If that changes, this will need + # to as well. Currently it's + # %t %u %{SVN-REPOS-NAME}e %{SVN-ACTION}e + leading = ' '.join(words[:4]) + action = ' '.join(words[4:]) + + # Parse the action and write the reconstructed action to + # the temporary file. Ignore the returned trailing text, + # as we have none in the davautocheck ops log. + parser.linenum += 1 + try: + parser.parse(action) + except svn_server_log_parse.Error: + sys.stderr.write('error at line %d: %s\n' + % (parser.linenum, action)) + raise + fp.write(leading + ' ' + parser.action + '\n') + fp.close() + # Check differences between original and reconstructed files + # (should be identical). + result = os.spawnlp(os.P_WAIT, 'diff', 'diff', '-u', log_file, tmp) + if result == 0: + sys.stderr.write('OK\n') + sys.exit(result) + finally: + try: + os.unlink(tmp) + except Exception, e: + sys.stderr.write('os.unlink(tmp): %s\n' % (e,)) |