summaryrefslogtreecommitdiff
path: root/tools/server-side
diff options
context:
space:
mode:
authorLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 14:29:52 +0100
committerLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 14:29:52 +0100
commitf1bdf13786f0752c0846cf36f0d91e4fc6747929 (patch)
tree4223b2035bf2240d681a53822808b3c7f687b905 /tools/server-side
downloadsubversion-tarball-f1bdf13786f0752c0846cf36f0d91e4fc6747929.tar.gz
Tarball conversion
Diffstat (limited to 'tools/server-side')
-rwxr-xr-xtools/server-side/fsfs-reshard.py399
-rw-r--r--tools/server-side/mod_dontdothat/README53
-rw-r--r--tools/server-side/mod_dontdothat/mod_dontdothat.c661
-rwxr-xr-xtools/server-side/svn-backup-dumps.py692
-rw-r--r--tools/server-side/svn-populate-node-origins-index.c193
-rw-r--r--tools/server-side/svn-rep-sharing-stats.c535
-rwxr-xr-xtools/server-side/svn_server_log_parse.py460
-rw-r--r--tools/server-side/svnauthz-validate.c76
-rwxr-xr-xtools/server-side/test_svn_server_log_parse.py611
9 files changed, 3680 insertions, 0 deletions
diff --git a/tools/server-side/fsfs-reshard.py b/tools/server-side/fsfs-reshard.py
new file mode 100755
index 0000000..d039885
--- /dev/null
+++ b/tools/server-side/fsfs-reshard.py
@@ -0,0 +1,399 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# fsfs-reshard.py REPOS_PATH MAX_FILES_PER_SHARD
+#
+# Perform an offline conversion of an FSFS repository between linear (format
+# 2, usable by Subversion 1.4+) and sharded (format 3, usable by Subversion
+# 1.5+) layouts.
+#
+# The MAX_FILES_PER_SHARD argument specifies the maximum number of files
+# that will be stored in each shard (directory), or zero to specify a linear
+# layout. Subversion 1.5 uses a default value of 1000 files per shard.
+#
+# As the repository will not be valid while the conversion is in progress,
+# the repository administrator must ensure that access to the repository is
+# blocked for the duration of the conversion.
+#
+# In the event that the conversion is interrupted, the repository will be in
+# an inconsistent state. The repository administrator should then re-run
+# this tool to completion.
+#
+#
+# Note that, currently, resharding from one sharded layout to another is
+# likely to be an extremely slow process. To reshard, we convert from a
+# sharded to linear layout and then to the new sharded layout. The problem
+# is that the initial conversion to the linear layout triggers exactly the
+# same 'large number of files in a directory' problem that sharding is
+# intended to solve.
+#
+# ====================================================================
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ====================================================================
+#
+# $HeadURL: http://svn.apache.org/repos/asf/subversion/branches/1.7.x/tools/server-side/fsfs-reshard.py $
+# $LastChangedDate: 2009-11-16 19:07:17 +0000 (Mon, 16 Nov 2009) $
+# $LastChangedBy: hwright $
+# $LastChangedRevision: 880911 $
+
+import os, stat, sys
+
+from errno import EEXIST
+
def usage():
  """Write the command-line usage text to stdout and exit with status 1."""
  message = """usage: %s REPOS_PATH MAX_FILES_PER_SHARD [START END]

Perform an offline conversion of an FSFS repository between linear
(readable by Subversion 1.4 or later) and sharded (readable by
Subversion 1.5 or later) layouts.

The MAX_FILES_PER_SHARD argument specifies the maximum number of
files that will be stored in each shard (directory), or zero to
specify a linear layout. Subversion 1.5 uses a default value of
1000 files per shard.

Convert revisions START through END inclusive if specified, or all
revisions if unspecified.
""" % sys.argv[0]
  print(message)
  sys.exit(1)
+
def incompatible_repos_format(repos_path, format):
  """Report that REPOS_PATH uses the unsupported repository format FORMAT
  on stderr, then exit with status 1."""
  message = """error: unable to convert repository '%s'.

This repository is not compatible with this tool. Valid
repository formats are '3' or '5'; this repository is
format '%s'.

""" % (repos_path, format)
  sys.stderr.write(message)
  sys.stderr.flush()
  sys.exit(1)
+
def incompatible_fs_format(repos_path, format):
  """Report that REPOS_PATH holds a filesystem with the unsupported
  format FORMAT on stderr, then exit with status 1."""
  message = """error: unable to convert repository '%s'.

This repository contains a filesystem that is not compatible with
this tool. Valid filesystem formats are '1', '2', or '3'; this
repository contains a filesystem with format '%s'.

""" % (repos_path, format)
  sys.stderr.write(message)
  sys.stderr.flush()
  sys.exit(1)
+
def unexpected_fs_format_options(repos_path):
  """Report that the filesystem in REPOS_PATH carries unexpected data
  after its format number on stderr, then exit with status 1."""
  message = """error: unable to convert repository '%s'.

This repository contains a filesystem that appears to be invalid -
there is unexpected data after the filesystem format number.

""" % repos_path
  sys.stderr.write(message)
  sys.stderr.flush()
  sys.exit(1)
+
def incompatible_fs_format_option(repos_path, option):
  """Report that the filesystem in REPOS_PATH uses the unrecognised
  format option OPTION on stderr, then exit with status 1."""
  message = """error: unable to convert repository '%s'.

This repository contains a filesystem that is not compatible with
this tool. This tool recognises the 'layout' option but the
filesystem uses the '%s' option.

""" % (repos_path, option)
  sys.stderr.write(message)
  sys.stderr.flush()
  sys.exit(1)
+
def warn_about_fs_format_1(repos_path, format_path):
  """Warn on stderr that REPOS_PATH contains a format 1 FSFS filesystem
  whose conversion cannot be reversed by this tool, then exit with
  status 1 (the user must delete FORMAT_PATH to proceed)."""
  message = """warning: conversion of '%s' will be one-way.

This repository is currently readable by Subversion 1.1 or later.
This tool can convert this repository to one that is readable by
either Subversion 1.4 (or later) or Subversion 1.5 (or later),
but it is not able to convert it back to the original format - a
separate dump/load step would be required.

If you would like to upgrade this repository anyway, delete the
file '%s' and re-run this tool.

""" % (repos_path, format_path)
  sys.stderr.write(message)
  sys.stderr.flush()
  sys.exit(1)
+
def check_repos_format(repos_path):
  """Check that REPOS_PATH contains a repository with a suitable format;
  print a message and exit if not.

  Only repository formats '3' (created by Subversion 1.4) and '5'
  (created by Subversion 1.5) are accepted."""
  format_path = os.path.join(repos_path, 'format')
  try:
    # Use a context manager so the handle is closed promptly; the
    # original code leaked the open file.
    with open(format_path) as format_file:
      fmt = format_file.readline()
    if not fmt.endswith('\n'):
      incompatible_repos_format(repos_path, fmt + ' <missing newline>')
    fmt = fmt.rstrip('\n')
    if fmt not in ('3', '5'):
      incompatible_repos_format(repos_path, fmt)
  except IOError:
    # In all likelihood, the file doesn't exist.
    incompatible_repos_format(repos_path, '<unreadable>')
+
def check_fs_format(repos_path):
  """Check that REPOS_PATH contains a filesystem with a suitable format,
  or that it contains no format file; print a message and exit if neither
  is true.  Return bool whether the filesystem is sharded.

  Accepted filesystem formats are '2' and '3'; format '1' triggers a
  one-way-conversion warning and exits."""
  sharded = False
  db_path = os.path.join(repos_path, 'db')
  format_path = os.path.join(db_path, 'format')
  try:
    # The context manager guarantees the handle is closed even on the
    # error paths (which all leave via sys.exit); the original code left
    # the file open in those cases.
    with open(format_path) as format_file:
      fmt = format_file.readline()
      if not fmt.endswith('\n'):
        incompatible_fs_format(repos_path, fmt + ' <missing newline>')
      fmt = fmt.rstrip('\n')
      if fmt == '1':
        # This is a format 1 (svndiff0 only) filesystem. We can upgrade
        # it, but we can't downgrade again (since we can't uncompress any
        # of the svndiff1 deltas that may have been written). Warn the
        # user and exit.
        warn_about_fs_format_1(repos_path, format_path)
      elif fmt not in ('2', '3'):
        incompatible_fs_format(repos_path, fmt)

      for line in format_file:
        # Format 2 filesystems must not carry any format options at all.
        if fmt == '2':
          unexpected_fs_format_options(repos_path)

        line = line.rstrip('\n')
        if line == 'layout linear':
          pass
        elif line.startswith('layout sharded '):
          sharded = True
        else:
          incompatible_fs_format_option(repos_path, line)
  except IOError:
    # The format file might not exist if we've previously been interrupted,
    # or if the user is following our advice about upgrading a format 1
    # repository. In both cases, we'll just assume the format was
    # compatible.
    pass

  return sharded
+
def current_file(repos_path):
  """Return the whitespace-separated fields of the first line of
  REPOS_PATH/db/current as a list -- (revision, next_node_id,
  next_copy_id) in the formats this tool supports."""
  # Close the file deterministically; the original relied on the garbage
  # collector to reclaim the leaked handle.
  with open(os.path.join(repos_path, 'db', 'current')) as f:
    return f.readline().split()
+
def remove_fs_format(repos_path):
  """Delete REPOS_PATH/db/format if present; a missing file is a no-op."""
  fmt_path = os.path.join(repos_path, 'db', 'format')
  try:
    mode = os.stat(fmt_path).st_mode
  except OSError:
    # Most likely the file is already gone -- nothing to do.
    return

  # On Windows a read-only file cannot be deleted, so grant the owner
  # write permission before removing it.
  os.chmod(fmt_path, mode | stat.S_IWUSR)
  os.remove(fmt_path)
+
def write_fs_format(repos_path, contents):
  """Write a new filesystem format file for repository REPOS_PATH
  containing CONTENTS, then mark the file read-only (owner and group).

  CONTENTS may be str or bytes.  The file is written in binary mode to
  avoid platform newline translation."""
  format_path = os.path.join(repos_path, 'db', 'format')
  if not isinstance(contents, bytes):
    # Callers pass plain strings such as '2\n'.  Encoding here keeps the
    # binary-mode write working under Python 3; under Python 2 str is
    # bytes, so this branch is never taken and behavior is unchanged.
    contents = contents.encode('ascii')
  with open(format_path, 'wb') as f:
    f.write(contents)
  os.chmod(format_path, stat.S_IRUSR | stat.S_IRGRP)
+
def linearise(path):
  """Flatten PATH: move every file found in an immediate subdirectory of
  PATH up into PATH itself, then delete the emptied subdirectories.

  To avoid clashes between a subdirectory's name and a file it contains,
  each subdirectory is first renamed to carry a '.shard' suffix (the
  files are assumed not to use that suffix).  Aborts the program if a
  subdirectory turns out to contain a further subdirectory."""
  # Pass 1: give every subdirectory of PATH a '.shard' suffix.
  for entry in os.listdir(path):
    if entry.endswith('.shard'):
      continue
    candidate = os.path.join(path, entry)
    if os.path.isdir(candidate):
      os.rename(candidate, candidate + '.shard')

  # Pass 2: hoist each subdirectory's files into PATH and remove the
  # (now empty) subdirectory.
  for subdir, child_dirs, child_files in os.walk(path):
    if subdir == path:
      continue
    if child_dirs:
      sys.stderr.write("error: directory '%s' contains other unexpected directories.\n" \
        % subdir)
      sys.stderr.flush()
      sys.exit(1)
    for child in child_files:
      os.rename(os.path.join(subdir, child), os.path.join(path, child))
    os.rmdir(subdir)
+
def shard(path, max_files_per_shard, start, end):
  """Move the files for revisions START to END inclusive in PATH into
  subdirectories of PATH named so that subdirectory '0' contains at most
  MAX_FILES_PER_SHARD files, namely those named [0, MAX_FILES_PER_SHARD).

  Files are first gathered into a temporary sibling directory
  PATH.reshard (in subdirectories named N.shard) so that an interrupted
  run can safely be re-executed; the '.shard' suffix is stripped at the
  end.  Unexpected (non-'.shard') entries in the temporary directory are
  skipped with a warning, in which case the directory is kept."""
  tmp = path + '.reshard'
  try:
    os.mkdir(tmp)
  except OSError as e:
    # BUG FIX: 'except OSError, e' was Python 2-only syntax (a syntax
    # error under Python 3).  An already-existing PATH.reshard is fine:
    # we may be resuming an interrupted conversion.
    if e.errno != EEXIST:
      raise

  # Move all entries into shards named N.shard.
  for rev in range(start, end + 1):
    name = str(rev)
    # Renamed from 'shard', which shadowed this function's own name.
    shard_idx = rev // max_files_per_shard
    shard_name = str(shard_idx) + '.shard'

    from_path = os.path.join(path, name)
    to_path = os.path.join(tmp, shard_name, name)
    try:
      os.rename(from_path, to_path)
    except OSError:
      # The most likely explanation is that the shard directory doesn't
      # exist. Let's create it and retry the rename.
      os.mkdir(os.path.join(tmp, shard_name))
      os.rename(from_path, to_path)

  # Now rename all the shards to remove the suffix.
  skipped = 0
  for name in os.listdir(tmp):
    if not name.endswith('.shard'):
      sys.stderr.write("warning: ignoring unexpected subdirectory '%s'.\n" \
        % os.path.join(tmp, name))
      sys.stderr.flush()
      skipped += 1
      continue
    from_path = os.path.join(tmp, name)
    to_path = os.path.join(path, name[:-6])
    os.rename(from_path, to_path)
  # Only remove the staging directory when nothing unexpected was left
  # behind (clearer than the former 'skipped == 0 and os.rmdir(tmp)').
  if skipped == 0:
    os.rmdir(tmp)
+
def main():
  """Command-line entry point.

  Validates sys.argv (REPOS_PATH, MAX_FILES_PER_SHARD, optional START
  and END revisions), then converts the repository: linearise first if
  currently sharded, then either stamp a linear (format 2) layout or
  shard db/revs and db/revprops and stamp a sharded (format 3) layout.
  Exits 0 on success, 1 on any validation failure."""
  if len(sys.argv) < 3:
    usage()

  repos_path = sys.argv[1]
  max_files_per_shard = sys.argv[2]
  try:
    start = int(sys.argv[3])
    end = int(sys.argv[4])
  except IndexError:
    # No explicit range given: convert revision 0 through HEAD.
    start = 0
    end = int(current_file(repos_path)[0])

  # Validate the command-line arguments.
  db_path = os.path.join(repos_path, 'db')
  current_path = os.path.join(db_path, 'current')
  if not os.path.exists(current_path):
    sys.stderr.write("error: '%s' doesn't appear to be a Subversion FSFS repository.\n" \
      % repos_path)
    sys.stderr.flush()
    sys.exit(1)

  try:
    max_files_per_shard = int(max_files_per_shard)
  except (ValueError, OverflowError):
    # BUG FIX: this previously read 'except ValueError, OverflowError:',
    # which (in Python 2) bound the caught ValueError to the name
    # 'OverflowError' instead of also catching OverflowError, and which
    # is a syntax error under Python 3.
    sys.stderr.write("error: maximum files per shard ('%s') is not a valid number.\n" \
      % max_files_per_shard)
    sys.stderr.flush()
    sys.exit(1)

  if max_files_per_shard < 0:
    sys.stderr.write("error: maximum files per shard ('%d') must not be negative.\n" \
      % max_files_per_shard)
    sys.stderr.flush()
    sys.exit(1)

  # Check the format of the repository.
  check_repos_format(repos_path)
  sharded = check_fs_format(repos_path)

  # Let the user know what's going on.
  if max_files_per_shard > 0:
    print("Converting '%s' to a sharded structure with %d files per directory" \
      % (repos_path, max_files_per_shard))
    if sharded:
      print('(will convert to a linear structure first)')
  else:
    print("Converting '%s' to a linear structure" % repos_path)

  # Prevent access to the repository for the duration of the conversion.
  # There's no clean way to do this, but since the format of the repository
  # is indeterminate, let's remove the format file while we're converting.
  print('- marking the repository as invalid')
  remove_fs_format(repos_path)

  # First, convert to a linear scheme (this makes recovery easier because
  # it's easier to reason about the behaviour on restart).
  if sharded:
    print('- linearising db/revs')
    linearise(os.path.join(repos_path, 'db', 'revs'))
    print('- linearising db/revprops')
    linearise(os.path.join(repos_path, 'db', 'revprops'))

  if max_files_per_shard == 0:
    # We're done. Stamp the filesystem with a format 2 db/format file.
    print('- marking the repository as a valid linear repository')
    write_fs_format(repos_path, '2\n')
  else:
    print('- sharding db/revs')
    shard(os.path.join(repos_path, 'db', 'revs'), max_files_per_shard,
          start, end)
    print('- sharding db/revprops')
    shard(os.path.join(repos_path, 'db', 'revprops'), max_files_per_shard,
          start, end)

    # We're done. Stamp the filesystem with a format 3 db/format file.
    print('- marking the repository as a valid sharded repository')
    write_fs_format(repos_path, '3\nlayout sharded %d\n' % max_files_per_shard)

  print('- done.')
  sys.exit(0)
+
if __name__ == '__main__':
  # Deliberate safety interlock left by the authors: the script is
  # declared unfinished, so it refuses to run.  The main() call below is
  # unreachable until this raise is removed.
  raise Exception("""This script is unfinished and not ready to be used on live data.
  Trust us.""")
  main()
diff --git a/tools/server-side/mod_dontdothat/README b/tools/server-side/mod_dontdothat/README
new file mode 100644
index 0000000..7d4fe36
--- /dev/null
+++ b/tools/server-side/mod_dontdothat/README
@@ -0,0 +1,53 @@
+mod_dontdothat is an Apache module that allows you to block specific types
+of Subversion requests. Specifically, it's designed to keep users from doing
+things that are particularly hard on the server, like checking out the root
+of the tree, or the tags or branches directories. It works by sticking an
+input filter in front of all REPORT requests and looking for dangerous types
+of requests. If it finds any, it returns a 403 Forbidden error.
+
+You can compile and install it via apxs:
+
+$ apxs -c \
+ -I$PREFIX/include/subversion-1 \
+ -L$PREFIX/lib -lsvn_subr-1
+ mod_dontdothat.c
+
+$ apxs -i -n dontdothat mod_dontdothat.la
+
+It is enabled via a single httpd.conf directive, DontDoThatConfigFile:
+
+<Location /svn>
+ DAV svn
+ SVNParentPath /path/to/repositories
+ DontDoThatConfigFile /path/to/config.file
+ DontDoThatDisallowReplay off
+</Location>
+
+The file you give to DontDoThatConfigFile is a Subversion configuration file
+that contains the following sections.
+
+[recursive-actions]
+/*/trunk = allow
+/ = deny
+/* = deny
+/*/tags = deny
+/*/branches = deny
+/*/* = deny
+/*/*/tags = deny
+/*/*/branches = deny
+
+As you might guess, this defines a set of patterns that control what the
+user is not allowed to do. Anything with a 'deny' after it is denied, and,
+as an override mechanism, anything with an 'allow' after it is special-cased
+to be allowed, even if it also matches something that is denied.
+
+Note that the wildcard portions of a rule only swallow a single directory,
+so /* will match /foo, but not /foo/bar. They also must be at the end of
+a directory segment, so /foo* or /* are valid, but /*foo is not.
+
+These rules are applied to any recursive action, which basically means any
+Subversion command that goes through the update-report, like update, diff,
+checkout, merge, etc.
+
+The DontDoThatDisallowReplay option makes mod_dontdothat disallow
+replay requests; this option is on by default.
diff --git a/tools/server-side/mod_dontdothat/mod_dontdothat.c b/tools/server-side/mod_dontdothat/mod_dontdothat.c
new file mode 100644
index 0000000..c7c6613
--- /dev/null
+++ b/tools/server-side/mod_dontdothat/mod_dontdothat.c
@@ -0,0 +1,661 @@
+/*
+ * mod_dontdothat.c: an Apache filter that allows you to return arbitrary
+ * errors for various types of Subversion requests.
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#include <httpd.h>
+#include <http_config.h>
+#include <http_protocol.h>
+#include <http_request.h>
+#include <http_log.h>
+#include <util_filter.h>
+#include <ap_config.h>
+#include <apr_strings.h>
+
+#include <expat.h>
+
+#include "mod_dav_svn.h"
+#include "svn_string.h"
+#include "svn_config.h"
+
+module AP_MODULE_DECLARE_DATA dontdothat_module;
+
/* Per-directory configuration for this module. */
typedef struct dontdothat_config_rec {
  const char *config_file;  /* Path given by DontDoThatConfigFile. */
  const char *base_path;    /* Directory/location this config applies to. */
  int no_replay;            /* Non-zero: subject replay-report requests to
                             * the same recursive-action rules. */
} dontdothat_config_rec;
+
/* Per-directory config constructor: allocate a zeroed config record,
 * remember the directory it covers, and disallow replay by default. */
static void *create_dontdothat_dir_config(apr_pool_t *pool, char *dir)
{
  dontdothat_config_rec *cfg = apr_pcalloc(pool, sizeof(*cfg));

  cfg->base_path = dir;
  cfg->no_replay = 1;

  return cfg;
}
+
/* httpd.conf directives understood by this module; both store straight
 * into dontdothat_config_rec via the standard slot setters. */
static const command_rec dontdothat_cmds[] =
{
  AP_INIT_TAKE1("DontDoThatConfigFile", ap_set_file_slot,
                (void *) APR_OFFSETOF(dontdothat_config_rec, config_file),
                OR_ALL,
                "Text file containing actions to take for specific requests"),
  AP_INIT_FLAG("DontDoThatDisallowReplay", ap_set_flag_slot,
               (void *) APR_OFFSETOF(dontdothat_config_rec, no_replay),
               OR_ALL, "Disallow replay requests as if they are other recursive requests."),
  { NULL }
};
+
/* Current position within the REPORT request body being parsed. */
typedef enum parse_state_t {
  STATE_BEGINNING,     /* No interesting element seen yet. */
  STATE_IN_UPDATE,     /* Inside an update-report element. */
  STATE_IN_SRC_PATH,   /* Inside src-path; cdata is being collected. */
  STATE_IN_DST_PATH,   /* Inside dst-path; cdata is being collected. */
  STATE_IN_RECURSIVE   /* Inside recursive; cdata is being collected. */
} parse_state_t;
+
/* Per-request state for the DONTDOTHAT_FILTER input filter.  The same
 * structure also serves as the baton while enumerating the config file
 * (see config_enumerator). */
typedef struct dontdothat_filter_ctx {
  /* Set to TRUE when we determine that the request is safe and should be
   * allowed to continue. */
  svn_boolean_t let_it_go;

  /* Set to TRUE when we determine that the request is unsafe and should be
   * stopped in its tracks. */
  svn_boolean_t no_soup_for_you;

  XML_Parser xmlp;

  /* The current location in the REPORT body. */
  parse_state_t state;

  /* A buffer to hold CDATA we encounter. */
  svn_stringbuf_t *buffer;

  dontdothat_config_rec *cfg;

  /* An array of wildcards that are special cased to be allowed. */
  apr_array_header_t *allow_recursive_ops;

  /* An array of wildcards where recursive operations are not allowed. */
  apr_array_header_t *no_recursive_ops;

  /* TRUE if a path has failed a test already. */
  svn_boolean_t path_failed;

  /* An error for when we're using this as a baton while parsing config
   * files. */
  svn_error_t *err;

  /* The current request. */
  request_rec *r;
} dontdothat_filter_ctx;
+
/* Return TRUE if wildcard WC matches path P, FALSE otherwise.
 *
 * WC has been vetted by is_valid_wildcard(), so every '*' in it is
 * followed by '/' or by the end of the string; a '*' stands for a
 * single path segment.  Both strings are walked in lock step. */
static svn_boolean_t
matches(const char *wc, const char *p)
{
  for (;;)
    {
      switch (*wc)
        {
        case '*':
          if (wc[1] != '/' && wc[1] != '\0')
            abort(); /* This was checked for during parsing of the config. */

          /* It's a wild card, so eat up until the next / in p.
           * (p is left on the character *before* that '/', so the
           * joint increment below lines both strings up on '/'.) */
          while (*p && p[1] != '/')
            ++p;

          /* If we ran out of p and we're out of wc then it matched. */
          if (! *p)
            {
              if (wc[1] == '\0')
                return TRUE;
              else
                return FALSE;
            }
          break;

        case '\0':
          if (*p != '\0')
            /* This means we hit the end of wc without running out of p. */
            return FALSE;
          else
            /* Both strings ended together: an exact match. */
            return TRUE;

        default:
          if (*wc != *p)
            return FALSE; /* Mismatched literal characters: no match. */
          else
            break;
        }

      /* Advance both strings one character. */
      ++wc;
      ++p;

      /* p exhausted but wc still has characters left: no match. */
      if (! *p && *wc)
        return FALSE;
    }
}
+
/* Return TRUE iff a recursive operation rooted at URI is permitted.
 *
 * The repository-relative path is extracted via dav_svn_split_uri() and
 * tested against the configured wildcard lists: the 'allow' list is
 * consulted first and wins over the 'deny' list.  URIs that cannot be
 * split (or lack a scheme/path) fall through and are allowed. */
static svn_boolean_t
is_this_legal(dontdothat_filter_ctx *ctx, const char *uri)
{
  const char *relative_path;
  const char *cleaned_uri;
  const char *repos_name;
  int trailing_slash;
  dav_error *derr;

  /* Ok, so we need to skip past the scheme, host, etc. */
  uri = ap_strstr_c(uri, "://");
  if (uri)
    uri = ap_strchr_c(uri + 3, '/');

  if (uri)
    {
      const char *repos_path;

      /* Ask mod_dav_svn for the path within the repository. */
      derr = dav_svn_split_uri(ctx->r,
                               uri,
                               ctx->cfg->base_path,
                               &cleaned_uri,
                               &trailing_slash,
                               &repos_name,
                               &relative_path,
                               &repos_path);
      if (! derr)
        {
          int idx;

          if (! repos_path)
            repos_path = "";

          /* Canonicalize to a leading-slash form for wildcard matching. */
          repos_path = apr_psprintf(ctx->r->pool, "/%s", repos_path);

          /* First check the special cases that are always legal... */
          for (idx = 0; idx < ctx->allow_recursive_ops->nelts; ++idx)
            {
              const char *wc = APR_ARRAY_IDX(ctx->allow_recursive_ops,
                                             idx,
                                             const char *);

              if (matches(wc, repos_path))
                {
                  ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->r,
                                "mod_dontdothat: rule %s allows %s",
                                wc, repos_path);
                  return TRUE;
                }
            }

          /* Then look for stuff we explicitly don't allow. */
          for (idx = 0; idx < ctx->no_recursive_ops->nelts; ++idx)
            {
              const char *wc = APR_ARRAY_IDX(ctx->no_recursive_ops,
                                             idx,
                                             const char *);

              if (matches(wc, repos_path))
                {
                  ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->r,
                                "mod_dontdothat: rule %s forbids %s",
                                wc, repos_path);
                  return FALSE;
                }
            }
        }
    }

  /* Default: anything not explicitly denied is allowed. */
  return TRUE;
}
+
/* The DONTDOTHAT_FILTER input filter.
 *
 * Feeds the REPORT request body through the expat parser configured in
 * dontdothat_insert_filters() until a verdict is reached.  A disallowed
 * request gets a 403 error brigade sent straight to the client; an
 * allowed request causes the filter to remove itself and pass data
 * through untouched. */
static apr_status_t
dontdothat_filter(ap_filter_t *f,
                  apr_bucket_brigade *bb,
                  ap_input_mode_t mode,
                  apr_read_type_e block,
                  apr_off_t readbytes)
{
  dontdothat_filter_ctx *ctx = f->ctx;
  apr_status_t rv;
  apr_bucket *e;

  /* Only byte-mode reads carry body data we can parse; pass everything
   * else straight through. */
  if (mode != AP_MODE_READBYTES)
    return ap_get_brigade(f->next, bb, mode, block, readbytes);

  rv = ap_get_brigade(f->next, bb, mode, block, readbytes);
  if (rv)
    return rv;

  for (e = APR_BRIGADE_FIRST(bb);
       e != APR_BRIGADE_SENTINEL(bb);
       e = APR_BUCKET_NEXT(e))
    {
      svn_boolean_t last = APR_BUCKET_IS_EOS(e);
      const char *str;
      apr_size_t len;

      if (last)
        {
          /* EOS buckets carry no data; give expat an empty final chunk. */
          str = "";
          len = 0;
        }
      else
        {
          rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
          if (rv)
            return rv;
        }

      if (! XML_Parse(ctx->xmlp, str, len, last))
        {
          /* let_it_go so we clean up our parser, no_soup_for_you so that we
           * bail out before bothering to parse this stuff a second time. */
          ctx->let_it_go = TRUE;
          ctx->no_soup_for_you = TRUE;
        }

      /* If we found something that isn't allowed, set the correct status
       * and return an error so it'll bail out before it gets anywhere it
       * can do real damage. */
      if (ctx->no_soup_for_you)
        {
          /* XXX maybe set up the SVN-ACTION env var so that it'll show up
           * in the Subversion operational logs? */

          ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
                        "mod_dontdothat: client broke the rules, "
                        "returning error");

          /* Ok, pass an error bucket and an eos bucket back to the client.
           *
           * NOTE: The custom error string passed here doesn't seem to be
           * used anywhere by httpd. This is quite possibly a bug.
           *
           * TODO: Try and pass back a custom document body containing a
           * serialized svn_error_t so the client displays a better
           * error message. */
          bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
          e = ap_bucket_error_create(403, "No Soup For You!",
                                     f->r->pool, f->c->bucket_alloc);
          APR_BRIGADE_INSERT_TAIL(bb, e);
          e = apr_bucket_eos_create(f->c->bucket_alloc);
          APR_BRIGADE_INSERT_TAIL(bb, e);

          /* Don't forget to remove us, otherwise recursion blows the stack. */
          ap_remove_input_filter(f);

          return ap_pass_brigade(f->r->output_filters, bb);
        }
      else if (ctx->let_it_go || last)
        {
          ap_remove_input_filter(f);

          ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
                        "mod_dontdothat: letting request go through");

          return rv;
        }
    }

  return rv;
}
+
/* Expat character-data callback: while inside a src-path, dst-path or
 * recursive element, accumulate the text into ctx->buffer (created
 * lazily on first use).  Once a verdict has been reached, data is
 * ignored. */
static void
cdata(void *baton, const char *data, int len)
{
  dontdothat_filter_ctx *ctx = baton;

  if (ctx->no_soup_for_you || ctx->let_it_go)
    return;

  switch (ctx->state)
    {
    case STATE_IN_SRC_PATH:
      /* FALLTHROUGH */

    case STATE_IN_DST_PATH:
      /* FALLTHROUGH */

    case STATE_IN_RECURSIVE:
      if (! ctx->buffer)
        ctx->buffer = svn_stringbuf_ncreate(data, len, ctx->r->pool);
      else
        svn_stringbuf_appendbytes(ctx->buffer, data, len);
      break;

    default:
      break;
    }
}
+
/* Expat start-element callback: track the parse state machine.
 *
 * At the top level, an update-report element moves us into
 * STATE_IN_UPDATE; a replay-report element (when DontDoThatDisallowReplay
 * is on) is judged immediately against the request URI; any other root
 * element lets the request through.  Inside update-report, entering
 * src-path/dst-path/recursive resets the cdata buffer for collection. */
static void
start_element(void *baton, const char *name, const char **attrs)
{
  dontdothat_filter_ctx *ctx = baton;
  const char *sep;

  if (ctx->no_soup_for_you || ctx->let_it_go)
    return;

  /* XXX Hack. We should be doing real namespace support, but for now we
   * just skip ahead of any namespace prefix. If someone's sending us
   * an update-report element outside of the SVN namespace they'll get
   * what they deserve... */
  sep = ap_strchr_c(name, ':');
  if (sep)
    name = sep + 1;

  switch (ctx->state)
    {
    case STATE_BEGINNING:
      if (strcmp(name, "update-report") == 0)
        ctx->state = STATE_IN_UPDATE;
      else if (strcmp(name, "replay-report") == 0 && ctx->cfg->no_replay)
        {
          /* XXX it would be useful if there was a way to override this
           * on a per-user basis... */
          if (! is_this_legal(ctx, ctx->r->unparsed_uri))
            ctx->no_soup_for_you = TRUE;
          else
            ctx->let_it_go = TRUE;
        }
      else
        ctx->let_it_go = TRUE;
      break;

    case STATE_IN_UPDATE:
      if (strcmp(name, "src-path") == 0)
        {
          ctx->state = STATE_IN_SRC_PATH;
          /* Reset the shared cdata buffer before collecting new text. */
          if (ctx->buffer)
            ctx->buffer->len = 0;
        }
      else if (strcmp(name, "dst-path") == 0)
        {
          ctx->state = STATE_IN_DST_PATH;
          if (ctx->buffer)
            ctx->buffer->len = 0;
        }
      else if (strcmp(name, "recursive") == 0)
        {
          ctx->state = STATE_IN_RECURSIVE;
          if (ctx->buffer)
            ctx->buffer->len = 0;
        }
      else
        ; /* XXX Figure out what else we need to deal with... Switch
           * has that link-path thing we probably need to look out
           * for... */
      break;

    default:
      break;
    }
}
+
/* Expat end-element callback: act on the text collected by cdata().
 *
 * Closing src-path/dst-path runs the collected path through
 * is_this_legal() and records a failure; closing recursive with value
 * "no" lets the request through; closing update-report converts a
 * recorded path failure into the final verdict. */
static void
end_element(void *baton, const char *name)
{
  dontdothat_filter_ctx *ctx = baton;
  const char *sep;

  if (ctx->no_soup_for_you || ctx->let_it_go)
    return;

  /* XXX Hack. We should be doing real namespace support, but for now we
   * just skip ahead of any namespace prefix. If someone's sending us
   * an update-report element outside of the SVN namespace they'll get
   * what they deserve... */
  sep = ap_strchr_c(name, ':');
  if (sep)
    name = sep + 1;

  switch (ctx->state)
    {
    case STATE_IN_SRC_PATH:
      ctx->state = STATE_IN_UPDATE;

      svn_stringbuf_strip_whitespace(ctx->buffer);

      if (! ctx->path_failed && ! is_this_legal(ctx, ctx->buffer->data))
        ctx->path_failed = TRUE;
      break;

    case STATE_IN_DST_PATH:
      ctx->state = STATE_IN_UPDATE;

      svn_stringbuf_strip_whitespace(ctx->buffer);

      if (! ctx->path_failed && ! is_this_legal(ctx, ctx->buffer->data))
        ctx->path_failed = TRUE;
      break;

    case STATE_IN_RECURSIVE:
      ctx->state = STATE_IN_UPDATE;

      svn_stringbuf_strip_whitespace(ctx->buffer);

      /* If this isn't recursive we let it go. */
      if (strcmp(ctx->buffer->data, "no") == 0)
        {
          ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->r,
                        "mod_dontdothat: letting nonrecursive request go");
          ctx->let_it_go = TRUE;
        }
      break;

    case STATE_IN_UPDATE:
      if (strcmp(name, "update-report") == 0)
        {
          /* If we made it here without figuring out that this is
           * nonrecursive, then the path check is our final word
           * on the subject. */

          if (ctx->path_failed)
            ctx->no_soup_for_you = TRUE;
          else
            ctx->let_it_go = TRUE;
        }
      else
        ; /* XXX Is there other stuff we care about? */
      break;

    default:
      abort();
    }
}
+
+static svn_boolean_t
+is_valid_wildcard(const char *wc)
+{
+ while (*wc)
+ {
+ if (*wc == '*')
+ {
+ if (wc[1] && wc[1] != '/')
+ return FALSE;
+ }
+
+ ++wc;
+ }
+
+ return TRUE;
+}
+
+static svn_boolean_t
+config_enumerator(const char *wildcard,
+ const char *action,
+ void *baton,
+ apr_pool_t *pool)
+{
+ dontdothat_filter_ctx *ctx = baton;
+
+ if (strcmp(action, "deny") == 0)
+ {
+ if (is_valid_wildcard(wildcard))
+ APR_ARRAY_PUSH(ctx->no_recursive_ops, const char *) = wildcard;
+ else
+ ctx->err = svn_error_createf(APR_EINVAL,
+ NULL,
+ "'%s' is an invalid wildcard",
+ wildcard);
+ }
+ else if (strcmp(action, "allow") == 0)
+ {
+ if (is_valid_wildcard(wildcard))
+ APR_ARRAY_PUSH(ctx->allow_recursive_ops, const char *) = wildcard;
+ else
+ ctx->err = svn_error_createf(APR_EINVAL,
+ NULL,
+ "'%s' is an invalid wildcard",
+ wildcard);
+ }
+ else
+ {
+ ctx->err = svn_error_createf(APR_EINVAL,
+ NULL,
+ "'%s' is not a valid action",
+ action);
+ }
+
+ if (ctx->err)
+ return FALSE;
+ else
+ return TRUE;
+}
+
/* apr_pool cleanup callback: free the expat parser registered on the
 * request pool by dontdothat_insert_filters(). */
static apr_status_t
clean_up_parser(void *baton)
{
  XML_Parser xmlp = baton;

  XML_ParserFree(xmlp);

  return APR_SUCCESS;
}
+
/* insert_filter hook: for REPORT requests in locations that configure
 * DontDoThatConfigFile, read the config, build the allow/deny wildcard
 * lists, create an expat parser and attach the DONTDOTHAT_FILTER input
 * filter to the request.  Any config error is logged and the filter is
 * simply not installed (the request proceeds unfiltered). */
static void
dontdothat_insert_filters(request_rec *r)
{
  dontdothat_config_rec *cfg = ap_get_module_config(r->per_dir_config,
                                                    &dontdothat_module);

  if (! cfg->config_file)
    return;

  if (strcmp("REPORT", r->method) == 0)
    {
      dontdothat_filter_ctx *ctx = apr_pcalloc(r->pool, sizeof(*ctx));
      svn_config_t *config;
      svn_error_t *err;

      ctx->r = r;

      ctx->cfg = cfg;

      ctx->allow_recursive_ops = apr_array_make(r->pool, 5, sizeof(char *));

      ctx->no_recursive_ops = apr_array_make(r->pool, 5, sizeof(char *));

      /* XXX is there a way to error out from this point? Would be nice... */

      err = svn_config_read(&config, cfg->config_file, TRUE, r->pool);
      if (err)
        {
          char buff[256];

          /* Pass a zero status for svn's private error codes so httpd
           * does not try to interpret them. */
          ap_log_rerror(APLOG_MARK, APLOG_ERR,
                        ((err->apr_err >= APR_OS_START_USERERR &&
                          err->apr_err < APR_OS_START_CANONERR) ?
                         0 : err->apr_err),
                        r, "Failed to load DontDoThatConfigFile: %s",
                        svn_err_best_message(err, buff, sizeof(buff)));

          svn_error_clear(err);

          return;
        }

      svn_config_enumerate2(config,
                            "recursive-actions",
                            config_enumerator,
                            ctx,
                            r->pool);
      if (ctx->err)
        {
          char buff[256];

          ap_log_rerror(APLOG_MARK, APLOG_ERR,
                        ((ctx->err->apr_err >= APR_OS_START_USERERR &&
                          ctx->err->apr_err < APR_OS_START_CANONERR) ?
                         0 : ctx->err->apr_err),
                        r, "Failed to parse DontDoThatConfigFile: %s",
                        svn_err_best_message(ctx->err, buff, sizeof(buff)));

          svn_error_clear(ctx->err);

          return;
        }

      ctx->state = STATE_BEGINNING;

      ctx->xmlp = XML_ParserCreate(NULL);

      /* Ensure the parser is freed when the request pool is destroyed. */
      apr_pool_cleanup_register(r->pool, ctx->xmlp,
                                clean_up_parser,
                                apr_pool_cleanup_null);

      XML_SetUserData(ctx->xmlp, ctx);
      XML_SetElementHandler(ctx->xmlp, start_element, end_element);
      XML_SetCharacterDataHandler(ctx->xmlp, cdata);

      ap_add_input_filter("DONTDOTHAT_FILTER", ctx, r, r->connection);
    }
}
+
/* Register this module's insert_filter hook and the DONTDOTHAT_FILTER
 * input filter with httpd. */
static void
dontdothat_register_hooks(apr_pool_t *pool)
{
  ap_hook_insert_filter(dontdothat_insert_filters, NULL, NULL, APR_HOOK_FIRST);

  ap_register_input_filter("DONTDOTHAT_FILTER",
                           dontdothat_filter,
                           NULL,
                           AP_FTYPE_RESOURCE);
}
+
/* The module dispatch table httpd uses to drive mod_dontdothat. */
module AP_MODULE_DECLARE_DATA dontdothat_module =
{
  STANDARD20_MODULE_STUFF,
  create_dontdothat_dir_config, /* create per-directory config */
  NULL,                         /* merge per-directory config */
  NULL,                         /* create per-server config */
  NULL,                         /* merge per-server config */
  dontdothat_cmds,              /* configuration directives */
  dontdothat_register_hooks     /* register hooks and filters */
};
diff --git a/tools/server-side/svn-backup-dumps.py b/tools/server-side/svn-backup-dumps.py
new file mode 100755
index 0000000..bb6b235
--- /dev/null
+++ b/tools/server-side/svn-backup-dumps.py
@@ -0,0 +1,692 @@
+#!/usr/bin/env python
+#
+# svn-backup-dumps.py -- Create dumpfiles to backup a subversion repository.
+#
+# ====================================================================
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ====================================================================
+#
+# This script creates dump files from a subversion repository.
+# It is intended for use in cron jobs and post-commit hooks.
+#
+# The basic operation modes are:
+# 1. Create a full dump (revisions 0 to HEAD).
+# 2. Create incremental dumps containing at most N revisions.
+# 3. Create incremental single revision dumps (for use in post-commit).
+# 4. Create incremental dumps containing everything since last dump.
+#
+# All dump files are prefixed with the basename of the repository. All
+# examples below assume that the repository '/srv/svn/repos/src' is
+# dumped so all dumpfiles start with 'src'.
+#
+# Optional functionality:
+# 5. Create gzipped dump files.
+# 6. Create bzipped dump files.
+# 7. Transfer the dumpfile to another host using ftp.
+# 8. Transfer the dumpfile to another host using smb.
+#
+# See also 'svn-backup-dumps.py -h'.
+#
+#
+# 1. Create a full dump (revisions 0 to HEAD).
+#
+# svn-backup-dumps.py <repos> <dumpdir>
+#
+# <repos> Path to the repository.
+# <dumpdir> Directory for storing the dump file.
+#
+# This creates a dump file named 'src.000000-NNNNNN.svndmp.gz'
+# where NNNNNN is the revision number of HEAD.
+#
+#
+# 2. Create incremental dumps containing at most N revisions.
+#
+# svn-backup-dumps.py -c <count> <repos> <dumpdir>
+#
+# <count> Count of revisions per dump file.
+# <repos> Path to the repository.
+# <dumpdir> Directory for storing the dump file.
+#
+# When started the first time with a count of 1000 and if HEAD is
+# at 2923 it creates the following files:
+#
+# src.000000-000999.svndmp.gz
+# src.001000-001999.svndmp.gz
+# src.002000-002923.svndmp.gz
+#
+# Say the next time HEAD is at 3045 it creates these two files:
+#
+# src.002000-002999.svndmp.gz
+# src.003000-003045.svndmp.gz
+#
+#
+# 3. Create incremental single revision dumps (for use in post-commit).
+#
+# svn-backup-dumps.py -r <revnr> <repos> <dumpdir>
+#
+# <revnr> A revision number.
+# <repos> Path to the repository.
+# <dumpdir> Directory for storing the dump file.
+#
+# This creates a dump file named 'src.NNNNNN.svndmp.gz' where
+# NNNNNN is the given revision number.
+#
+#
+# 4. Create incremental dumps relative to last dump
+#
+# svn-backup-dumps.py -i <repos> <dumpdir>
+#
+# <repos> Path to the repository.
+# <dumpdir> Directory for storing the dump file.
+#
+# If the last dump was created when HEAD was at 2923,
+# then when HEAD is at 3045 it creates these files:
+#
+# src.000000-002923.svndmp.gz
+# src.002924-003045.svndmp.gz
+#
+#
+# 5. Create gzipped dump files.
+#
+# svn-backup-dumps.py -z ...
+#
+# ... More options, see 1-4, 7, 8.
+#
+#
+# 6. Create bzipped dump files.
+#
+# svn-backup-dumps.py -b ...
+#
+# ... More options, see 1-4, 7, 8.
+#
+#
+# 7. Transfer the dumpfile to another host using ftp.
+#
+# svn-backup-dumps.py -t ftp:<host>:<user>:<password>:<path> ...
+#
+# <host> Name of the FTP host.
+# <user> Username on the remote host.
+# <password> Password for the user.
+# <path> Subdirectory on the remote host.
+# ... More options, see 1-6.
+#
+# If <path> contains the string '%r' it is replaced by the
+# repository name (basename of the repository path).
+#
+#
+# 8. Transfer the dumpfile to another host using smb.
+#
+# svn-backup-dumps.py -t smb:<share>:<user>:<password>:<path> ...
+#
+# <share> Name of an SMB share in the form '//host/share'.
+# <user> Username on the remote host.
+# <password> Password for the user.
+# <path> Subdirectory of the share.
+# ... More options, see 1-6.
+#
+# If <path> contains the string '%r' it is replaced by the
+# repository name (basename of the repository path).
+#
+#
+#
+# TODO:
+# - find out how to report smbclient errors
+# - improve documentation
+#
+
+# Script version, shown by the --version option (referenced as
+# "%prog "+__version when building the OptionParser below).
+__version = "0.6"
+
+import sys
+import os
+# fcntl and select are POSIX-only; on Windows exec_cmd_nt() is used
+# instead of exec_cmd_unix(), so these imports are skipped there.
+if os.name != "nt":
+    import fcntl
+    import select
+import gzip
+import os.path
+import re
+from optparse import OptionParser
+from ftplib import FTP
+from subprocess import Popen, PIPE
+
+# bz2 is optional; the -b option is only offered when it is available.
+try:
+    import bz2
+    have_bz2 = True
+except ImportError:
+    have_bz2 = False
+
+
+class SvnBackupOutput:
+    """Base class for dump-file writers.
+
+    Subclasses (Plain, Gzip, Bzip2, Command) override open/write/close
+    to implement uncompressed or compressed output.  The base class only
+    records the dump file name and its absolute path.
+    """
+
+    def __init__(self, abspath, filename):
+        self.__filename = filename
+        self.__absfilename = os.path.join(abspath, filename)
+
+    def open(self):
+        # No-op in the base class; subclasses open their output here.
+        pass
+
+    def write(self, data):
+        # No-op in the base class; subclasses write the data here.
+        pass
+
+    def close(self):
+        # No-op in the base class; subclasses flush/close their output.
+        pass
+
+    def get_filename(self):
+        # Dump file name (basename only, including any extension).
+        return self.__filename
+
+    def get_absfilename(self):
+        # Full path of the dump file below the dump directory.
+        return self.__absfilename
+
+
+class SvnBackupOutputPlain(SvnBackupOutput):
+    """Writes the dump uncompressed."""
+
+    def __init__(self, abspath, filename):
+        SvnBackupOutput.__init__(self, abspath, filename)
+
+    def open(self):
+        self.__ofd = open(self.get_absfilename(), "wb")
+
+    def write(self, data):
+        self.__ofd.write(data)
+
+    def close(self):
+        self.__ofd.close()
+
+
+class SvnBackupOutputGzip(SvnBackupOutput):
+    """Writes the dump through gzip.GzipFile; appends '.gz' to the name."""
+
+    def __init__(self, abspath, filename):
+        SvnBackupOutput.__init__(self, abspath, filename + ".gz")
+
+    def open(self):
+        self.__compressor = gzip.GzipFile(filename=self.get_absfilename(),
+                                          mode="wb")
+
+    def write(self, data):
+        self.__compressor.write(data)
+
+    def close(self):
+        self.__compressor.flush()
+        self.__compressor.close()
+
+
+class SvnBackupOutputBzip2(SvnBackupOutput):
+    """Writes the dump through bz2.BZ2Compressor; appends '.bz2'."""
+
+    def __init__(self, abspath, filename):
+        SvnBackupOutput.__init__(self, abspath, filename + ".bz2")
+
+    def open(self):
+        self.__compressor = bz2.BZ2Compressor()
+        self.__ofd = open(self.get_absfilename(), "wb")
+
+    def write(self, data):
+        self.__ofd.write(self.__compressor.compress(data))
+
+    def close(self):
+        # flush() returns whatever the compressor still has buffered.
+        self.__ofd.write(self.__compressor.flush())
+        self.__ofd.close()
+
+class SvnBackupOutputCommand(SvnBackupOutput):
+    """Pipes the dump through an external compression command (given via
+    --gzip-path/--bzip2-path), with the command's stdout redirected into
+    the target file."""
+
+    def __init__(self, abspath, filename, file_extension, cmd_path,
+                 cmd_options):
+        SvnBackupOutput.__init__(self, abspath, filename + file_extension)
+        self.__cmd_path = cmd_path
+        self.__cmd_options = cmd_options
+
+    def open(self):
+        # cmd_options is passed as a single argv element (e.g. "-cz").
+        cmd = [ self.__cmd_path, self.__cmd_options ]
+
+        self.__ofd = open(self.get_absfilename(), "wb")
+        try:
+            proc = Popen(cmd, stdin=PIPE, stdout=self.__ofd, shell=False)
+        except:
+            # NOTE(review): this prints a tuple shaped like exec_cmd()'s
+            # error-return value; an error message (on stderr) was likely
+            # intended here.
+            print (256, "", "Popen failed (%s ...):\n %s" % (cmd[0],
+                str(sys.exc_info()[1])))
+            sys.exit(256)
+        self.__proc = proc
+        self.__stdin = proc.stdin
+
+    def write(self, data):
+        self.__stdin.write(data)
+
+    def close(self):
+        # Close the pipe so the child sees EOF, then wait for it to exit.
+        self.__stdin.close()
+        rc = self.__proc.wait()  # NOTE(review): rc is ignored here.
+        self.__ofd.close()
+
+class SvnBackupException(Exception):
+    """Raised for all anticipated failures; the __main__ block catches
+    it and prints the message instead of a traceback."""
+
+    def __init__(self, errortext):
+        self.errortext = errortext
+
+    def __str__(self):
+        return self.errortext
+
+class SvnBackup:
+    """Drives the backup: validates arguments and options, runs
+    'svnadmin dump' into a SvnBackupOutput writer, and optionally
+    transfers the resulting dump file via ftp or smb."""
+
+    def __init__(self, options, args):
+        # need 3 args: progname, reposname, dumpdir
+        if len(args) != 3:
+            if len(args) < 3:
+                raise SvnBackupException("too few arguments, specify"
+                        " repospath and dumpdir.\nuse -h or"
+                        " --help option to see help.")
+            else:
+                raise SvnBackupException("too many arguments, specify"
+                        " repospath and dumpdir only.\nuse"
+                        " -h or --help option to see help.")
+        self.__repospath = args[1]
+        self.__dumpdir = args[2]
+        # check repospath
+        rpathparts = os.path.split(self.__repospath)
+        if len(rpathparts[1]) == 0:
+            # repospath without trailing slash
+            self.__repospath = rpathparts[0]
+        if not os.path.exists(self.__repospath):
+            raise SvnBackupException("repos '%s' does not exist." % self.__repospath)
+        if not os.path.isdir(self.__repospath):
+            raise SvnBackupException("repos '%s' is not a directory." % self.__repospath)
+        # sanity check: a repository root contains these subdirectories
+        for subdir in [ "db", "conf", "hooks" ]:
+            dir = os.path.join(self.__repospath, subdir)
+            if not os.path.isdir(dir):
+                raise SvnBackupException("repos '%s' is not a repository." % self.__repospath)
+        rpathparts = os.path.split(self.__repospath)
+        self.__reposname = rpathparts[1]
+        if self.__reposname in [ "", ".", ".." ]:
+            raise SvnBackupException("couldn't extract repos name from '%s'." % self.__repospath)
+        # check dumpdir
+        if not os.path.exists(self.__dumpdir):
+            raise SvnBackupException("dumpdir '%s' does not exist." % self.__dumpdir)
+        elif not os.path.isdir(self.__dumpdir):
+            raise SvnBackupException("dumpdir '%s' is not a directory." % self.__dumpdir)
+        # set options
+        self.__rev_nr = options.rev
+        self.__count = options.cnt
+        self.__quiet = options.quiet
+        self.__deltas = options.deltas
+        self.__relative_incremental = options.relative_incremental
+
+        # svnadmin/svnlook path
+        self.__svnadmin_path = "svnadmin"
+        if options.svnadmin_path:
+            self.__svnadmin_path = options.svnadmin_path
+        self.__svnlook_path = "svnlook"
+        if options.svnlook_path:
+            self.__svnlook_path = options.svnlook_path
+
+        # check compress option -- the four compression choices are
+        # mutually exclusive, so count how many were requested
+        self.__gzip_path = options.gzip_path
+        self.__bzip2_path = options.bzip2_path
+        self.__zip = None
+        compress_options = 0
+        if options.gzip_path != None:
+            compress_options = compress_options + 1
+        if options.bzip2_path != None:
+            compress_options = compress_options + 1
+        if options.bzip2:
+            compress_options = compress_options + 1
+            self.__zip = "bzip2"
+        if options.gzip:
+            compress_options = compress_options + 1
+            self.__zip = "gzip"
+        if compress_options > 1:
+            raise SvnBackupException("--bzip2-path, --gzip-path, -b, -z are "
+                    "mutually exclusive.")
+
+        # -o stores 1 (overwrite last file), -O stores 2 (overwrite all)
+        self.__overwrite = False
+        self.__overwrite_all = False
+        if options.overwrite > 0:
+            self.__overwrite = True
+        if options.overwrite > 1:
+            self.__overwrite_all = True
+        # transfer spec is 'method:host-or-share:user:password:path'
+        self.__transfer = None
+        if options.transfer != None:
+            self.__transfer = options.transfer.split(":")
+            if len(self.__transfer) != 5:
+                if len(self.__transfer) < 5:
+                    raise SvnBackupException("too few fields for transfer '%s'." % self.__transfer)
+                else:
+                    raise SvnBackupException("too many fields for transfer '%s'." % self.__transfer)
+            if self.__transfer[0] not in [ "ftp", "smb" ]:
+                raise SvnBackupException("unknown transfer method '%s'." % self.__transfer[0])
+
+    def set_nonblock(self, fileobj):
+        # Put FILEOBJ's fd into non-blocking mode (POSIX only); needed so
+        # exec_cmd_unix() can multiplex stdout/stderr with select().
+        fd = fileobj.fileno()
+        n = fcntl.fcntl(fd, fcntl.F_GETFL)
+        fcntl.fcntl(fd, fcntl.F_SETFL, n|os.O_NONBLOCK)
+
+    def exec_cmd(self, cmd, output=None, printerr=False):
+        # Dispatch to the platform-specific runner; both return a tuple
+        # (returncode, stdout-text, stderr-text).
+        if os.name == "nt":
+            return self.exec_cmd_nt(cmd, output, printerr)
+        else:
+            return self.exec_cmd_unix(cmd, output, printerr)
+
+    def exec_cmd_unix(self, cmd, output=None, printerr=False):
+        # Run CMD; stdout is streamed into OUTPUT (a SvnBackupOutput)
+        # when given, otherwise buffered.  stderr is echoed to stdout
+        # when PRINTERR, otherwise buffered.
+        try:
+            proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False)
+        except:
+            return (256, "", "Popen failed (%s ...):\n %s" % (cmd[0],
+                str(sys.exc_info()[1])))
+        stdout = proc.stdout
+        stderr = proc.stderr
+        self.set_nonblock(stdout)
+        self.set_nonblock(stderr)
+        readfds = [ stdout, stderr ]
+        selres = select.select(readfds, [], [])
+        bufout = ""
+        buferr = ""
+        while len(selres[0]) > 0:
+            for fd in selres[0]:
+                buf = fd.read(16384)
+                if len(buf) == 0:
+                    # EOF on this stream; stop selecting on it.
+                    readfds.remove(fd)
+                elif fd == stdout:
+                    if output:
+                        output.write(buf)
+                    else:
+                        bufout += buf
+                else:
+                    if printerr:
+                        sys.stdout.write("%s " % buf)
+                    else:
+                        buferr += buf
+            if len(readfds) == 0:
+                break
+            selres = select.select(readfds, [], [])
+        rc = proc.wait()
+        if printerr:
+            print("")
+        return (rc, bufout, buferr)
+
+    def exec_cmd_nt(self, cmd, output=None, printerr=False):
+        # Windows variant: no select()/non-blocking fds, so stdout is
+        # read sequentially and stderr is not captured at all.
+        try:
+            proc = Popen(cmd, stdout=PIPE, stderr=None, shell=False)
+        except:
+            return (256, "", "Popen failed (%s ...):\n %s" % (cmd[0],
+                str(sys.exc_info()[1])))
+        stdout = proc.stdout
+        bufout = ""
+        buferr = ""
+        buf = stdout.read(16384)
+        while len(buf) > 0:
+            if output:
+                output.write(buf)
+            else:
+                bufout += buf
+            buf = stdout.read(16384)
+        rc = proc.wait()
+        return (rc, bufout, buferr)
+
+    def get_head_rev(self):
+        # Ask 'svnlook youngest' for the repository's HEAD revision;
+        # returns -1 (after printing stderr) on failure.
+        cmd = [ self.__svnlook_path, "youngest", self.__repospath ]
+        r = self.exec_cmd(cmd)
+        if r[0] == 0 and len(r[2]) == 0:
+            return int(r[1].strip())
+        else:
+            print(r[2])
+            return -1
+
+    def get_last_dumped_rev(self):
+        # Scan the dump directory for files named like
+        # '<repos>.<from>-<to>.svndmp*' and return the highest <to>.
+        filename_regex = re.compile("(.+)\.\d+-(\d+)\.svndmp.*")
+        # start with -1 so the next one will be rev 0
+        highest_rev = -1
+
+        for filename in os.listdir(self.__dumpdir):
+            m = filename_regex.match( filename )
+            if m and (m.group(1) == self.__reposname):
+                rev_end = int(m.group(2))
+
+                if rev_end > highest_rev:
+                    # determine the latest revision dumped
+                    highest_rev = rev_end
+
+        return highest_rev
+
+    def transfer_ftp(self, absfilename, filename):
+        # Upload the dump via FTP; any failure is wrapped in a
+        # SvnBackupException.
+        rc = False
+        try:
+            host = self.__transfer[1]
+            user = self.__transfer[2]
+            passwd = self.__transfer[3]
+            # '%r' in the destination path stands for the repository name
+            destdir = self.__transfer[4].replace("%r", self.__reposname)
+            ftp = FTP(host, user, passwd)
+            ftp.cwd(destdir)
+            ifd = open(absfilename, "rb")
+            ftp.storbinary("STOR %s" % filename, ifd)
+            ftp.quit()
+            # storbinary should have consumed the file; success means EOF
+            rc = len(ifd.read(1)) == 0
+            ifd.close()
+        except Exception, e:
+            raise SvnBackupException("ftp transfer failed:\n file: '%s'\n error: %s" % \
+                    (absfilename, str(e)))
+        return rc
+
+    def transfer_smb(self, absfilename, filename):
+        # Upload the dump with the external 'smbclient' tool.
+        share = self.__transfer[1]
+        user = self.__transfer[2]
+        passwd = self.__transfer[3]
+        if passwd == "":
+            # empty password: tell smbclient to suppress the prompt
+            passwd = "-N"
+        destdir = self.__transfer[4].replace("%r", self.__reposname)
+        cmd = ("smbclient", share, "-U", user, passwd, "-D", destdir,
+               "-c", "put %s %s" % (absfilename, filename))
+        r = self.exec_cmd(cmd)
+        rc = r[0] == 0
+        if not rc:
+            print(r[2])
+        return rc
+
+    def transfer(self, absfilename, filename):
+        # Dispatch on the transfer method chosen with -t (or do nothing).
+        if self.__transfer == None:
+            return
+        elif self.__transfer[0] == "ftp":
+            self.transfer_ftp(absfilename, filename)
+        elif self.__transfer[0] == "smb":
+            self.transfer_smb(absfilename, filename)
+        else:
+            print("unknown transfer method '%s'." % self.__transfer[0])
+
+    def create_dump(self, checkonly, overwrite, fromrev, torev=None):
+        # Dump revisions FROMREV[:TOREV] into a (possibly compressed)
+        # file in the dump directory.  With CHECKONLY, only report
+        # whether the target file already exists.  Returns success.
+        revparam = "%d" % fromrev
+        r = "%06d" % fromrev
+        if torev != None:
+            revparam += ":%d" % torev
+            r += "-%06d" % torev
+        filename = "%s.%s.svndmp" % (self.__reposname, r)
+        # pick the output writer matching the compression options
+        output = None
+        if self.__bzip2_path:
+            output = SvnBackupOutputCommand(self.__dumpdir, filename, ".bz2",
+                    self.__bzip2_path, "-cz" )
+        elif self.__gzip_path:
+            output = SvnBackupOutputCommand(self.__dumpdir, filename, ".gz",
+                    self.__gzip_path, "-cf" )
+        elif self.__zip:
+            if self.__zip == "gzip":
+                output = SvnBackupOutputGzip(self.__dumpdir, filename)
+            else:
+                output = SvnBackupOutputBzip2(self.__dumpdir, filename)
+        else:
+            output = SvnBackupOutputPlain(self.__dumpdir, filename)
+        absfilename = output.get_absfilename()
+        realfilename = output.get_filename()
+        if checkonly:
+            return os.path.exists(absfilename)
+        elif os.path.exists(absfilename):
+            if overwrite:
+                print("overwriting " + absfilename)
+            else:
+                print("%s already exists." % absfilename)
+                return True
+        else:
+            print("writing " + absfilename)
+        cmd = [ self.__svnadmin_path, "dump",
+                "--incremental", "-r", revparam, self.__repospath ]
+        # extra options are spliced in right after "dump"
+        if self.__quiet:
+            cmd[2:2] = [ "-q" ]
+        if self.__deltas:
+            cmd[2:2] = [ "--deltas" ]
+        output.open()
+        r = self.exec_cmd(cmd, output, True)
+        output.close()
+        rc = r[0] == 0
+        if rc:
+            self.transfer(absfilename, realfilename)
+        return rc
+
+    def export_single_rev(self):
+        # Mode 3: dump exactly the revision given with -r.
+        return self.create_dump(False, self.__overwrite, self.__rev_nr)
+
+    def export(self):
+        # Modes 1 and 2: full dump, or fixed-size incremental chunks.
+        headrev = self.get_head_rev()
+        if headrev == -1:
+            return False
+        if self.__count is None:
+            # no -c given: one full dump 0..HEAD
+            return self.create_dump(False, self.__overwrite, 0, headrev)
+        # with -c: walk backwards from the last full chunk, re-creating
+        # any missing chunk files, then dump the trailing partial chunk
+        baserev = headrev - (headrev % self.__count)
+        rc = True
+        cnt = self.__count
+        fromrev = baserev - cnt
+        torev = baserev - 1
+        while fromrev >= 0 and rc:
+            if self.__overwrite_all or \
+                    not self.create_dump(True, False, fromrev, torev):
+                rc = self.create_dump(False, self.__overwrite_all,
+                        fromrev, torev)
+                fromrev -= cnt
+                torev -= cnt
+            else:
+                # chunk file already exists; stop walking backwards
+                fromrev = -1
+        if rc:
+            rc = self.create_dump(False, self.__overwrite, baserev, headrev)
+        return rc
+
+    def export_relative_incremental(self):
+        # Mode 4 (-i): dump everything newer than the last dump on disk.
+        headrev = self.get_head_rev()
+        if headrev == -1:
+            return False
+
+        last_dumped_rev = self.get_last_dumped_rev();
+        if headrev < last_dumped_rev:
+            # that should not happen...
+            return False
+
+        if headrev == last_dumped_rev:
+            # already up-to-date
+            return True
+
+        return self.create_dump(False, False, last_dumped_rev + 1, headrev)
+
+    def execute(self):
+        # Entry point: choose the operation mode from the options.
+        if self.__rev_nr != None:
+            return self.export_single_rev()
+        elif self.__relative_incremental:
+            return self.export_relative_incremental()
+        else:
+            return self.export()
+
+
+if __name__ == "__main__":
+    # Command-line front end: build the option parser, honour the
+    # --help-transfer pseudo-command, then run the backup.
+    usage = "usage: svn-backup-dumps.py [options] repospath dumpdir"
+    parser = OptionParser(usage=usage, version="%prog "+__version)
+    # -b is only offered when the bz2 module could be imported
+    if have_bz2:
+        parser.add_option("-b",
+                          action="store_true",
+                          dest="bzip2", default=False,
+                          help="compress the dump using python bzip2 library.")
+    parser.add_option("-i",
+                      action="store_true",
+                      dest="relative_incremental", default=False,
+                      help="perform incremental relative to last dump.")
+    parser.add_option("--deltas",
+                      action="store_true",
+                      dest="deltas", default=False,
+                      help="pass --deltas to svnadmin dump.")
+    parser.add_option("-c",
+                      action="store", type="int",
+                      dest="cnt", default=None,
+                      help="count of revisions per dumpfile.")
+    parser.add_option("-o",
+                      action="store_const", const=1,
+                      dest="overwrite", default=0,
+                      help="overwrite files.")
+    parser.add_option("-O",
+                      action="store_const", const=2,
+                      dest="overwrite", default=0,
+                      help="overwrite all files.")
+    parser.add_option("-q",
+                      action="store_true",
+                      dest="quiet", default=False,
+                      help="quiet.")
+    parser.add_option("-r",
+                      action="store", type="int",
+                      dest="rev", default=None,
+                      help="revision number for single rev dump.")
+    parser.add_option("-t",
+                      action="store", type="string",
+                      dest="transfer", default=None,
+                      help="transfer dumps to another machine "+
+                           "(s.a. --help-transfer).")
+    parser.add_option("-z",
+                      action="store_true",
+                      dest="gzip", default=False,
+                      help="compress the dump using python gzip library.")
+    parser.add_option("--bzip2-path",
+                      action="store", type="string",
+                      dest="bzip2_path", default=None,
+                      help="compress the dump using bzip2 custom command.")
+    parser.add_option("--gzip-path",
+                      action="store", type="string",
+                      dest="gzip_path", default=None,
+                      help="compress the dump using gzip custom command.")
+    parser.add_option("--svnadmin-path",
+                      action="store", type="string",
+                      dest="svnadmin_path", default=None,
+                      help="svnadmin command path.")
+    parser.add_option("--svnlook-path",
+                      action="store", type="string",
+                      dest="svnlook_path", default=None,
+                      help="svnlook command path.")
+    parser.add_option("--help-transfer",
+                      action="store_true",
+                      dest="help_transfer", default=False,
+                      help="shows detailed help for the transfer option.")
+    (options, args) = parser.parse_args(sys.argv)
+    if options.help_transfer:
+        print("Transfer help:")
+        print("")
+        print(" FTP:")
+        print(" -t ftp:<host>:<user>:<password>:<dest-path>")
+        print("")
+        print(" SMB (using smbclient):")
+        print(" -t smb:<share>:<user>:<password>:<dest-path>")
+        print("")
+        sys.exit(0)
+    rc = False
+    try:
+        backup = SvnBackup(options, args)
+        rc = backup.execute()
+    except SvnBackupException, e:
+        print("svn-backup-dumps.py: %s" % e)
+    # exit status: 0 on success, 1 on any failure
+    # NOTE(review): "occured" below is a typo ("occurred"); left as-is
+    # here because it is user-visible runtime output.
+    if rc:
+        print("Everything OK.")
+        sys.exit(0)
+    else:
+        print("An error occured!")
+        sys.exit(1)
+
+# vim:et:ts=4:sw=4
diff --git a/tools/server-side/svn-populate-node-origins-index.c b/tools/server-side/svn-populate-node-origins-index.c
new file mode 100644
index 0000000..b9762c4
--- /dev/null
+++ b/tools/server-side/svn-populate-node-origins-index.c
@@ -0,0 +1,193 @@
+/*
+ * svn-populate-node-origins-index.c : Populate the repository's node
+ * origins index.
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#include "svn_cmdline.h"
+#include "svn_error.h"
+#include "svn_fs.h"
+#include "svn_path.h"
+#include "svn_pools.h"
+#include "svn_repos.h"
+#include "svn_utf.h"
+
+/* Used to terminate lines in large multi-line string literals. */
+#define NL APR_EOL_STR
+
+/* Multi-line help text printed by usage_maybe_with_err(); each line is
+   terminated with NL (APR_EOL_STR) for platform-correct line endings. */
+static const char *usage_summary =
+  "Crawl the Subversion repository located at REPOS-PATH in an attempt to" NL
+  "populate that repository's index of node origins. " NL
+  "" NL
+  "The node origins index is new as of Subversion 1.5, and behaves as a" NL
+  "cache to vastly speed up certain history-querying operations. For" NL
+  "compatibility with repositories created with pre-1.5 versions of" NL
+  "Subversion, Subversion will gracefully handle cache misses by doing a" NL
+  "brute-force calculation of the query answer and lazily populating the" NL
+  "index with answers it calculates. Unfortunately, calculating that" NL
+  "information using the brute-force method (instead of having the" NL
+  "information appear in the index organically) can be very costly." NL
+  "" NL
+  "This tool triggers the lazy index population logic built into" NL
+  "Subversion in a fashion far more efficient than is likely to happen" NL
+  "during typical repository usage. It can be run while the repository" NL
+  "is online, too, without interrupting normal Subversion activities." NL;
+
+/* Print a usage message for this program (PROGNAME), possibly with an
+   error message ERR_MSG, if not NULL. */
+static void
+usage_maybe_with_err(const char *progname, const char *err_msg)
+{
+  FILE *out;
+
+  /* Errors go to stderr; plain usage requests go to stdout. */
+  out = err_msg ? stderr : stdout;
+  fprintf(out, "Usage: %s REPOS-PATH\n\n%s", progname, usage_summary);
+  if (err_msg)
+    fprintf(out, "\nERROR: %s\n", err_msg);
+}
+
+/* Build the node-origins index for any newly added items introduced in
+   REVISION in FS.  Set *COUNT to the number of new items found. */
+static svn_error_t *
+index_revision_adds(int *count, svn_fs_t *fs,
+                    svn_revnum_t revision, apr_pool_t *pool)
+{
+  svn_fs_root_t *root;
+  apr_hash_t *changes;
+  apr_hash_index_t *hi;
+  apr_pool_t *subpool;
+
+  *count = 0;
+  SVN_ERR(svn_fs_revision_root(&root, fs, revision, pool));
+  SVN_ERR(svn_fs_paths_changed2(&changes, root, pool));
+
+  /* No paths changed in this revision? Nothing to do. */
+  if (apr_hash_count(changes) == 0)
+    return SVN_NO_ERROR;
+
+  subpool = svn_pool_create(pool);
+  for (hi = apr_hash_first(pool, changes); hi; hi = apr_hash_next(hi))
+    {
+      const void *path;
+      void *val;
+      svn_fs_path_change2_t *change;
+
+      /* Reclaim per-iteration allocations before handling this change. */
+      svn_pool_clear(subpool);
+      apr_hash_this(hi, &path, NULL, &val);
+      change = val;
+      if ((change->change_kind == svn_fs_path_change_add)
+          || (change->change_kind == svn_fs_path_change_replace))
+        {
+          /* Only nodes added/replaced *without* copy-from history start
+             a new line of history; querying their origin here triggers
+             the lazy index population described in usage_summary. */
+          if (! (change->copyfrom_path
+                 && SVN_IS_VALID_REVNUM(change->copyfrom_rev)))
+            {
+              svn_revnum_t origin;
+              SVN_ERR(svn_fs_node_origin_rev(&origin, root, path, subpool));
+              (*count)++;
+            }
+        }
+    }
+  svn_pool_destroy(subpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Build the node-origins index for the repository located at REPOS_PATH. */
+static svn_error_t *
+build_index(const char *repos_path, apr_pool_t *pool)
+{
+  svn_repos_t *repos;
+  svn_fs_t *fs;
+  svn_revnum_t youngest_rev, i;
+  size_t slotsize;
+  const char *progress_fmt;
+  apr_pool_t *subpool;
+
+  /* Open the repository. */
+  SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, pool));
+
+  /* Get a filesystem object. */
+  fs = svn_repos_fs(repos);
+
+  /* Fetch the youngest revision of the repository. */
+  SVN_ERR(svn_fs_youngest_rev(&youngest_rev, fs, pool));
+  /* Width (in digits) of the youngest revision number, used to build a
+     printf format that right-aligns the two progress counters. */
+  slotsize = strlen(apr_ltoa(pool, youngest_rev));
+  progress_fmt = apr_psprintf
+    (pool,
+     "[%%%" APR_SIZE_T_FMT "ld"
+     "/%%%" APR_SIZE_T_FMT "ld] "
+     "Found %%d new lines of history."
+     "\n", slotsize, slotsize);
+
+  /* Now, iterate over all the revisions, calling index_revision_adds(). */
+  subpool = svn_pool_create(pool);
+  for (i = 0; i < youngest_rev; i++)
+    {
+      int count;
+      svn_pool_clear(subpool);
+      /* Revisions are processed from 1 through youngest_rev. */
+      SVN_ERR(index_revision_adds(&count, fs, i + 1, subpool));
+      printf(progress_fmt, i + 1, youngest_rev, count);
+    }
+  svn_pool_destroy(subpool);
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Entry point: parse the single REPOS-PATH argument and build the index. */
+int
+main(int argc, const char **argv)
+{
+  apr_pool_t *pool;
+  svn_error_t *err = SVN_NO_ERROR;
+  const char *repos_path;
+
+  /* Initialize the app. Send all error messages to 'stderr'. */
+  if (svn_cmdline_init(argv[0], stderr) == EXIT_FAILURE)
+    return EXIT_FAILURE;
+
+  pool = svn_pool_create(NULL);
+
+  if (argc <= 1)
+    {
+      usage_maybe_with_err(argv[0], "Not enough arguments.");
+      goto cleanup;
+    }
+
+  /* Convert argv[1] into a UTF8, internal-format, canonicalized path. */
+  if ((err = svn_utf_cstring_to_utf8(&repos_path, argv[1], pool)))
+    goto cleanup;
+  repos_path = svn_dirent_internal_style(repos_path, pool);
+  repos_path = svn_dirent_canonicalize(repos_path, pool);
+
+  if ((err = build_index(repos_path, pool)))
+    goto cleanup;
+
+ cleanup:
+  svn_pool_destroy(pool);
+
+  /* Report failure (if any) on stderr and exit accordingly. */
+  if (err)
+    {
+      svn_handle_error2(err, stderr, FALSE,
+                        "svn-populate-node-origins-index: ");
+      return EXIT_FAILURE;
+    }
+  return EXIT_SUCCESS;
+}
diff --git a/tools/server-side/svn-rep-sharing-stats.c b/tools/server-side/svn-rep-sharing-stats.c
new file mode 100644
index 0000000..e57ff91
--- /dev/null
+++ b/tools/server-side/svn-rep-sharing-stats.c
@@ -0,0 +1,535 @@
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#include <apr_signal.h>
+
+#include "svn_cmdline.h"
+#include "svn_dirent_uri.h"
+#include "svn_pools.h"
+#include "svn_repos.h"
+#include "svn_opt.h"
+#include "svn_utf.h"
+#include "svn_version.h"
+
+#include "../../subversion/libsvn_fs_fs/fs.h"
+#include "../../subversion/libsvn_fs_fs/fs_fs.h"
+/* for svn_fs_fs__id_* (used in assertions only) */
+#include "../../subversion/libsvn_fs_fs/id.h"
+
+#include "svn_private_config.h"
+
+
+/** Help messages and version checking. **/
+
/* Print version information for this tool to stdout, reusing the
 * generic option-library help machinery.  Allocate temporaries in
 * POOL. */
static svn_error_t *
version(apr_pool_t *pool)
{
  return svn_opt_print_help3(NULL, "svn-rep-sharing-stats", TRUE, FALSE, NULL,
                             NULL, NULL, NULL, NULL, NULL, pool);
}
+
/* Print a one-line hint to stderr telling the user how to obtain the
 * full help text.  Any error from printing is deliberately discarded
 * (there is nowhere further to report it). */
static void
usage(apr_pool_t *pool)
{
  svn_error_clear(svn_cmdline_fprintf
                  (stderr, pool,
                   _("Type 'svn-rep-sharing-stats --help' for usage.\n")));
}
+
+
+static void
+help(const apr_getopt_option_t *options, apr_pool_t *pool)
+{
+ svn_error_clear
+ (svn_cmdline_fprintf
+ (stdout, pool,
+ _("usage: svn-rep-sharing-stats [OPTIONS] REPOS_PATH\n\n"
+ " Prints the reference count statistics for representations\n"
+ " in an FSFS repository.\n"
+ "\n"
+ " At least one of the options --data/--prop/--both must be specified.\n"
+ "\n"
+ "Valid options:\n")));
+ while (options->description)
+ {
+ const char *optstr;
+ svn_opt_format_option(&optstr, options, TRUE, pool);
+ svn_error_clear(svn_cmdline_fprintf(stdout, pool, " %s\n", optstr));
+ ++options;
+ }
+ svn_error_clear(svn_cmdline_fprintf(stdout, pool, "\n"));
+ exit(0);
+}
+
+
/* Version compatibility check: verify that the Subversion libraries we
 * are running against are compatible with the headers this tool was
 * compiled with.  Returns an error describing any mismatch. */
static svn_error_t *
check_lib_versions(void)
{
  static const svn_version_checklist_t checklist[] =
    {
      /* ### check FSFS version */
      { "svn_subr", svn_subr_version },
      { "svn_fs", svn_fs_version },
      { NULL, NULL }
    };

  SVN_VERSION_DEFINE(my_version);
  return svn_error_trace(svn_ver_check_list(&my_version, checklist));
}
+
+
+
+/** Cancellation stuff, ### copied from subversion/svn/main.c */
+
/* A flag to see if we've been cancelled by the client or not.
 * sig_atomic_t because it is written from within a signal handler. */
static volatile sig_atomic_t cancelled = FALSE;

/* A signal handler to support cancellation: record the request in
 * CANCELLED, and ignore further deliveries of SIGNUM so a repeated
 * interrupt cannot re-enter the handler while the tool unwinds. */
static void
signal_handler(int signum)
{
  apr_signal(signum, SIG_IGN);
  cancelled = TRUE;
}
+
+/* Our cancellation callback. */
+static svn_error_t *
+svn_cl__check_cancel(void *baton)
+{
+ if (cancelled)
+ return svn_error_create(SVN_ERR_CANCELLED, NULL, _("Caught signal"));
+ else
+ return SVN_NO_ERROR;
+}
+
+static svn_cancel_func_t cancel_func = svn_cl__check_cancel;
+
/* Install signal handlers: SIGINT (and, where the platform defines
 * them, SIGBREAK/SIGHUP/SIGTERM) request a graceful cancellation via
 * the CANCELLED flag, while SIGPIPE/SIGXFSZ are ignored so they cannot
 * kill the process. */
static void set_up_cancellation(void)
{
  /* Set up our cancellation support. */
  apr_signal(SIGINT, signal_handler);
#ifdef SIGBREAK
  /* SIGBREAK is a Win32 specific signal generated by ctrl-break. */
  apr_signal(SIGBREAK, signal_handler);
#endif
#ifdef SIGHUP
  apr_signal(SIGHUP, signal_handler);
#endif
#ifdef SIGTERM
  apr_signal(SIGTERM, signal_handler);
#endif

#ifdef SIGPIPE
  /* Disable SIGPIPE generation for the platforms that have it. */
  apr_signal(SIGPIPE, SIG_IGN);
#endif

#ifdef SIGXFSZ
  /* Disable SIGXFSZ generation for the platforms that have it, otherwise
   * working with large files when compiled against an APR that doesn't have
   * large file support will crash the program, which is uncool. */
  apr_signal(SIGXFSZ, SIG_IGN);
#endif
}
+
+
+/** Program-specific code. **/
/* Identifiers for long options that have no single-character form.
 * Values start at SVN_OPT_FIRST_LONGOPT_ID so they cannot collide with
 * short option character codes such as 'q' and 'h'. */
enum {
  OPT_VERSION = SVN_OPT_FIRST_LONGOPT_ID,
  OPT_DATA,
  OPT_PROP,
  OPT_BOTH
};
+
+static svn_error_t *check_experimental(void)
+{
+ if (getenv("SVN_REP_SHARING_STATS_IS_EXPERIMENTAL"))
+ return SVN_NO_ERROR;
+
+ return svn_error_create(APR_EGENERAL, NULL,
+ "This code is experimental and should not "
+ "be used on live data.");
+}
+
/* The parts of a rep that determine whether it's being shared: two reps
 * refer to the same stored representation iff they live at the same
 * offset within the same revision's file. */
struct key_t
{
  svn_revnum_t revision;  /* revision the rep was written in */
  apr_off_t offset;       /* byte offset of the rep in that revision */
};

/* What we need to know about a rep. */
struct value_t
{
  svn_checksum_t *sha1_checksum;  /* SHA-1 of the rep's content */
  apr_uint64_t refcount;          /* how many times the rep was seen */
};
+
/* Increment records[rep] if both are non-NULL and REP contains a sha1.
 * Allocate keys and values in RESULT_POOL.
 *
 * RECORDS maps (struct key_t) -> (struct value_t); a NULL RECORDS means
 * this kind of rep is not being tallied and the call is a no-op.
 */
static svn_error_t *record(apr_hash_t *records,
                           representation_t *rep,
                           apr_pool_t *result_pool)
{
  struct key_t *key;
  struct value_t *value;

  /* Skip if we ignore this particular kind of reps, or if the rep doesn't
   * exist or doesn't have the checksum we are after.  (The latter case
   * often corresponds to node_rev->kind == svn_node_dir.)
   */
  if (records == NULL || rep == NULL || rep->sha1_checksum == NULL)
    return SVN_NO_ERROR;

  /* Construct the key.
   *
   * Must use calloc() because apr_hash_* pay attention to padding bytes too.
   */
  key = apr_pcalloc(result_pool, sizeof(*key));
  key->revision = rep->revision;
  key->offset = rep->offset;

  /* Update or create the value. */
  if ((value = apr_hash_get(records, key, sizeof(*key))))
    {
      /* Paranoia: the same (revision, offset) must always carry the
       * same content hash. */
      SVN_ERR_ASSERT(value->sha1_checksum != NULL);
      SVN_ERR_ASSERT(svn_checksum_match(value->sha1_checksum,
                                        rep->sha1_checksum));
      /* Real work. */
      value->refcount++;
    }
  else
    {
      /* First sighting of this rep. */
      value = apr_palloc(result_pool, sizeof(*value));
      value->sha1_checksum = svn_checksum_dup(rep->sha1_checksum, result_pool);
      value->refcount = 1;
    }

  /* Store them. */
  apr_hash_set(records, key, sizeof(*key), value);

  return SVN_NO_ERROR;
}
+
/* Inspect the data and/or prop reps of revision REVNUM in FS.  Store
 * reference count tallies in passed hashes (allocated in RESULT_POOL).
 *
 * If PROP_REPS or DATA_REPS is NULL, the respective kind of reps are not
 * tallied.
 *
 * Print progress report to STDERR unless QUIET is true.
 *
 * Use SCRATCH_POOL for temporary allocations.
 */
static svn_error_t *
process_one_revision(svn_fs_t *fs,
                     svn_revnum_t revnum,
                     svn_boolean_t quiet,
                     apr_hash_t *prop_reps,
                     apr_hash_t *data_reps,
                     apr_hash_t *both_reps,
                     apr_pool_t *result_pool,
                     apr_pool_t *scratch_pool)
{
  svn_fs_root_t *rev_root;
  apr_hash_t *paths_changed;
  apr_hash_index_t *hi;

  if (! quiet)
    SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool,
                                "processing r%ld\n", revnum));

  /* Get the changed paths. */
  SVN_ERR(svn_fs_revision_root(&rev_root, fs, revnum, scratch_pool));
  SVN_ERR(svn_fs_paths_changed2(&paths_changed, rev_root, scratch_pool));

  /* Iterate them. */
  /* ### use iterpool? */
  for (hi = apr_hash_first(scratch_pool, paths_changed);
       hi; hi = apr_hash_next(hi))
    {
      const char *path;
      const svn_fs_path_change2_t *change;
      const svn_fs_id_t *node_rev_id1, *node_rev_id2;
      const svn_fs_id_t *the_id;

      node_revision_t *node_rev;

      path = svn__apr_hash_index_key(hi);
      change = svn__apr_hash_index_val(hi);
      if (! quiet)
        SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool,
                                    "processing r%ld:%s\n", revnum, path));

      if (change->change_kind == svn_fs_path_change_delete)
        /* Can't ask for reps of PATH at REVNUM if the path no longer exists
         * at that revision! */
        continue;

      /* Okay, we have two node_rev id's for this change: the txn one and
       * the revision one.  We'll use the latter. */
      node_rev_id1 = change->node_rev_id;
      SVN_ERR(svn_fs_node_id(&node_rev_id2, rev_root, path, scratch_pool));

      /* Sanity-check both ids before committing to one of them. */
      SVN_ERR_ASSERT(svn_fs_fs__id_txn_id(node_rev_id1) != NULL);
      SVN_ERR_ASSERT(svn_fs_fs__id_rev(node_rev_id2) != SVN_INVALID_REVNUM);

      the_id = node_rev_id2;

      /* Get the node_rev using the chosen node_rev_id. */
      SVN_ERR(svn_fs_fs__get_node_revision(&node_rev, fs, the_id, scratch_pool));

      /* Maybe record the sha1's.  record() treats a NULL hash as "don't
       * tally this kind", so only the requested kinds are counted. */
      SVN_ERR(record(prop_reps, node_rev->prop_rep, result_pool));
      SVN_ERR(record(data_reps, node_rev->data_rep, result_pool));
      SVN_ERR(record(both_reps, node_rev->prop_rep, result_pool));
      SVN_ERR(record(both_reps, node_rev->data_rep, result_pool));
    }

  return SVN_NO_ERROR;
}
+
/* Print REPS_REF_COUNT (a hash as for process_one_revision())
 * to stdout in "refcount => sha1" format.  A sha1 may appear
 * more than once if not all its instances are shared.  Prepend
 * each line by NAME.  A NULL hash is silently skipped.
 *
 * Checks for cancellation before printing each entry.
 *
 * Use SCRATCH_POOL for temporary allocations.
 */
static svn_error_t *
pretty_print(const char *name,
             apr_hash_t *reps_ref_counts,
             apr_pool_t *scratch_pool)
{
  apr_hash_index_t *hi;

  if (reps_ref_counts == NULL)
    return SVN_NO_ERROR;

  for (hi = apr_hash_first(scratch_pool, reps_ref_counts);
       hi; hi = apr_hash_next(hi))
    {
      struct value_t *value;

      SVN_ERR(cancel_func(NULL));

      value = svn__apr_hash_index_val(hi);
      SVN_ERR(svn_cmdline_printf(scratch_pool, "%s %" APR_UINT64_T_FMT " %s\n",
                                 name, value->refcount,
                                 svn_checksum_to_cstring_display(
                                   value->sha1_checksum,
                                   scratch_pool)));
    }

  return SVN_NO_ERROR;
}
+
+/* Return an error unless FS is an fsfs fs. */
+static svn_error_t *is_fs_fsfs(svn_fs_t *fs, apr_pool_t *scratch_pool)
+{
+ const char *actual, *expected, *path;
+
+ path = svn_fs_path(fs, scratch_pool);
+
+ expected = SVN_FS_TYPE_FSFS;
+ SVN_ERR(svn_fs_type(&actual, path, scratch_pool));
+
+ if (strcmp(actual, expected) != 0)
+ return svn_error_createf(SVN_ERR_FS_UNKNOWN_FS_TYPE, NULL,
+ "Filesystem '%s' is not of type '%s'",
+ svn_dirent_local_style(path, scratch_pool),
+ actual);
+
+ return SVN_NO_ERROR;
+}
+
/* The core logic.  This function iterates the repository REPOS_PATH
 * and sends all the (DATA and/or PROP) reps in each revision for counting
 * by process_one_revision().  QUIET is passed to process_one_revision().
 */
static svn_error_t *process(const char *repos_path,
                            svn_boolean_t prop,
                            svn_boolean_t data,
                            svn_boolean_t quiet,
                            apr_pool_t *scratch_pool)
{
  apr_hash_t *prop_reps = NULL;
  apr_hash_t *data_reps = NULL;
  apr_hash_t *both_reps = NULL;
  svn_revnum_t rev, youngest;
  apr_pool_t *iterpool;
  svn_repos_t *repos;
  svn_fs_t *fs;

  /* A hash left NULL tells record() to skip that kind of rep. */
  if (prop)
    prop_reps = apr_hash_make(scratch_pool);
  if (data)
    data_reps = apr_hash_make(scratch_pool);
  if (prop && data)
    both_reps = apr_hash_make(scratch_pool);

  /* Open the FS. */
  SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, scratch_pool));
  fs = svn_repos_fs(repos);

  /* Refuse non-FSFS backends up front. */
  SVN_ERR(is_fs_fsfs(fs, scratch_pool));

  SVN_ERR(svn_fs_youngest_rev(&youngest, fs, scratch_pool));

  /* Iterate the revisions, r0 through HEAD inclusive.  Tallies
   * accumulate in SCRATCH_POOL; ITERPOOL holds only per-revision
   * temporaries and is cleared each time around. */
  iterpool = svn_pool_create(scratch_pool);
  for (rev = 0; rev <= youngest; rev++)
    {
      svn_pool_clear(iterpool);
      SVN_ERR(cancel_func(NULL));
      SVN_ERR(process_one_revision(fs, rev, quiet,
                                   prop_reps, data_reps, both_reps,
                                   scratch_pool, iterpool));
    }
  svn_pool_destroy(iterpool);

  /* Print stats. */
  SVN_ERR(pretty_print("prop", prop_reps, scratch_pool));
  SVN_ERR(pretty_print("data", data_reps, scratch_pool));
  SVN_ERR(pretty_print("both", both_reps, scratch_pool));

  return SVN_NO_ERROR;
}
+
/* Program entry point: parse the command line, then tally and print
 * representation reference counts for the named repository. */
int
main(int argc, const char *argv[])
{
  const char *repos_path;
  apr_allocator_t *allocator;
  apr_pool_t *pool;
  svn_boolean_t prop = FALSE, data = FALSE;
  svn_boolean_t quiet = FALSE;
  svn_error_t *err;
  apr_getopt_t *os;
  const apr_getopt_option_t options[] =
    {
      {"data", OPT_DATA, 0, N_("display data reps stats")},
      {"prop", OPT_PROP, 0, N_("display prop reps stats")},
      {"both", OPT_BOTH, 0, N_("display combined (data+prop) reps stats")},
      {"quiet", 'q', 0, N_("no progress (only errors) to stderr")},
      {"help", 'h', 0, N_("display this help")},
      {"version", OPT_VERSION, 0,
       N_("show program version information")},
      {0, 0, 0, 0}
    };

  /* Initialize the app. */
  if (svn_cmdline_init("svn-rep-sharing-stats", stderr) != EXIT_SUCCESS)
    return EXIT_FAILURE;

  /* Create our top-level pool.  Use a separate mutexless allocator,
   * given this application is single threaded.
   */
  if (apr_allocator_create(&allocator))
    return EXIT_FAILURE;

  apr_allocator_max_free_set(allocator, SVN_ALLOCATOR_RECOMMENDED_MAX_FREE);

  pool = svn_pool_create_ex(NULL, allocator);
  apr_allocator_owner_set(allocator, pool);

  /* Check library versions */
  err = check_lib_versions();
  if (err)
    return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: ");

  err = svn_cmdline__getopt_init(&os, argc, argv, pool);
  if (err)
    return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: ");

  /* Refuse to run until the user opts in to the experimental tool. */
  SVN_INT_ERR(check_experimental());

  os->interleave = 1;
  while (1)
    {
      int opt;
      const char *arg;
      apr_status_t status = apr_getopt_long(os, options, &opt, &arg);
      if (APR_STATUS_IS_EOF(status))
        break;
      if (status != APR_SUCCESS)
        {
          usage(pool);
          return EXIT_FAILURE;
        }
      switch (opt)
        {
        case OPT_DATA:
          data = TRUE;
          break;
        /* It seems we don't actually rep-share props yet. */
        case OPT_PROP:
          prop = TRUE;
          break;
        case OPT_BOTH:
          data = TRUE;
          prop = TRUE;
          break;
        case 'q':
          quiet = TRUE;
          break;
        case 'h':
          /* help() does not return; it exits with status 0. */
          help(options, pool);
          break;
        case OPT_VERSION:
          SVN_INT_ERR(version(pool));
          exit(0);
          break;
        default:
          usage(pool);
          return EXIT_FAILURE;
        }
    }

  /* Exactly 1 non-option argument,
   * and at least one of "--data"/"--prop"/"--both".
   */
  if (os->ind + 1 != argc || (!data && !prop))
    {
      usage(pool);
      return EXIT_FAILURE;
    }

  /* Grab REPOS_PATH from argv. */
  SVN_INT_ERR(svn_utf_cstring_to_utf8(&repos_path, os->argv[os->ind], pool));
  repos_path = svn_dirent_internal_style(repos_path, pool);

  set_up_cancellation();

  /* Do something. */
  SVN_INT_ERR(process(repos_path, prop, data, quiet, pool));

  /* We're done. */

  svn_pool_destroy(pool);
  /* Flush stdout to make sure that the user will see any printing errors. */
  SVN_INT_ERR(svn_cmdline_fflush(stdout));

  return EXIT_SUCCESS;
}
diff --git a/tools/server-side/svn_server_log_parse.py b/tools/server-side/svn_server_log_parse.py
new file mode 100755
index 0000000..5ecb104
--- /dev/null
+++ b/tools/server-side/svn_server_log_parse.py
@@ -0,0 +1,460 @@
+#!/usr/bin/python
+
+# ====================================================================
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ====================================================================
+
+# TODO: Teach parse_open about capabilities, rather than allowing any
+# words at all.
+
+"""Parse subversion server operational logs.
+
+SVN-ACTION strings
+------------------
+
+Angle brackets denote a variable, e.g. 'commit r<N>' means you'll see
+lines like 'commit r17' for this action.
+
+<N> and <M> are revision numbers.
+
+<PATH>, <FROM-PATH>, and <TO-PATH> mean a URI-encoded path relative to
+the repository root, including a leading '/'.
+
+<REVPROP> means a revision property, e.g. 'svn:log'.
+
+<I> represents a svn_mergeinfo_inheritance_t value and is one of these
+words: explicit inherited nearest-ancestor.
+
+<D> represents a svn_depth_t value and is one of these words: empty
+files immediates infinity. If the depth value for the operation was
+svn_depth_unknown, the depth= portion is absent entirely.
+
+The get-mergeinfo and log actions use lists for paths and revprops.
+The lists are enclosed in parentheses and each item is separated by a
+space (spaces in paths are encoded as %20).
+
+The words will *always* be in this order, though some may be absent.
+
+General::
+
+ change-rev-prop r<N> <REVPROP>
+ commit r<N>
+ get-dir <PATH> r<N> text? props?
+ get-file <PATH> r<N> text? props?
+ lock (<PATH> ...) steal?
+ rev-proplist r<N>
+ unlock (<PATH> ...) break?
+
+Reports::
+
+ get-file-revs <PATH> r<N>:<M> include-merged-revisions?
+ get-mergeinfo (<PATH> ...) <I> include-descendants?
+ log (<PATH> ...) r<N>:<M> limit=<N>? discover-changed-paths? strict? include-merged-revisions? revprops=all|(<REVPROP> ...)?
+ replay <PATH> r<N>
+
+The update report::
+
+ checkout-or-export <PATH> r<N> depth=<D>?
+ diff <FROM-PATH>@<N> <TO-PATH>@<M> depth=<D>? ignore-ancestry?
+ diff <PATH> r<N>:<M> depth=<D>? ignore-ancestry?
+ status <PATH> r<N> depth=<D>?
+ switch <FROM-PATH> <TO-PATH>@<N> depth=<D>?
+ update <PATH> r<N> depth=<D>? send-copyfrom-args?
+"""
+
+
+import re
+try:
+ # Python >=3.0
+ from urllib.parse import unquote as urllib_parse_unquote
+except ImportError:
+ # Python <3.0
+ from urllib import unquote as urllib_parse_unquote
+
+import svn.core
+
+#
+# Valid words for _parse_depth and _parse_mergeinfo_inheritance
+#
+
+DEPTH_WORDS = ['empty', 'files', 'immediates', 'infinity']
+INHERITANCE_WORDS = {
+ 'explicit': svn.core.svn_mergeinfo_explicit,
+ 'inherited': svn.core.svn_mergeinfo_inherited,
+ 'nearest-ancestor': svn.core.svn_mergeinfo_nearest_ancestor,
+}
+
+#
+# Patterns for _match
+#
+
+# <PATH>
+pPATH = r'(/\S*)'
+# (<PATH> ...)
+pPATHS = r'\(([^)]*)\)'
+# r<N>
+pREVNUM = r'r(\d+)'
+# (<N> ...)
+pREVNUMS = r'\(((\d+\s*)*)\)'
+# r<N>:<M>
+pREVRANGE = r'r(-?\d+):(-?\d+)'
+# <PATH>@<N>
+pPATHREV = pPATH + r'@(\d+)'
+pWORD = r'(\S+)'
+pPROPERTY = pWORD
+# depth=<D>?
+pDEPTH = 'depth=' + pWORD
+
+#
+# Exceptions
+#
+
class Error(Exception):
    """Base class for all parse errors raised by this module."""
    pass

class BadDepthError(Error):
    """Raised for a depth= word that is not a valid svn_depth_t value."""
    def __init__(self, value):
        message = 'bad svn_depth_t value %s' % value
        Error.__init__(self, message)

class BadMergeinfoInheritanceError(Error):
    """Raised for an invalid svn_mergeinfo_inheritance_t word."""
    def __init__(self, value):
        message = 'bad svn_mergeinfo_inheritance_t value %s' % value
        Error.__init__(self, message)

class MatchError(Error):
    """Raised when a log line does not match the expected pattern."""
    def __init__(self, pattern, line):
        message = '/%s/ does not match log line:\n%s' % (pattern, line)
        Error.__init__(self, message)
+
+
+#
+# Helper functions
+#
+
# TODO: Move to kitchensink.c like svn_depth_from_word?
try:
    from svn.core import svn_inheritance_from_word
except ImportError:
    # Fallback for bindings that do not yet export
    # svn_inheritance_from_word: translate the log word via the
    # INHERITANCE_WORDS table defined above.
    def svn_inheritance_from_word(word):
        try:
            return INHERITANCE_WORDS[word]
        except KeyError:
            # XXX svn_inheritance_to_word uses explicit as default so...
            return svn.core.svn_mergeinfo_explicit
+
def _parse_depth(word):
    """Convert WORD (or None) to an svn_depth_t value.

    None means the depth= portion was absent from the log line, which
    corresponds to svn_depth_unknown.

    Raises:
       BadDepthError -- if word is not one of DEPTH_WORDS
    """
    if word is None:
        return svn.core.svn_depth_unknown
    if word in DEPTH_WORDS:
        return svn.core.svn_depth_from_word(word)
    raise BadDepthError(word)
+
def _parse_mergeinfo_inheritance(word):
    """Convert WORD to an svn_mergeinfo_inheritance_t value.

    Raises:
       BadMergeinfoInheritanceError -- if word is not in INHERITANCE_WORDS
    """
    if word in INHERITANCE_WORDS:
        return svn_inheritance_from_word(word)
    raise BadMergeinfoInheritanceError(word)
+
+def _match(line, *patterns):
+ """Return a re.match object from matching patterns against line.
+
+ All optional arguments must be strings suitable for ''.join()ing
+ into a single pattern string for re.match. The last optional
+ argument may instead be a list of such strings, which will be
+ joined into the final pattern as *optional* matches.
+
+ Raises:
+ Error -- if re.match returns None (i.e. no match)
+ """
+ if isinstance(patterns[-1], list):
+ optional = patterns[-1]
+ patterns = patterns[:-1]
+ else:
+ optional = []
+ pattern = r'\s+'.join(patterns)
+ pattern += ''.join([r'(\s+' + x + ')?' for x in optional])
+ m = re.match(pattern, line)
+ if m is None:
+ raise MatchError(pattern, line)
+ return m
+
+
class Parser(object):
    """Subclass this and define the handle_ methods according to the
    "SVN-ACTION strings" section of this module's documentation. For
    example, "lock <PATH> steal?" => def handle_lock(self, path, steal)
    where steal will be True if "steal" was present.

    See the end of test_svn_server_log_parse.py for a complete example.
    """
    def parse(self, line):
        """Parse line and call appropriate handle_ method.

        Returns one of:
        - line remaining after the svn action, if one was parsed
        - whatever your handle_unknown implementation returns

        Raises:
        BadDepthError -- for bad svn_depth_t values
        BadMergeinfoInheritanceError -- for bad svn_mergeinfo_inheritance_t
                                        values
        Error -- any other parse error
        """
        self.line = line
        words = self.split_line = line.split(' ')
        try:
            # Dispatch on the first word: e.g. "get-dir" -> _parse_get_dir.
            method = getattr(self, '_parse_' + words[0].replace('-', '_'))
        except AttributeError:
            return self.handle_unknown(self.line)
        return method(' '.join(words[1:]))

    # commit r<N>
    def _parse_commit(self, line):
        m = _match(line, pREVNUM)
        self.handle_commit(int(m.group(1)))
        return line[m.end():]

    # open <PROTOCOL> cap=(<WORD> ...) <PATH> <RA-CLIENT> <CLIENT>
    def _parse_open(self, line):
        pINT = r'(\d+)'
        pCAP = r'cap=\(([^)]*)\)'
        pCLIENT = pWORD
        m = _match(line, pINT, pCAP, pPATH, pCLIENT, pCLIENT)
        protocol = int(m.group(1))
        if m.group(2) is None:
            capabilities = []
        else:
            capabilities = m.group(2).split()
        path = m.group(3)
        ra_client = urllib_parse_unquote(m.group(4))
        client = urllib_parse_unquote(m.group(5))
        self.handle_open(protocol, capabilities, path, ra_client, client)
        return line[m.end():]

    # reparent <PATH>
    def _parse_reparent(self, line):
        m = _match(line, pPATH)
        self.handle_reparent(urllib_parse_unquote(m.group(1)))
        return line[m.end():]

    # get-latest-rev (takes no arguments; consumes nothing)
    def _parse_get_latest_rev(self, line):
        self.handle_get_latest_rev()
        return line

    # get-dated-rev <DATE-WORD>
    def _parse_get_dated_rev(self, line):
        m = _match(line, pWORD)
        self.handle_get_dated_rev(m.group(1))
        return line[m.end():]

    # get-dir <PATH> r<N> text? props?
    def _parse_get_dir(self, line):
        m = _match(line, pPATH, pREVNUM, ['text', 'props'])
        self.handle_get_dir(urllib_parse_unquote(m.group(1)), int(m.group(2)),
                            m.group(3) is not None,
                            m.group(4) is not None)
        return line[m.end():]

    # get-file <PATH> r<N> text? props?
    def _parse_get_file(self, line):
        m = _match(line, pPATH, pREVNUM, ['text', 'props'])
        self.handle_get_file(urllib_parse_unquote(m.group(1)), int(m.group(2)),
                             m.group(3) is not None,
                             m.group(4) is not None)
        return line[m.end():]

    # lock (<PATH> ...) steal?
    def _parse_lock(self, line):
        m = _match(line, pPATHS, ['steal'])
        paths = [urllib_parse_unquote(x) for x in m.group(1).split()]
        self.handle_lock(paths, m.group(2) is not None)
        return line[m.end():]

    # change-rev-prop r<N> <REVPROP>
    def _parse_change_rev_prop(self, line):
        m = _match(line, pREVNUM, pPROPERTY)
        self.handle_change_rev_prop(int(m.group(1)),
                                    urllib_parse_unquote(m.group(2)))
        return line[m.end():]

    # rev-proplist r<N>
    def _parse_rev_proplist(self, line):
        m = _match(line, pREVNUM)
        self.handle_rev_proplist(int(m.group(1)))
        return line[m.end():]

    # rev-prop r<N> <REVPROP>
    def _parse_rev_prop(self, line):
        m = _match(line, pREVNUM, pPROPERTY)
        self.handle_rev_prop(int(m.group(1)), urllib_parse_unquote(m.group(2)))
        return line[m.end():]

    # unlock (<PATH> ...) break?
    def _parse_unlock(self, line):
        m = _match(line, pPATHS, ['break'])
        paths = [urllib_parse_unquote(x) for x in m.group(1).split()]
        self.handle_unlock(paths, m.group(2) is not None)
        return line[m.end():]

    # get-lock <PATH>
    def _parse_get_lock(self, line):
        m = _match(line, pPATH)
        self.handle_get_lock(urllib_parse_unquote(m.group(1)))
        return line[m.end():]

    # get-locks <PATH>
    def _parse_get_locks(self, line):
        m = _match(line, pPATH)
        self.handle_get_locks(urllib_parse_unquote(m.group(1)))
        return line[m.end():]

    # get-locations <PATH> (<N> ...)
    def _parse_get_locations(self, line):
        m = _match(line, pPATH, pREVNUMS)
        path = urllib_parse_unquote(m.group(1))
        revnums = [int(x) for x in m.group(2).split()]
        self.handle_get_locations(path, revnums)
        return line[m.end():]

    # get-location-segments <PATH>@<PEG> r<N>:<M>
    def _parse_get_location_segments(self, line):
        m = _match(line, pPATHREV, pREVRANGE)
        path = urllib_parse_unquote(m.group(1))
        peg = int(m.group(2))
        left = int(m.group(3))
        right = int(m.group(4))
        self.handle_get_location_segments(path, peg, left, right)
        return line[m.end():]

    # get-file-revs <PATH> r<N>:<M> include-merged-revisions?
    def _parse_get_file_revs(self, line):
        m = _match(line, pPATH, pREVRANGE, ['include-merged-revisions'])
        path = urllib_parse_unquote(m.group(1))
        left = int(m.group(2))
        right = int(m.group(3))
        include_merged_revisions = m.group(4) is not None
        self.handle_get_file_revs(path, left, right, include_merged_revisions)
        return line[m.end():]

    # get-mergeinfo (<PATH> ...) <I> include-descendants?
    def _parse_get_mergeinfo(self, line):
        # <I>
        pMERGEINFO_INHERITANCE = pWORD
        pINCLUDE_DESCENDANTS = pWORD
        m = _match(line,
                   pPATHS, pMERGEINFO_INHERITANCE, ['include-descendants'])
        paths = [urllib_parse_unquote(x) for x in m.group(1).split()]
        inheritance = _parse_mergeinfo_inheritance(m.group(2))
        include_descendants = m.group(3) is not None
        self.handle_get_mergeinfo(paths, inheritance, include_descendants)
        return line[m.end():]

    # log (<PATH> ...) r<N>:<M> limit=<N>? discover-changed-paths? strict?
    #     include-merged-revisions? revprops=all|(<REVPROP> ...)?
    def _parse_log(self, line):
        # limit=<N>?
        pLIMIT = r'limit=(\d+)'
        # revprops=all|(<REVPROP> ...)?
        pREVPROPS = r'revprops=(all|\(([^)]+)\))'
        m = _match(line, pPATHS, pREVRANGE,
                   [pLIMIT, 'discover-changed-paths', 'strict',
                    'include-merged-revisions', pREVPROPS])
        # Group map: 1=paths, 2/3=rev range; each optional item adds an
        # outer wrapper group, so 4=(limit clause) 5=limit value,
        # 6=discover-changed-paths, 7=strict, 8=include-merged-revisions,
        # 9=(revprops clause), 10="all" or "(...)", 11=revprop list.
        paths = [urllib_parse_unquote(x) for x in m.group(1).split()]
        left = int(m.group(2))
        right = int(m.group(3))
        if m.group(5) is None:
            limit = 0
        else:
            limit = int(m.group(5))
        discover_changed_paths = m.group(6) is not None
        strict = m.group(7) is not None
        include_merged_revisions = m.group(8) is not None
        if m.group(10) == 'all':
            revprops = None
        else:
            if m.group(11) is None:
                revprops = []
            else:
                revprops = [urllib_parse_unquote(x) for x in m.group(11).split()]
        self.handle_log(paths, left, right, limit, discover_changed_paths,
                        strict, include_merged_revisions, revprops)
        return line[m.end():]

    # check-path <PATH>@<N>
    def _parse_check_path(self, line):
        m = _match(line, pPATHREV)
        path = urllib_parse_unquote(m.group(1))
        revnum = int(m.group(2))
        self.handle_check_path(path, revnum)
        return line[m.end():]

    # stat <PATH>@<N>
    def _parse_stat(self, line):
        m = _match(line, pPATHREV)
        path = urllib_parse_unquote(m.group(1))
        revnum = int(m.group(2))
        self.handle_stat(path, revnum)
        return line[m.end():]

    # replay <PATH> r<N>
    def _parse_replay(self, line):
        m = _match(line, pPATH, pREVNUM)
        path = urllib_parse_unquote(m.group(1))
        revision = int(m.group(2))
        self.handle_replay(path, revision)
        return line[m.end():]

    # the update report

    # checkout-or-export <PATH> r<N> depth=<D>?
    def _parse_checkout_or_export(self, line):
        m = _match(line, pPATH, pREVNUM, [pDEPTH])
        path = urllib_parse_unquote(m.group(1))
        revision = int(m.group(2))
        # group 3 is the optional wrapper; group 4 is the depth word.
        depth = _parse_depth(m.group(4))
        self.handle_checkout_or_export(path, revision, depth)
        return line[m.end():]

    def _parse_diff(self, line):
        # First, try 1-path form.
        try:
            m = _match(line, pPATH, pREVRANGE, [pDEPTH, 'ignore-ancestry'])
            f = self._parse_diff_1path
        except Error:
            # OK, how about 2-path form?
            m = _match(line, pPATHREV, pPATHREV, [pDEPTH, 'ignore-ancestry'])
            f = self._parse_diff_2paths
        return f(line, m)

    # diff <PATH> r<N>:<M> depth=<D>? ignore-ancestry?
    def _parse_diff_1path(self, line, m):
        path = urllib_parse_unquote(m.group(1))
        left = int(m.group(2))
        right = int(m.group(3))
        depth = _parse_depth(m.group(5))
        ignore_ancestry = m.group(6) is not None
        self.handle_diff_1path(path, left, right,
                               depth, ignore_ancestry)
        return line[m.end():]

    # diff <FROM-PATH>@<N> <TO-PATH>@<M> depth=<D>? ignore-ancestry?
    def _parse_diff_2paths(self, line, m):
        from_path = urllib_parse_unquote(m.group(1))
        from_rev = int(m.group(2))
        to_path = urllib_parse_unquote(m.group(3))
        to_rev = int(m.group(4))
        depth = _parse_depth(m.group(6))
        ignore_ancestry = m.group(7) is not None
        self.handle_diff_2paths(from_path, from_rev, to_path, to_rev,
                                depth, ignore_ancestry)
        return line[m.end():]

    # status <PATH> r<N> depth=<D>?
    def _parse_status(self, line):
        m = _match(line, pPATH, pREVNUM, [pDEPTH])
        path = urllib_parse_unquote(m.group(1))
        revision = int(m.group(2))
        depth = _parse_depth(m.group(4))
        self.handle_status(path, revision, depth)
        return line[m.end():]

    # switch <FROM-PATH> <TO-PATH>@<N> depth=<D>?
    def _parse_switch(self, line):
        m = _match(line, pPATH, pPATHREV, [pDEPTH])
        from_path = urllib_parse_unquote(m.group(1))
        to_path = urllib_parse_unquote(m.group(2))
        to_rev = int(m.group(3))
        depth = _parse_depth(m.group(5))
        self.handle_switch(from_path, to_path, to_rev, depth)
        return line[m.end():]

    # update <PATH> r<N> depth=<D>? send-copyfrom-args?
    def _parse_update(self, line):
        m = _match(line, pPATH, pREVNUM, [pDEPTH, 'send-copyfrom-args'])
        path = urllib_parse_unquote(m.group(1))
        revision = int(m.group(2))
        depth = _parse_depth(m.group(4))
        send_copyfrom_args = m.group(5) is not None
        self.handle_update(path, revision, depth, send_copyfrom_args)
        return line[m.end():]
diff --git a/tools/server-side/svnauthz-validate.c b/tools/server-side/svnauthz-validate.c
new file mode 100644
index 0000000..df7d541
--- /dev/null
+++ b/tools/server-side/svnauthz-validate.c
@@ -0,0 +1,76 @@
+/*
+ * svnauthz-validate.c : Load and validate an authz file.
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ *
+ *
+ * svnauthz-validate.c : load and validate an authz file, returns
+ * value == 0 if syntax of authz file is correct
+ * value == 1 if syntax of authz file is invalid or file not found
+ * value == 2 in case of general error
+ *
+ */
+
+#include "svn_pools.h"
+#include "svn_repos.h"
+#include "svn_cmdline.h"
+
+int
+main(int argc, const char **argv)
+{
+ apr_pool_t *pool;
+ svn_error_t *err;
+ svn_authz_t *authz;
+ const char *authz_file;
+
+ if (argc <= 1)
+ {
+ printf("Usage: %s PATH \n\n", argv[0]);
+ printf("Loads the authz file at PATH and validates its syntax. \n"
+ "Returns:\n"
+ " 0 when syntax is OK.\n"
+ " 1 when syntax is invalid.\n"
+ " 2 operational error\n");
+ return 2;
+ }
+
+ authz_file = argv[1];
+
+ /* Initialize the app. Send all error messages to 'stderr'. */
+ if (svn_cmdline_init(argv[0], stderr) != EXIT_SUCCESS)
+ return 2;
+
+ pool = svn_pool_create(NULL);
+
+ /* Read the access file and validate it. */
+ err = svn_repos_authz_read(&authz, authz_file, TRUE, pool);
+
+ svn_pool_destroy(pool);
+
+ if (err)
+ {
+ svn_handle_error2(err, stderr, FALSE, "svnauthz-validate: ");
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+}
diff --git a/tools/server-side/test_svn_server_log_parse.py b/tools/server-side/test_svn_server_log_parse.py
new file mode 100755
index 0000000..2fa8759
--- /dev/null
+++ b/tools/server-side/test_svn_server_log_parse.py
@@ -0,0 +1,611 @@
+#!/usr/bin/python
+
+# ====================================================================
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ====================================================================
+
+# Run this without arguments to run unit tests.
+# Run with a path to a davautocheck ops log to test that it can parse that.
+
+import os
+import re
+import sys
+import tempfile
+try:
+ # Python >=3.0
+ from urllib.parse import quote as urllib_parse_quote
+except ImportError:
+ # Python <3.0
+ from urllib import quote as urllib_parse_quote
+import unittest
+
+import svn.core
+
+import svn_server_log_parse
+
class TestCase(unittest.TestCase):
    """Unit tests for svn_server_log_parse.Parser.

    setUp installs a Parser subclass whose handle_* methods capture
    their positional arguments into self.result, so each test can check
    both the trailing text returned by parse() and the values handed to
    the handler.
    """

    def setUp(self):
        # Define a class to stuff everything passed to any handle_
        # method into self.result.
        class cls(svn_server_log_parse.Parser):
            def __getattr__(cls_self, attr):
                if attr.startswith('handle_'):
                    return lambda *a: setattr(self, 'result', a)
                raise AttributeError
        self.parse = cls().parse

    def test_unknown(self):
        line = 'unknown log line'
        self.parse(line)
        self.assertEqual(self.result, (line,))

    def test_open(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'open')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'open 2 cap / SVN/1.60. fooclient')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'open a cap=() / SVN/1.60. fooclient')
        self.assertEqual(self.parse('open 2 cap=() / SVN fooclient'), '')
        self.assertEqual(self.result, (2, [], '/', 'SVN', 'fooclient'))
        # TODO: Teach it about the capabilities, rather than allowing
        # any words at all.
        self.assertEqual(self.parse('open 2 cap=(foo) / SVN foo%20client'), '')
        self.assertEqual(self.result, (2, ['foo'], '/', 'SVN', 'foo client'))

    def test_reparent(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'reparent')
        self.assertEqual(self.parse('reparent /'), '')
        self.assertEqual(self.result, ('/',))

    def test_get_latest_rev(self):
        self.assertEqual(self.parse('get-latest-rev'), '')
        self.assertEqual(self.result, ())
        self.assertEqual(self.parse('get-latest-rev r3'), 'r3')
        self.assertEqual(self.result, ())

    def test_get_dated_rev(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse,
                          'get-dated-rev')
        self.assertEqual(self.parse('get-dated-rev 2008-04-15T20:41:24.000000Z'), '')
        self.assertEqual(self.result, ('2008-04-15T20:41:24.000000Z',))

    def test_commit(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'commit')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'commit 3')
        self.assertEqual(self.parse('commit r3'), '')
        self.assertEqual(self.result, (3,))
        self.assertEqual(self.parse('commit r3 leftover'), ' leftover')
        self.assertEqual(self.result, (3,))

    def test_get_dir(self):
        self.get_dir_or_file('get-dir')

    def test_get_file(self):
        self.get_dir_or_file('get-file')

    def get_dir_or_file(self, c):
        # Shared body for test_get_dir/test_get_file; c is the command word.
        self.assertRaises(svn_server_log_parse.Error, self.parse, c)
        self.assertRaises(svn_server_log_parse.Error, self.parse, c + ' foo')
        self.assertRaises(svn_server_log_parse.Error, self.parse, c + ' foo 3')
        self.assertEqual(self.parse(c + ' /a/b/c r3 ...'), ' ...')
        self.assertEqual(self.result, ('/a/b/c', 3, False, False))
        self.assertEqual(self.parse(c + ' / r3'), '')
        self.assertEqual(self.result, ('/', 3, False, False))
        # path must be absolute
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, c + ' a/b/c r3')
        self.assertEqual(self.parse(c + ' /k r27 text'), '')
        self.assertEqual(self.result, ('/k', 27, True, False))
        self.assertEqual(self.parse(c + ' /k r27 props'), '')
        self.assertEqual(self.result, ('/k', 27, False, True))
        self.assertEqual(self.parse(c + ' /k r27 text props'), '')
        self.assertEqual(self.result, ('/k', 27, True, True))
        # out of order not accepted
        self.assertEqual(self.parse(c + ' /k r27 props text'), ' text')
        self.assertEqual(self.result, ('/k', 27, False, True))

    def test_lock(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'lock')
        self.parse('lock (/foo)')
        self.assertEqual(self.result, (['/foo'], False))
        self.assertEqual(self.parse('lock (/foo) steal ...'), ' ...')
        self.assertEqual(self.result, (['/foo'], True))
        self.assertEqual(self.parse('lock (/foo) stear'), ' stear')

    def test_change_rev_prop(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'change-rev-prop r3')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'change-rev-prop r svn:log')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'change-rev-prop rX svn:log')
        self.assertEqual(self.parse('change-rev-prop r3 svn:log ...'), ' ...')
        self.assertEqual(self.result, (3, 'svn:log'))

    def test_rev_proplist(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'rev-proplist')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'rev-proplist r')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'rev-proplist rX')
        self.assertEqual(self.parse('rev-proplist r3 ...'), ' ...')
        self.assertEqual(self.result, (3,))

    def test_rev_prop(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'rev-prop')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'rev-prop r')
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'rev-prop rX')
        self.assertEqual(self.parse('rev-prop r3 foo ...'), ' ...')
        self.assertEqual(self.result, (3, 'foo'))

    def test_unlock(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'unlock')
        self.parse('unlock (/foo)')
        self.assertEqual(self.result, (['/foo'], False))
        self.assertEqual(self.parse('unlock (/foo) break ...'), ' ...')
        self.assertEqual(self.result, (['/foo'], True))
        self.assertEqual(self.parse('unlock (/foo) bear'), ' bear')

    def test_get_lock(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'get-lock')
        self.parse('get-lock /foo')
        self.assertEqual(self.result, ('/foo',))

    def test_get_locks(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'get-locks')
        self.parse('get-locks /foo')
        self.assertEqual(self.result, ('/foo',))

    def test_get_locations(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse,
                          'get-locations')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-locations /foo 3')
        self.assertEqual(self.parse('get-locations /foo (3 4) ...'), ' ...')
        self.assertEqual(self.result, ('/foo', [3, 4]))
        self.assertEqual(self.parse('get-locations /foo (3)'), '')
        self.assertEqual(self.result, ('/foo', [3]))

    def test_get_location_segments(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse,
                          'get-location-segments')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-location-segments /foo 3')
        self.assertEqual(self.parse('get-location-segments /foo@2 r3:4'), '')
        self.assertEqual(self.result, ('/foo', 2, 3, 4))

    def test_get_file_revs(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'get-file-revs')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-file-revs /foo 3')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-file-revs /foo 3:a')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-file-revs /foo r3:a')
        self.assertEqual(self.parse('get-file-revs /foo r3:4 ...'), ' ...')
        self.assertEqual(self.result, ('/foo', 3, 4, False))
        self.assertEqual(self.parse('get-file-revs /foo r3:4'
                                    ' include-merged-revisions ...'), ' ...')
        self.assertEqual(self.result, ('/foo', 3, 4, True))

    def test_get_mergeinfo(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo /foo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo (/foo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo (/foo /bar')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'get-mergeinfo (/foo)')
        self.assertRaises(svn_server_log_parse.BadMergeinfoInheritanceError,
                          self.parse, 'get-mergeinfo (/foo) bork')
        self.assertEqual(self.parse('get-mergeinfo (/foo) explicit'), '')
        self.assertEqual(self.result, (['/foo'],
                                       svn.core.svn_mergeinfo_explicit, False))
        self.assertEqual(self.parse('get-mergeinfo (/foo /bar) inherited ...'),
                         ' ...')
        # NOTE: an accidental, byte-identical duplicate of the following
        # assertion has been removed; it verified nothing new.
        self.assertEqual(self.result, (['/foo', '/bar'],
                                       svn.core.svn_mergeinfo_inherited, False))

    def test_log(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'log')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'log /foo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'log (/foo)')
        self.assertEqual(self.parse('log (/foo) r3:4'
                                    ' include-merged-revisions'), '')
        self.assertEqual(self.result,
                         (['/foo'], 3, 4, 0, False, False, True, []))
        self.assertEqual(self.parse('log (/foo /bar) r3:4 revprops=all ...'),
                         ' ...')
        self.assertEqual(self.result,
                         (['/foo', '/bar'], 3, 4, 0, False, False, False, None))
        self.assertEqual(self.parse('log (/foo) r3:4 revprops=(a b) ...'),
                         ' ...')
        self.assertEqual(self.result,
                         (['/foo'], 3, 4, 0, False, False, False, ['a', 'b']))
        self.assertEqual(self.parse('log (/foo) r8:1 limit=3'), '')
        self.assertEqual(self.result,
                         (['/foo'], 8, 1, 3, False, False, False, []))

    def test_check_path(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'check-path')
        self.assertEqual(self.parse('check-path /foo@9'), '')
        self.assertEqual(self.result, ('/foo', 9))

    def test_stat(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'stat')
        self.assertEqual(self.parse('stat /foo@9'), '')
        self.assertEqual(self.result, ('/foo', 9))

    def test_replay(self):
        self.assertRaises(svn_server_log_parse.Error, self.parse, 'replay')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'replay /foo')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'replay (/foo) r9')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'replay (/foo) r9:10')
        self.assertEqual(self.parse('replay /foo r9'), '')
        self.assertEqual(self.result, ('/foo', 9))

    def test_checkout_or_export(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'checkout-or-export')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'checkout-or-export /foo')
        self.assertEqual(self.parse('checkout-or-export /foo r9'), '')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown))
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'checkout-or-export /foo r9 depth=INVALID-DEPTH')
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'checkout-or-export /foo r9 depth=bork')
        self.assertEqual(self.parse('checkout-or-export /foo r9 depth=files .'),
                         ' .')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_files))

    def test_diff_1path(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'diff')
        self.assertEqual(self.parse('diff /foo r9:10'), '')
        self.assertEqual(self.result, ('/foo', 9, 10,
                                       svn.core.svn_depth_unknown, False))
        self.assertEqual(self.parse('diff /foo r9:10'
                                    ' ignore-ancestry ...'), ' ...')
        self.assertEqual(self.result, ('/foo', 9, 10,
                                       svn.core.svn_depth_unknown, True))
        self.assertEqual(self.parse('diff /foo r9:10 depth=files'), '')
        self.assertEqual(self.result, ('/foo', 9, 10,
                                       svn.core.svn_depth_files, False))

    def test_diff_2paths(self):
        self.assertEqual(self.parse('diff /foo@9 /bar@10'), '')
        self.assertEqual(self.result, ('/foo', 9, '/bar', 10,
                                       svn.core.svn_depth_unknown, False))
        self.assertEqual(self.parse('diff /foo@9 /bar@10'
                                    ' ignore-ancestry ...'), ' ...')
        self.assertEqual(self.result, ('/foo', 9, '/bar', 10,
                                       svn.core.svn_depth_unknown, True))
        self.assertEqual(self.parse('diff /foo@9 /bar@10'
                                    ' depth=files ignore-ancestry'), '')
        self.assertEqual(self.result, ('/foo', 9, '/bar', 10,
                                       svn.core.svn_depth_files, True))

    def test_status(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'status')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'status /foo')
        self.assertEqual(self.parse('status /foo r9'), '')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown))
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'status /foo r9 depth=INVALID-DEPTH')
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'status /foo r9 depth=bork')
        self.assertEqual(self.parse('status /foo r9 depth=files .'),
                         ' .')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_files))

    def test_switch(self):
        self.assertEqual(self.parse('switch /foo /bar@10 ...'), ' ...')
        self.assertEqual(self.result, ('/foo', '/bar', 10,
                                       svn.core.svn_depth_unknown))
        self.assertEqual(self.parse('switch /foo /bar@10'
                                    ' depth=files'), '')
        self.assertEqual(self.result, ('/foo', '/bar', 10,
                                       svn.core.svn_depth_files))

    def test_update(self):
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'update')
        self.assertRaises(svn_server_log_parse.Error,
                          self.parse, 'update /foo')
        self.assertEqual(self.parse('update /foo r9'), '')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown,
                                       False))
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'update /foo r9 depth=INVALID-DEPTH')
        self.assertRaises(svn_server_log_parse.BadDepthError, self.parse,
                          'update /foo r9 depth=bork')
        self.assertEqual(self.parse('update /foo r9 depth=files .'), ' .')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_files,
                                       False))
        self.assertEqual(self.parse('update /foo r9 send-copyfrom-args .'),
                         ' .')
        self.assertEqual(self.result, ('/foo', 9, svn.core.svn_depth_unknown,
                                       True))
+
if __name__ == '__main__':
    if len(sys.argv) == 1:
        # No arguments so run the unit tests.
        unittest.main()
        # unittest.main() normally exits the process itself; reaching
        # this point means it returned unexpectedly, so fail loudly.
        sys.stderr.write('unittest.main failed to exit\n')
        sys.exit(2)

    # Use the argument as the path to a log file to test against.

    def uri_encode(s):
        # urllib.parse.quote encodes :&@ characters, svn does not.
        # Keep them (and '/') unescaped so reconstructed action strings
        # match the server's own encoding byte for byte.
        return urllib_parse_quote(s, safe='/:&@')
+
+ # Define a class to reconstruct the SVN-ACTION string.
+ class Test(svn_server_log_parse.Parser):
+ def handle_unknown(self, line):
+ sys.stderr.write('unknown log line at %d:\n%s\n' % (self.linenum,
+ line))
+ sys.exit(2)
+
+ def handle_open(self, protocol, capabilities, path, ra_client, client):
+ capabilities = ' '.join(capabilities)
+ if ra_client is None:
+ ra_client = '-'
+ if client is None:
+ client = '-'
+ path = uri_encode(path)
+ self.action = ('open %d cap=(%s) %s %s %s'
+ % (protocol, capabilities, path, ra_client, client))
+
+ def handle_reparent(self, path):
+ path = uri_encode(path)
+ self.action = 'reparent ' + path
+
+ def handle_get_latest_rev(self):
+ self.action = 'get-latest-rev'
+
+ def handle_get_dated_rev(self, date):
+ self.action = 'get-dated-rev ' + date
+
+ def handle_commit(self, revision):
+ self.action = 'commit r%d' % (revision,)
+
+ def handle_get_dir(self, path, revision, text, props):
+ path = uri_encode(path)
+ self.action = 'get-dir %s r%d' % (path, revision)
+ if text:
+ self.action += ' text'
+ if props:
+ self.action += ' props'
+
+ def handle_get_file(self, path, revision, text, props):
+ path = uri_encode(path)
+ self.action = 'get-file %s r%d' % (path, revision)
+ if text:
+ self.action += ' text'
+ if props:
+ self.action += ' props'
+
+ def handle_lock(self, paths, steal):
+ paths = [uri_encode(x) for x in paths]
+ self.action = 'lock (%s)' % (' '.join(paths),)
+ if steal:
+ self.action += ' steal'
+
+ def handle_change_rev_prop(self, revision, revprop):
+ revprop = uri_encode(revprop)
+ self.action = 'change-rev-prop r%d %s' % (revision, revprop)
+
+ def handle_rev_prop(self, revision, revprop):
+ revprop = uri_encode(revprop)
+ self.action = 'rev-prop r%d %s' % (revision, revprop)
+
+ def handle_rev_proplist(self, revision):
+ self.action = 'rev-proplist r%d' % (revision,)
+
+ def handle_unlock(self, paths, break_lock):
+ paths = [uri_encode(x) for x in paths]
+ self.action = 'unlock (%s)' % (' '.join(paths),)
+ if break_lock:
+ self.action += ' break'
+
+ def handle_get_lock(self, path):
+ path = uri_encode(path)
+ self.action = 'get-lock ' + path
+
+ def handle_get_locks(self, path):
+ self.action = 'get-locks ' + path
+ path = uri_encode(path)
+
+ def handle_get_locations(self, path, revisions):
+ path = uri_encode(path)
+ self.action = ('get-locations %s (%s)'
+ % (path, ' '.join([str(x) for x in revisions])))
+
+ def handle_get_location_segments(self, path, peg, left, right):
+ path = uri_encode(path)
+ self.action = 'get-location-segments %s@%d r%d:%d' % (path, peg,
+ left, right)
+
+ def handle_get_file_revs(self, path, left, right,
+ include_merged_revisions):
+ path = uri_encode(path)
+ self.action = 'get-file-revs %s r%d:%d' % (path, left, right)
+ if include_merged_revisions:
+ self.action += ' include-merged-revisions'
+
+ def handle_get_mergeinfo(self, paths, inheritance, include_descendants):
+ paths = [uri_encode(x) for x in paths]
+ self.action = ('get-mergeinfo (%s) %s'
+ % (' '.join(paths),
+ svn.core.svn_inheritance_to_word(inheritance)))
+ if include_descendants:
+ self.action += ' include-descendants'
+
+ def handle_log(self, paths, left, right, limit, discover_changed_paths,
+ strict, include_merged_revisions, revprops):
+ paths = [uri_encode(x) for x in paths]
+ self.action = 'log (%s) r%d:%d' % (' '.join(paths),
+ left, right)
+ if limit != 0:
+ self.action += ' limit=%d' % (limit,)
+ if discover_changed_paths:
+ self.action += ' discover-changed-paths'
+ if strict:
+ self.action += ' strict'
+ if include_merged_revisions:
+ self.action += ' include-merged-revisions'
+ if revprops is None:
+ self.action += ' revprops=all'
+ elif len(revprops) > 0:
+ revprops = [uri_encode(x) for x in revprops]
+ self.action += ' revprops=(%s)' % (' '.join(revprops),)
+
+ def handle_check_path(self, path, revision):
+ path = uri_encode(path)
+ self.action = 'check-path %s@%d' % (path, revision)
+
+ def handle_stat(self, path, revision):
+ path = uri_encode(path)
+ self.action = 'stat %s@%d' % (path, revision)
+
+ def handle_replay(self, path, revision):
+ path = uri_encode(path)
+ self.action = 'replay %s r%d' % (path, revision)
+
+ def maybe_depth(self, depth):
+ if depth != svn.core.svn_depth_unknown:
+ self.action += ' depth=%s' % (
+ svn.core.svn_depth_to_word(depth),)
+
+ def handle_checkout_or_export(self, path, revision, depth):
+ path = uri_encode(path)
+ self.action = 'checkout-or-export %s r%d' % (path, revision)
+ self.maybe_depth(depth)
+
+ def handle_diff_1path(self, path, left, right,
+ depth, ignore_ancestry):
+ path = uri_encode(path)
+ self.action = 'diff %s r%d:%d' % (path, left, right)
+ self.maybe_depth(depth)
+ if ignore_ancestry:
+ self.action += ' ignore-ancestry'
+
+ def handle_diff_2paths(self, from_path, from_rev,
+ to_path, to_rev,
+ depth, ignore_ancestry):
+ from_path = uri_encode(from_path)
+ to_path = uri_encode(to_path)
+ self.action = ('diff %s@%d %s@%d'
+ % (from_path, from_rev, to_path, to_rev))
+ self.maybe_depth(depth)
+ if ignore_ancestry:
+ self.action += ' ignore-ancestry'
+
+ def handle_status(self, path, revision, depth):
+ path = uri_encode(path)
+ self.action = 'status %s r%d' % (path, revision)
+ self.maybe_depth(depth)
+
+ def handle_switch(self, from_path, to_path, to_rev, depth):
+ from_path = uri_encode(from_path)
+ to_path = uri_encode(to_path)
+ self.action = ('switch %s %s@%d'
+ % (from_path, to_path, to_rev))
+ self.maybe_depth(depth)
+
+ def handle_update(self, path, revision, depth, send_copyfrom_args):
+ path = uri_encode(path)
+ self.action = 'update %s r%d' % (path, revision)
+ self.maybe_depth(depth)
+ if send_copyfrom_args:
+ self.action += ' send-copyfrom-args'
+
+ tmp = tempfile.mktemp()
+ try:
+ fp = open(tmp, 'w')
+ parser = Test()
+ parser.linenum = 0
+ log_file = sys.argv[1]
+ log_type = None
+ for line in open(log_file):
+ if log_type is None:
+ # Figure out which log type we have.
+ if re.match(r'\d+ \d\d\d\d-', line):
+ log_type = 'svnserve'
+ elif re.match(r'\[\d\d/', line):
+ log_type = 'mod_dav_svn'
+ else:
+ sys.stderr.write("unknown log format in '%s'"
+ % (log_file,))
+ sys.exit(3)
+ sys.stderr.write('parsing %s log...\n' % (log_type,))
+ sys.stderr.flush()
+
+ words = line.split()
+ if log_type == 'svnserve':
+ # Skip over PID, date, client address, username, and repos.
+ if words[5].startswith('ERR'):
+ # Skip error lines.
+ fp.write(line)
+ continue
+ leading = ' '.join(words[:5])
+ action = ' '.join(words[5:])
+ else:
+ # Find the SVN-ACTION string from the CustomLog format
+ # davautocheck.sh uses. If that changes, this will need
+ # to as well. Currently it's
+ # %t %u %{SVN-REPOS-NAME}e %{SVN-ACTION}e
+ leading = ' '.join(words[:4])
+ action = ' '.join(words[4:])
+
+ # Parse the action and write the reconstructed action to
+ # the temporary file. Ignore the returned trailing text,
+ # as we have none in the davautocheck ops log.
+ parser.linenum += 1
+ try:
+ parser.parse(action)
+ except svn_server_log_parse.Error:
+ sys.stderr.write('error at line %d: %s\n'
+ % (parser.linenum, action))
+ raise
+ fp.write(leading + ' ' + parser.action + '\n')
+ fp.close()
+ # Check differences between original and reconstructed files
+ # (should be identical).
+ result = os.spawnlp(os.P_WAIT, 'diff', 'diff', '-u', log_file, tmp)
+ if result == 0:
+ sys.stderr.write('OK\n')
+ sys.exit(result)
+ finally:
+ try:
+ os.unlink(tmp)
+ except Exception, e:
+ sys.stderr.write('os.unlink(tmp): %s\n' % (e,))