summaryrefslogtreecommitdiff
path: root/tools/server-side
diff options
context:
space:
mode:
Diffstat (limited to 'tools/server-side')
-rwxr-xr-xtools/server-side/fsfs-reshard.py2
-rw-r--r--tools/server-side/fsfs-stats.c2181
-rw-r--r--tools/server-side/mod_dontdothat/mod_dontdothat.c41
-rw-r--r--tools/server-side/svn-populate-node-origins-index.c2
-rw-r--r--tools/server-side/svn-rep-sharing-stats.c530
-rw-r--r--tools/server-side/svnauthz.c147
-rwxr-xr-xtools/server-side/svnpredumpfilter.py51
-rwxr-xr-xtools/server-side/svnpubsub/commit-hook.py43
-rw-r--r--tools/server-side/svnpubsub/daemonize.py193
-rwxr-xr-xtools/server-side/svnpubsub/rc.d/svnpubsub.freebsd2
-rwxr-xr-xtools/server-side/svnpubsub/revprop-change-hook.py90
-rw-r--r--tools/server-side/svnpubsub/svnpubsub/client.py54
-rw-r--r--tools/server-side/svnpubsub/svnpubsub/server.py114
-rw-r--r--tools/server-side/svnpubsub/svnpubsub/util.py36
-rwxr-xr-xtools/server-side/svnpubsub/svnwcsub.py46
-rwxr-xr-xtools/server-side/svnpubsub/watcher.py5
16 files changed, 567 insertions, 2970 deletions
diff --git a/tools/server-side/fsfs-reshard.py b/tools/server-side/fsfs-reshard.py
index 16d2fcd..bd82080 100755
--- a/tools/server-side/fsfs-reshard.py
+++ b/tools/server-side/fsfs-reshard.py
@@ -46,7 +46,7 @@
# under the License.
# ====================================================================
#
-# $HeadURL: http://svn.apache.org/repos/asf/subversion/branches/1.8.x/tools/server-side/fsfs-reshard.py $
+# $HeadURL: https://svn.apache.org/repos/asf/subversion/branches/1.9.x/tools/server-side/fsfs-reshard.py $
# $LastChangedDate: 2009-11-16 19:07:17 +0000 (Mon, 16 Nov 2009) $
# $LastChangedBy: hwright $
# $LastChangedRevision: 880911 $
diff --git a/tools/server-side/fsfs-stats.c b/tools/server-side/fsfs-stats.c
deleted file mode 100644
index 80a09f9..0000000
--- a/tools/server-side/fsfs-stats.c
+++ /dev/null
@@ -1,2181 +0,0 @@
-/* fsfs-stats.c -- gather size statistics on FSFS repositories
- *
- * ====================================================================
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- * ====================================================================
- */
-
-
-#include <assert.h>
-
-#include <apr.h>
-#include <apr_general.h>
-#include <apr_file_io.h>
-#include <apr_poll.h>
-
-#include "svn_pools.h"
-#include "svn_diff.h"
-#include "svn_io.h"
-#include "svn_utf.h"
-#include "svn_dirent_uri.h"
-#include "svn_sorts.h"
-#include "svn_delta.h"
-#include "svn_hash.h"
-#include "svn_cache_config.h"
-
-#include "private/svn_string_private.h"
-#include "private/svn_subr_private.h"
-#include "private/svn_dep_compat.h"
-#include "private/svn_cache.h"
-
-#ifndef _
-#define _(x) x
-#endif
-
-#define ERROR_TAG "fsfs-stats: "
-
-/* We group representations into 2x2 different kinds plus one default:
- * [dir / file] x [text / prop]. The assignment is done by the first node
- * that references the respective representation.
- */
-typedef enum rep_kind_t
-{
- /* The representation is _directly_ unused, i.e. not referenced by any
- * noderev. However, some other representation may use it as delta base.
- * null value. Should not occur in real-word repositories. */
- unused_rep,
-
- /* a properties on directory rep */
- dir_property_rep,
-
- /* a properties on file rep */
- file_property_rep,
-
- /* a directory rep */
- dir_rep,
-
- /* a file rep */
- file_rep
-} rep_kind_t;
-
-/* A representation fragment.
- */
-typedef struct representation_t
-{
- /* absolute offset in the file */
- apr_size_t offset;
-
- /* item length in bytes */
- apr_size_t size;
-
- /* item length after de-deltification */
- apr_size_t expanded_size;
-
- /* deltification base, or NULL if there is none */
- struct representation_t *delta_base;
-
- /* revision that contains this representation
- * (may be referenced by other revisions, though) */
- svn_revnum_t revision;
-
- /* number of nodes that reference this representation */
- apr_uint32_t ref_count;
-
- /* length of the PLAIN / DELTA line in the source file in bytes */
- apr_uint16_t header_size;
-
- /* classification of the representation. values of rep_kind_t */
- char kind;
-
- /* the source content has a PLAIN header, so we may simply copy the
- * source content into the target */
- char is_plain;
-
-} representation_t;
-
-/* Represents a single revision.
- * There will be only one instance per revision. */
-typedef struct revision_info_t
-{
- /* number of this revision */
- svn_revnum_t revision;
-
- /* pack file offset (manifest value), 0 for non-packed files */
- apr_size_t offset;
-
- /* offset of the changes list relative to OFFSET */
- apr_size_t changes;
-
- /* length of the changes list on bytes */
- apr_size_t changes_len;
-
- /* offset of the changes list relative to OFFSET */
- apr_size_t change_count;
-
- /* first offset behind the revision data in the pack file (file length
- * for non-packed revs) */
- apr_size_t end;
-
- /* number of directory noderevs in this revision */
- apr_size_t dir_noderev_count;
-
- /* number of file noderevs in this revision */
- apr_size_t file_noderev_count;
-
- /* total size of directory noderevs (i.e. the structs - not the rep) */
- apr_size_t dir_noderev_size;
-
- /* total size of file noderevs (i.e. the structs - not the rep) */
- apr_size_t file_noderev_size;
-
- /* all representation_t of this revision (in no particular order),
- * i.e. those that point back to this struct */
- apr_array_header_t *representations;
-} revision_info_t;
-
-/* Data type to identify a representation. It will be used to address
- * cached combined (un-deltified) windows.
- */
-typedef struct window_cache_key_t
-{
- /* revision of the representation */
- svn_revnum_t revision;
-
- /* its offset */
- apr_size_t offset;
-} window_cache_key_t;
-
-/* Description of one large representation. It's content will be reused /
- * overwritten when it gets replaced by an even larger representation.
- */
-typedef struct large_change_info_t
-{
- /* size of the (deltified) representation */
- apr_size_t size;
-
- /* revision of the representation */
- svn_revnum_t revision;
-
- /* node path. "" for unused instances */
- svn_stringbuf_t *path;
-} large_change_info_t;
-
-/* Container for the largest representations found so far. The capacity
- * is fixed and entries will be inserted by reusing the last one and
- * reshuffling the entry pointers.
- */
-typedef struct largest_changes_t
-{
- /* number of entries allocated in CHANGES */
- apr_size_t count;
-
- /* size of the smallest change */
- apr_size_t min_size;
-
- /* changes kept in this struct */
- large_change_info_t **changes;
-} largest_changes_t;
-
-/* Information we gather per size bracket.
- */
-typedef struct histogram_line_t
-{
- /* number of item that fall into this bracket */
- apr_int64_t count;
-
- /* sum of values in this bracket */
- apr_int64_t sum;
-} histogram_line_t;
-
-/* A histogram of 64 bit integer values.
- */
-typedef struct histogram_t
-{
- /* total sum over all brackets */
- histogram_line_t total;
-
- /* one bracket per binary step.
- * line[i] is the 2^(i-1) <= x < 2^i bracket */
- histogram_line_t lines[64];
-} histogram_t;
-
-/* Information we collect per file ending.
- */
-typedef struct extension_info_t
-{
- /* file extension, including leading "."
- * "(none)" in the container for files w/o extension. */
- const char *extension;
-
- /* histogram of representation sizes */
- histogram_t rep_histogram;
-
- /* histogram of sizes of changed files */
- histogram_t node_histogram;
-} extension_info_t;
-
-/* Root data structure containing all information about a given repository.
- */
-typedef struct fs_fs_t
-{
- /* repository to reorg */
- const char *path;
-
- /* revision to start at (must be 0, ATM) */
- svn_revnum_t start_revision;
-
- /* FSFS format number */
- int format;
-
- /* highest revision number in the repo */
- svn_revnum_t max_revision;
-
- /* first non-packed revision */
- svn_revnum_t min_unpacked_rev;
-
- /* sharing size*/
- int max_files_per_dir;
-
- /* all revisions */
- apr_array_header_t *revisions;
-
- /* empty representation.
- * Used as a dummy base for DELTA reps without base. */
- representation_t *null_base;
-
- /* undeltified txdelta window cache */
- svn_cache__t *window_cache;
-
- /* track the biggest contributors to repo size */
- largest_changes_t *largest_changes;
-
- /* history of representation sizes */
- histogram_t rep_size_histogram;
-
- /* history of sizes of changed nodes */
- histogram_t node_size_histogram;
-
- /* history of unused representations */
- histogram_t unused_rep_histogram;
-
- /* history of sizes of changed files */
- histogram_t file_histogram;
-
- /* history of sizes of file representations */
- histogram_t file_rep_histogram;
-
- /* history of sizes of changed file property sets */
- histogram_t file_prop_histogram;
-
- /* history of sizes of file property representations */
- histogram_t file_prop_rep_histogram;
-
- /* history of sizes of changed directories (in bytes) */
- histogram_t dir_histogram;
-
- /* history of sizes of directories representations */
- histogram_t dir_rep_histogram;
-
- /* history of sizes of changed directories property sets */
- histogram_t dir_prop_histogram;
-
- /* history of sizes of directories property representations */
- histogram_t dir_prop_rep_histogram;
-
- /* extension -> extension_info_t* map */
- apr_hash_t *by_extension;
-} fs_fs_t;
-
-/* Return the rev pack folder for revision REV in FS.
- */
-static const char *
-get_pack_folder(fs_fs_t *fs,
- svn_revnum_t rev,
- apr_pool_t *pool)
-{
- return apr_psprintf(pool, "%s/db/revs/%ld.pack",
- fs->path, rev / fs->max_files_per_dir);
-}
-
-/* Return the path of the file containing revision REV in FS.
- */
-static const char *
-rev_or_pack_file_name(fs_fs_t *fs,
- svn_revnum_t rev,
- apr_pool_t *pool)
-{
- return fs->min_unpacked_rev > rev
- ? svn_dirent_join(get_pack_folder(fs, rev, pool), "pack", pool)
- : apr_psprintf(pool, "%s/db/revs/%ld/%ld", fs->path,
- rev / fs->max_files_per_dir, rev);
-}
-
-/* Open the file containing revision REV in FS and return it in *FILE.
- */
-static svn_error_t *
-open_rev_or_pack_file(apr_file_t **file,
- fs_fs_t *fs,
- svn_revnum_t rev,
- apr_pool_t *pool)
-{
- return svn_io_file_open(file,
- rev_or_pack_file_name(fs, rev, pool),
- APR_READ | APR_BUFFERED,
- APR_OS_DEFAULT,
- pool);
-}
-
-/* Return the length of FILE in *FILE_SIZE. Use POOL for allocations.
-*/
-static svn_error_t *
-get_file_size(apr_off_t *file_size,
- apr_file_t *file,
- apr_pool_t *pool)
-{
- apr_finfo_t finfo;
-
- SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, file, pool));
-
- *file_size = finfo.size;
- return SVN_NO_ERROR;
-}
-
-/* Get the file content of revision REVISION in FS and return it in *CONTENT.
- * Read the LEN bytes starting at file OFFSET. When provided, use FILE as
- * packed or plain rev file.
- * Use POOL for temporary allocations.
- */
-static svn_error_t *
-get_content(svn_stringbuf_t **content,
- apr_file_t *file,
- fs_fs_t *fs,
- svn_revnum_t revision,
- apr_off_t offset,
- apr_size_t len,
- apr_pool_t *pool)
-{
- apr_pool_t * file_pool = svn_pool_create(pool);
- apr_size_t large_buffer_size = 0x10000;
-
- if (file == NULL)
- SVN_ERR(open_rev_or_pack_file(&file, fs, revision, file_pool));
-
- *content = svn_stringbuf_create_ensure(len, pool);
- (*content)->len = len;
-
-#if APR_VERSION_AT_LEAST(1,3,0)
- /* for better efficiency use larger buffers on large reads */
- if ( (len >= large_buffer_size)
- && (apr_file_buffer_size_get(file) < large_buffer_size))
- apr_file_buffer_set(file,
- apr_palloc(apr_file_pool_get(file),
- large_buffer_size),
- large_buffer_size);
-#endif
-
- SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
- SVN_ERR(svn_io_file_read_full2(file, (*content)->data, len,
- NULL, NULL, pool));
- svn_pool_destroy(file_pool);
-
- return SVN_NO_ERROR;
-}
-
-/* In *RESULT, return the cached txdelta window stored in REPRESENTATION
- * within FS. If that has not been found in cache, return NULL.
- * Allocate the result in POOL.
- */
-static svn_error_t *
-get_cached_window(svn_stringbuf_t **result,
- fs_fs_t *fs,
- representation_t *representation,
- apr_pool_t *pool)
-{
- svn_boolean_t found = FALSE;
- window_cache_key_t key;
- key.revision = representation->revision;
- key.offset = representation->offset;
-
- *result = NULL;
- return svn_error_trace(svn_cache__get((void**)result, &found,
- fs->window_cache,
- &key, pool));
-}
-
-/* Cache the undeltified txdelta WINDOW for REPRESENTATION within FS.
- * Use POOL for temporaries.
- */
-static svn_error_t *
-set_cached_window(fs_fs_t *fs,
- representation_t *representation,
- svn_stringbuf_t *window,
- apr_pool_t *pool)
-{
- /* select entry */
- window_cache_key_t key;
- key.revision = representation->revision;
- key.offset = representation->offset;
-
- return svn_error_trace(svn_cache__set(fs->window_cache, &key, window,
- pool));
-}
-
-/* Initialize the LARGEST_CHANGES member in FS with a capacity of COUNT
- * entries. Use POOL for allocations.
- */
-static void
-initialize_largest_changes(fs_fs_t *fs,
- apr_size_t count,
- apr_pool_t *pool)
-{
- apr_size_t i;
-
- fs->largest_changes = apr_pcalloc(pool, sizeof(*fs->largest_changes));
- fs->largest_changes->count = count;
- fs->largest_changes->min_size = 1;
- fs->largest_changes->changes
- = apr_palloc(pool, count * sizeof(*fs->largest_changes->changes));
-
- /* allocate *all* entries before the path stringbufs. This increases
- * cache locality and enhances performance significantly. */
- for (i = 0; i < count; ++i)
- fs->largest_changes->changes[i]
- = apr_palloc(pool, sizeof(**fs->largest_changes->changes));
-
- /* now initialize them and allocate the stringbufs */
- for (i = 0; i < count; ++i)
- {
- fs->largest_changes->changes[i]->size = 0;
- fs->largest_changes->changes[i]->revision = SVN_INVALID_REVNUM;
- fs->largest_changes->changes[i]->path
- = svn_stringbuf_create_ensure(1024, pool);
- }
-}
-
-/* Add entry for SIZE to HISTOGRAM.
- */
-static void
-add_to_histogram(histogram_t *histogram,
- apr_int64_t size)
-{
- apr_int64_t shift = 0;
-
- while (((apr_int64_t)(1) << shift) <= size)
- shift++;
-
- histogram->total.count++;
- histogram->total.sum += size;
- histogram->lines[(apr_size_t)shift].count++;
- histogram->lines[(apr_size_t)shift].sum += size;
-}
-
-/* Update data aggregators in FS with this representation of type KIND, on-
- * disk REP_SIZE and expanded node size EXPANDED_SIZE for PATH in REVSION.
- */
-static void
-add_change(fs_fs_t *fs,
- apr_int64_t rep_size,
- apr_int64_t expanded_size,
- svn_revnum_t revision,
- const char *path,
- rep_kind_t kind)
-{
- /* identify largest reps */
- if (rep_size >= fs->largest_changes->min_size)
- {
- apr_size_t i;
- large_change_info_t *info
- = fs->largest_changes->changes[fs->largest_changes->count - 1];
- info->size = rep_size;
- info->revision = revision;
- svn_stringbuf_set(info->path, path);
-
- /* linear insertion but not too bad since count is low and insertions
- * near the end are more likely than close to front */
- for (i = fs->largest_changes->count - 1; i > 0; --i)
- if (fs->largest_changes->changes[i-1]->size >= rep_size)
- break;
- else
- fs->largest_changes->changes[i] = fs->largest_changes->changes[i-1];
-
- fs->largest_changes->changes[i] = info;
- fs->largest_changes->min_size
- = fs->largest_changes->changes[fs->largest_changes->count-1]->size;
- }
-
- /* global histograms */
- add_to_histogram(&fs->rep_size_histogram, rep_size);
- add_to_histogram(&fs->node_size_histogram, expanded_size);
-
- /* specific histograms by type */
- switch (kind)
- {
- case unused_rep: add_to_histogram(&fs->unused_rep_histogram,
- rep_size);
- break;
- case dir_property_rep: add_to_histogram(&fs->dir_prop_rep_histogram,
- rep_size);
- add_to_histogram(&fs->dir_prop_histogram,
- expanded_size);
- break;
- case file_property_rep: add_to_histogram(&fs->file_prop_rep_histogram,
- rep_size);
- add_to_histogram(&fs->file_prop_histogram,
- expanded_size);
- break;
- case dir_rep: add_to_histogram(&fs->dir_rep_histogram,
- rep_size);
- add_to_histogram(&fs->dir_histogram,
- expanded_size);
- break;
- case file_rep: add_to_histogram(&fs->file_rep_histogram,
- rep_size);
- add_to_histogram(&fs->file_histogram,
- expanded_size);
- break;
- }
-
- /* by extension */
- if (kind == file_rep)
- {
- /* determine extension */
- extension_info_t *info;
- const char * file_name = strrchr(path, '/');
- const char * extension = file_name ? strrchr(file_name, '.') : NULL;
-
- if (extension == NULL || extension == file_name + 1)
- extension = "(none)";
-
- /* get / auto-insert entry for this extension */
- info = apr_hash_get(fs->by_extension, extension, APR_HASH_KEY_STRING);
- if (info == NULL)
- {
- apr_pool_t *pool = apr_hash_pool_get(fs->by_extension);
- info = apr_pcalloc(pool, sizeof(*info));
- info->extension = apr_pstrdup(pool, extension);
-
- apr_hash_set(fs->by_extension, info->extension,
- APR_HASH_KEY_STRING, info);
- }
-
- /* update per-extension histogram */
- add_to_histogram(&info->node_histogram, expanded_size);
- add_to_histogram(&info->rep_histogram, rep_size);
- }
-}
-
-/* Given rev pack PATH in FS, read the manifest file and return the offsets
- * in *MANIFEST. Use POOL for allocations.
- */
-static svn_error_t *
-read_manifest(apr_array_header_t **manifest,
- fs_fs_t *fs,
- const char *path,
- apr_pool_t *pool)
-{
- svn_stream_t *manifest_stream;
- apr_pool_t *iterpool;
-
- /* Open the manifest file. */
- SVN_ERR(svn_stream_open_readonly(&manifest_stream,
- svn_dirent_join(path, "manifest", pool),
- pool, pool));
-
- /* While we're here, let's just read the entire manifest file into an array,
- so we can cache the entire thing. */
- iterpool = svn_pool_create(pool);
- *manifest = apr_array_make(pool, fs->max_files_per_dir, sizeof(apr_size_t));
- while (1)
- {
- svn_stringbuf_t *sb;
- svn_boolean_t eof;
- apr_uint64_t val;
- svn_error_t *err;
-
- svn_pool_clear(iterpool);
- SVN_ERR(svn_stream_readline(manifest_stream, &sb, "\n", &eof, iterpool));
- if (eof)
- break;
-
- err = svn_cstring_strtoui64(&val, sb->data, 0, APR_SIZE_MAX, 10);
- if (err)
- return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
- _("Manifest offset '%s' too large"),
- sb->data);
- APR_ARRAY_PUSH(*manifest, apr_size_t) = (apr_size_t)val;
- }
- svn_pool_destroy(iterpool);
-
- return svn_stream_close(manifest_stream);
-}
-
-/* Read header information for the revision stored in FILE_CONTENT (one
- * whole revision). Return the offsets within FILE_CONTENT for the
- * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN.
- * Use POOL for temporary allocations. */
-static svn_error_t *
-read_revision_header(apr_size_t *changes,
- apr_size_t *changes_len,
- apr_size_t *root_noderev,
- svn_stringbuf_t *file_content,
- apr_pool_t *pool)
-{
- char buf[64];
- const char *line;
- char *space;
- apr_uint64_t val;
- apr_size_t len;
-
- /* Read in this last block, from which we will identify the last line. */
- len = sizeof(buf);
- if (len > file_content->len)
- len = file_content->len;
-
- memcpy(buf, file_content->data + file_content->len - len, len);
-
- /* The last byte should be a newline. */
- if (buf[(apr_ssize_t)len - 1] != '\n')
- return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
- _("Revision lacks trailing newline"));
-
- /* Look for the next previous newline. */
- buf[len - 1] = 0;
- line = strrchr(buf, '\n');
- if (line == NULL)
- return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
- _("Final line in revision file longer "
- "than 64 characters"));
-
- space = strchr(line, ' ');
- if (space == NULL)
- return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
- _("Final line in revision file missing space"));
-
- /* terminate the header line */
- *space = 0;
-
- /* extract information */
- SVN_ERR(svn_cstring_strtoui64(&val, line+1, 0, APR_SIZE_MAX, 10));
- *root_noderev = (apr_size_t)val;
- SVN_ERR(svn_cstring_strtoui64(&val, space+1, 0, APR_SIZE_MAX, 10));
- *changes = (apr_size_t)val;
- *changes_len = file_content->len - *changes - (buf + len - line) + 1;
-
- return SVN_NO_ERROR;
-}
-
-/* Read the FSFS format number and sharding size from the format file at
- * PATH and return it in *PFORMAT and *MAX_FILES_PER_DIR respectively.
- * Use POOL for temporary allocations.
- */
-static svn_error_t *
-read_format(int *pformat, int *max_files_per_dir,
- const char *path, apr_pool_t *pool)
-{
- svn_error_t *err;
- apr_file_t *file;
- char buf[80];
- apr_size_t len;
-
- /* open format file and read the first line */
- err = svn_io_file_open(&file, path, APR_READ | APR_BUFFERED,
- APR_OS_DEFAULT, pool);
- if (err && APR_STATUS_IS_ENOENT(err->apr_err))
- {
- /* Treat an absent format file as format 1. Do not try to
- create the format file on the fly, because the repository
- might be read-only for us, or this might be a read-only
- operation, and the spirit of FSFS is to make no changes
- whatseover in read-only operations. See thread starting at
- http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=97600
- for more. */
- svn_error_clear(err);
- *pformat = 1;
- *max_files_per_dir = 0;
-
- return SVN_NO_ERROR;
- }
- SVN_ERR(err);
-
- len = sizeof(buf);
- err = svn_io_read_length_line(file, buf, &len, pool);
- if (err && APR_STATUS_IS_EOF(err->apr_err))
- {
- /* Return a more useful error message. */
- svn_error_clear(err);
- return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL,
- _("Can't read first line of format file '%s'"),
- svn_dirent_local_style(path, pool));
- }
- SVN_ERR(err);
-
- /* Check that the first line contains only digits. */
- SVN_ERR(svn_cstring_atoi(pformat, buf));
-
- /* Set the default values for anything that can be set via an option. */
- *max_files_per_dir = 0;
-
- /* Read any options. */
- while (1)
- {
- len = sizeof(buf);
- err = svn_io_read_length_line(file, buf, &len, pool);
- if (err && APR_STATUS_IS_EOF(err->apr_err))
- {
- /* No more options; that's okay. */
- svn_error_clear(err);
- break;
- }
- SVN_ERR(err);
-
- if (strncmp(buf, "layout ", 7) == 0)
- {
- if (strcmp(buf+7, "linear") == 0)
- {
- *max_files_per_dir = 0;
- continue;
- }
-
- if (strncmp(buf+7, "sharded ", 8) == 0)
- {
- /* Check that the argument is numeric. */
- SVN_ERR(svn_cstring_atoi(max_files_per_dir, buf + 15));
- continue;
- }
- }
-
- return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL,
- _("'%s' contains invalid filesystem format option '%s'"),
- svn_dirent_local_style(path, pool), buf);
- }
-
- return svn_io_file_close(file, pool);
-}
-
-/* Read the content of the file at PATH and return it in *RESULT.
- * Use POOL for temporary allocations.
- */
-static svn_error_t *
-read_number(svn_revnum_t *result, const char *path, apr_pool_t *pool)
-{
- svn_stringbuf_t *content;
- apr_uint64_t number;
-
- SVN_ERR(svn_stringbuf_from_file2(&content, path, pool));
-
- content->data[content->len-1] = 0;
- SVN_ERR(svn_cstring_strtoui64(&number, content->data, 0, LONG_MAX, 10));
- *result = (svn_revnum_t)number;
-
- return SVN_NO_ERROR;
-}
-
-/* Create *FS for the repository at PATH and read the format and size info.
- * Use POOL for temporary allocations.
- */
-static svn_error_t *
-fs_open(fs_fs_t **fs, const char *path, apr_pool_t *pool)
-{
- *fs = apr_pcalloc(pool, sizeof(**fs));
- (*fs)->path = apr_pstrdup(pool, path);
- (*fs)->max_files_per_dir = 1000;
-
- /* Read the FS format number. */
- SVN_ERR(read_format(&(*fs)->format,
- &(*fs)->max_files_per_dir,
- svn_dirent_join(path, "db/format", pool),
- pool));
- if (((*fs)->format != 4) && ((*fs)->format != 6))
- return svn_error_create(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL, NULL);
-
- /* read size (HEAD) info */
- SVN_ERR(read_number(&(*fs)->min_unpacked_rev,
- svn_dirent_join(path, "db/min-unpacked-rev", pool),
- pool));
- return read_number(&(*fs)->max_revision,
- svn_dirent_join(path, "db/current", pool),
- pool);
-}
-
-/* Utility function that returns true if STRING->DATA matches KEY.
- */
-static svn_boolean_t
-key_matches(svn_string_t *string, const char *key)
-{
- return strcmp(string->data, key) == 0;
-}
-
-/* Comparator used for binary search comparing the absolute file offset
- * of a representation to some other offset. DATA is a *representation_t,
- * KEY is a pointer to an apr_size_t.
- */
-static int
-compare_representation_offsets(const void *data, const void *key)
-{
- apr_ssize_t diff = (*(const representation_t *const *)data)->offset
- - *(const apr_size_t *)key;
-
- /* sizeof(int) may be < sizeof(ssize_t) */
- if (diff < 0)
- return -1;
- return diff > 0 ? 1 : 0;
-}
-
-/* Find the revision_info_t object to the given REVISION in FS and return
- * it in *REVISION_INFO. For performance reasons, we skip the lookup if
- * the info is already provided.
- *
- * In that revision, look for the representation_t object for offset OFFSET.
- * If it already exists, set *IDX to its index in *REVISION_INFO's
- * representations list and return the representation object. Otherwise,
- * set the index to where it must be inserted and return NULL.
- */
-static representation_t *
-find_representation(int *idx,
- fs_fs_t *fs,
- revision_info_t **revision_info,
- svn_revnum_t revision,
- apr_size_t offset)
-{
- revision_info_t *info;
- *idx = -1;
-
- /* first let's find the revision */
- info = revision_info ? *revision_info : NULL;
- if (info == NULL || info->revision != revision)
- {
- info = APR_ARRAY_IDX(fs->revisions,
- revision - fs->start_revision,
- revision_info_t*);
- if (revision_info)
- *revision_info = info;
- }
-
- /* not found -> no result */
- if (info == NULL)
- return NULL;
-
- assert(revision == info->revision);
-
- /* look for the representation */
- *idx = svn_sort__bsearch_lower_bound(&offset,
- info->representations,
- compare_representation_offsets);
- if (*idx < info->representations->nelts)
- {
- /* return the representation, if this is the one we were looking for */
- representation_t *result
- = APR_ARRAY_IDX(info->representations, *idx, representation_t *);
- if (result->offset == offset)
- return result;
- }
-
- /* not parsed, yet */
- return NULL;
-}
-
-/* Read the representation header in FILE_CONTENT at OFFSET. Return its
- * size in *HEADER_SIZE, set *IS_PLAIN if no deltification was used and
- * return the deltification base representation in *REPRESENTATION. If
- * there is none, set it to NULL. Use FS to it look up.
- *
- * Use POOL for allocations and SCRATCH_POOL for temporaries.
- */
-static svn_error_t *
-read_rep_base(representation_t **representation,
- apr_size_t *header_size,
- svn_boolean_t *is_plain,
- fs_fs_t *fs,
- svn_stringbuf_t *file_content,
- apr_size_t offset,
- apr_pool_t *pool,
- apr_pool_t *scratch_pool)
-{
- char *str, *last_str;
- int idx;
- svn_revnum_t revision;
- apr_uint64_t temp;
-
- /* identify representation header (1 line) */
- const char *buffer = file_content->data + offset;
- const char *line_end = strchr(buffer, '\n');
- *header_size = line_end - buffer + 1;
-
- /* check for PLAIN rep */
- if (strncmp(buffer, "PLAIN\n", *header_size) == 0)
- {
- *is_plain = TRUE;
- *representation = NULL;
- return SVN_NO_ERROR;
- }
-
- /* check for DELTA against empty rep */
- *is_plain = FALSE;
- if (strncmp(buffer, "DELTA\n", *header_size) == 0)
- {
- /* This is a delta against the empty stream. */
- *representation = fs->null_base;
- return SVN_NO_ERROR;
- }
-
- str = apr_pstrndup(scratch_pool, buffer, line_end - buffer);
- last_str = str;
-
- /* parse it. */
- str = svn_cstring_tokenize(" ", &last_str);
- str = svn_cstring_tokenize(" ", &last_str);
- SVN_ERR(svn_revnum_parse(&revision, str, NULL));
-
- str = svn_cstring_tokenize(" ", &last_str);
- SVN_ERR(svn_cstring_strtoui64(&temp, str, 0, APR_SIZE_MAX, 10));
-
- /* it should refer to a rep in an earlier revision. Look it up */
- *representation = find_representation(&idx, fs, NULL, revision, (apr_size_t)temp);
- return SVN_NO_ERROR;
-}
-
-/* Parse the representation reference (text: or props:) in VALUE, look
- * it up in FS and return it in *REPRESENTATION. To be able to parse the
- * base rep, we pass the FILE_CONTENT as well.
- *
- * If necessary, allocate the result in POOL; use SCRATCH_POOL for temp.
- * allocations.
- */
-static svn_error_t *
-parse_representation(representation_t **representation,
- fs_fs_t *fs,
- svn_stringbuf_t *file_content,
- svn_string_t *value,
- revision_info_t *revision_info,
- apr_pool_t *pool,
- apr_pool_t *scratch_pool)
-{
- representation_t *result;
- svn_revnum_t revision;
-
- apr_uint64_t offset;
- apr_uint64_t size;
- apr_uint64_t expanded_size;
- int idx;
-
- /* read location (revision, offset) and size */
- char *c = (char *)value->data;
- SVN_ERR(svn_revnum_parse(&revision, svn_cstring_tokenize(" ", &c), NULL));
- SVN_ERR(svn_cstring_strtoui64(&offset, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
- SVN_ERR(svn_cstring_strtoui64(&size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
- SVN_ERR(svn_cstring_strtoui64(&expanded_size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
-
- /* look it up */
- result = find_representation(&idx, fs, &revision_info, revision, (apr_size_t)offset);
- if (!result)
- {
- /* not parsed, yet (probably a rep in the same revision).
- * Create a new rep object and determine its base rep as well.
- */
- apr_size_t header_size;
- svn_boolean_t is_plain;
-
- result = apr_pcalloc(pool, sizeof(*result));
- result->revision = revision;
- result->expanded_size = (apr_size_t)(expanded_size ? expanded_size : size);
- result->offset = (apr_size_t)offset;
- result->size = (apr_size_t)size;
- SVN_ERR(read_rep_base(&result->delta_base, &header_size,
- &is_plain, fs, file_content,
- (apr_size_t)offset,
- pool, scratch_pool));
-
- result->header_size = header_size;
- result->is_plain = is_plain;
- svn_sort__array_insert(&result, revision_info->representations, idx);
- }
-
- *representation = result;
-
- return SVN_NO_ERROR;
-}
-
-/* Get the unprocessed (i.e. still deltified) content of REPRESENTATION in
- * FS and return it in *CONTENT. If no NULL, FILE_CONTENT must contain
- * the contents of the revision that also contains the representation.
- * Use POOL for allocations.
- */
-static svn_error_t *
-get_rep_content(svn_stringbuf_t **content,
- fs_fs_t *fs,
- representation_t *representation,
- svn_stringbuf_t *file_content,
- apr_pool_t *pool)
-{
- apr_off_t offset;
- svn_revnum_t revision = representation->revision;
- revision_info_t *revision_info = APR_ARRAY_IDX(fs->revisions,
- revision - fs->start_revision,
- revision_info_t*);
-
- /* not in cache. Is the revision valid at all? */
- if (revision - fs->start_revision > fs->revisions->nelts)
- return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
- _("Unknown revision %ld"), revision);
-
- if (file_content)
- {
- offset = representation->offset
- + representation->header_size;
- *content = svn_stringbuf_ncreate(file_content->data + offset,
- representation->size, pool);
- }
- else
- {
- offset = revision_info->offset
- + representation->offset
- + representation->header_size;
- SVN_ERR(get_content(content, NULL, fs, revision, offset,
- representation->size, pool));
- }
-
- return SVN_NO_ERROR;
-}
-
-
-/* Read the delta window contents of all windows in REPRESENTATION in FS.
- * If not NULL, FILE_CONTENT must contain the contents of the revision that
- * also contains the representation.
- * Return the data as svn_txdelta_window_t* instances in *WINDOWS, in the
- * order in which they were read from the representation.
- * Use POOL for allocations.
- */
-static svn_error_t *
-read_windows(apr_array_header_t **windows,
- fs_fs_t *fs,
- representation_t *representation,
- svn_stringbuf_t *file_content,
- apr_pool_t *pool)
-{
- svn_stringbuf_t *content;
- svn_stream_t *stream;
- char version;
- apr_size_t len = sizeof(version);
-
- *windows = apr_array_make(pool, 0, sizeof(svn_txdelta_window_t *));
-
- /* get the (still deltified) content of this representation */
- SVN_ERR(get_rep_content(&content, fs, representation, file_content, pool));
-
- /* Create a read stream over the content minus its first 3 bytes and
- * read the following byte as the svndiff version.
- * NOTE(review): presumably the 3 skipped bytes are the svndiff magic;
- * CONTENT->LEN is not checked to be at least 3 before the adjustment. */
- content->data += 3;
- content->len -= 3;
- stream = svn_stream_from_stringbuf(content, pool);
- SVN_ERR(svn_stream_read(stream, &version, &len));
-
- /* read the windows from that stream */
- while (TRUE)
- {
- svn_txdelta_window_t *window;
- svn_stream_mark_t *mark;
- char dummy;
-
- len = sizeof(dummy);
- SVN_ERR(svn_stream_mark(stream, &mark, pool));
- SVN_ERR(svn_stream_read(stream, &dummy, &len)); /* probe one byte to detect the end of the data */
- if (len == 0)
- break;
-
- SVN_ERR(svn_stream_seek(stream, mark)); /* un-read the probe byte */
- SVN_ERR(svn_txdelta_read_svndiff_window(&window, stream, version, pool));
- APR_ARRAY_PUSH(*windows, svn_txdelta_window_t *) = window;
- }
-
- return SVN_NO_ERROR;
-}
-
-/* Get the undeltified representation that is a result of combining all
- * deltas from the current desired REPRESENTATION in FS with its base
- * representation. If not NULL, FILE_CONTENT must contain the contents of
- * the revision that also contains the representation. Store the result
- * in *CONTENT. Use POOL for allocations.
- *
- * Plain representations are returned as read. Deltified ones are looked
- * up in the window cache first; on a miss, the base is reconstructed by
- * a recursive call and the delta windows are applied on top of it.
- * In either case the result is handed to set_cached_window().
- */
-static svn_error_t *
-get_combined_window(svn_stringbuf_t **content,
- fs_fs_t *fs,
- representation_t *representation,
- svn_stringbuf_t *file_content,
- apr_pool_t *pool)
-{
- int i;
- apr_array_header_t *windows;
- svn_stringbuf_t *base_content, *result;
- const char *source;
- apr_pool_t *sub_pool;
- apr_pool_t *iter_pool;
-
- /* special case: no un-deltification necessary */
- if (representation->is_plain)
- {
- SVN_ERR(get_rep_content(content, fs, representation, file_content,
- pool));
- SVN_ERR(set_cached_window(fs, representation, *content, pool));
- return SVN_NO_ERROR;
- }
-
- /* special case: data already in cache */
- SVN_ERR(get_cached_window(content, fs, representation, pool));
- if (*content)
- return SVN_NO_ERROR;
-
- /* Read the delta windows for this representation.
- * SUB_POOL holds the windows and the base content, ITER_POOL the
- * per-window buffers. Both are subpools of POOL.
- * NOTE(review): an early SVN_ERR return below skips the explicit
- * svn_pool_destroy() calls, so the subpools then live as long as POOL. */
- sub_pool = svn_pool_create(pool);
- iter_pool = svn_pool_create(pool);
- SVN_ERR(read_windows(&windows, fs, representation, file_content, sub_pool));
-
- /* Fetch the base content, or create an empty one if there is no base.
- * A delta base with revision 0 is treated like a missing base here.
- * FILE_CONTENT is not passed on, so the base is fetched via FS. */
- if (representation->delta_base && representation->delta_base->revision)
- SVN_ERR(get_combined_window(&base_content, fs,
- representation->delta_base, NULL, sub_pool));
- else
- base_content = svn_stringbuf_create_empty(sub_pool);
-
- /* apply deltas */
- result = svn_stringbuf_create_empty(pool); /* survives the destruction of SUB_POOL */
- source = base_content->data;
-
- for (i = 0; i < windows->nelts; ++i)
- {
- svn_txdelta_window_t *window
- = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
- svn_stringbuf_t *buf
- = svn_stringbuf_create_ensure(window->tview_len, iter_pool);
-
- buf->len = window->tview_len;
- svn_txdelta_apply_instructions(window, window->src_ops ? source : NULL,
- buf->data, &buf->len);
-
- svn_stringbuf_appendbytes(result, buf->data, buf->len);
- source += window->sview_len; /* advance the source view by this window's length */
-
- svn_pool_clear(iter_pool);
- }
-
- /* cache result and return it */
- SVN_ERR(set_cached_window(fs, representation, result, sub_pool));
- *content = result;
-
- svn_pool_destroy(iter_pool);
- svn_pool_destroy(sub_pool);
-
- return SVN_NO_ERROR;
-}
-
-/* Forward declaration: parse_dir() and read_noderev() call each other,
- * so read_noderev() must be declared before parse_dir() is defined. */
-static svn_error_t *
-read_noderev(fs_fs_t *fs,
- svn_stringbuf_t *file_content,
- apr_size_t offset,
- revision_info_t *revision_info,
- apr_pool_t *pool,
- apr_pool_t *scratch_pool);
-
-/* Starting at the directory in REPRESENTATION in FILE_CONTENT, read all
- * DAG nodes, directories and representations linked in that tree structure.
- * Store them in FS and REVISION_INFO. Also, read them only once.
- *
- * Only entries whose value contains the "r<revision>/" key of
- * REPRESENTATION's own revision are followed; each of those is passed to
- * read_noderev() with the offset parsed from the text after that key.
- * A NULL REPRESENTATION is accepted and treated as an empty directory.
- * Returns SVN_ERR_FS_CORRUPT if an entry is not followed by the expected
- * number of newlines.
- *
- * Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
- */
-static svn_error_t *
-parse_dir(fs_fs_t *fs,
- svn_stringbuf_t *file_content,
- representation_t *representation,
- revision_info_t *revision_info,
- apr_pool_t *pool,
- apr_pool_t *scratch_pool)
-{
- svn_stringbuf_t *text;
- apr_pool_t *iter_pool;
- apr_pool_t *text_pool;
- const char *current;
- const char *revision_key;
- apr_size_t key_len;
-
- /* special case: empty dir rep */
- if (representation == NULL)
- return SVN_NO_ERROR;
-
- /* Get the directory as unparsed string. TEXT_POOL keeps the text and
- * the key alive for the whole loop, ITER_POOL is cleared per entry. */
- iter_pool = svn_pool_create(scratch_pool);
- text_pool = svn_pool_create(scratch_pool);
-
- SVN_ERR(get_combined_window(&text, fs, representation, file_content,
- text_pool));
- current = text->data;
-
- /* calculate some invariants */
- revision_key = apr_psprintf(text_pool, "r%ld/", representation->revision);
- key_len = strlen(revision_key);
-
- /* Parse and process all directory entries.
- * NOTE(review): the loop ends at the first entry starting with 'E',
- * presumably the terminating END line of the hash dump - confirm. */
- while (*current != 'E')
- {
- char *next;
-
- /* skip "K ???\n<name>\nV ???\n" lines, leaving CURRENT at the start
- * of the value line and NEXT at the newline that ends it */
- current = strchr(current, '\n');
- if (current)
- current = strchr(current+1, '\n');
- if (current)
- current = strchr(current+1, '\n');
- next = current ? strchr(++current, '\n') : NULL;
- if (next == NULL)
- return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
- _("Corrupt directory representation in r%ld at offset %ld"),
- representation->revision,
- (long)representation->offset);
-
- /* iff this entry refers to a node in the same revision as this dir,
- * recurse into that node */
- *next = 0; /* terminate the value line in place so strstr() stops there */
- current = strstr(current, revision_key);
- if (current)
- {
- /* recurse */
- apr_uint64_t offset;
-
- SVN_ERR(svn_cstring_strtoui64(&offset, current + key_len, 0,
- APR_SIZE_MAX, 10));
- SVN_ERR(read_noderev(fs, file_content, (apr_size_t)offset,
- revision_info, pool, iter_pool));
-
- svn_pool_clear(iter_pool);
- }
- current = next+1; /* continue with the entry after this value line */
- }
-
- svn_pool_destroy(iter_pool);
- svn_pool_destroy(text_pool);
- return SVN_NO_ERROR;
-}
-
-/* Starting at the noderev at OFFSET in FILE_CONTENT, read all DAG nodes,
- * directories and representations linked in that tree structure. Store
- * them in FS and REVISION_INFO. Also, read them only once.
- *
- * Nothing is returned besides the error status: the "type", "text",
- * "props" and "cpath" lines are evaluated, representations seen for the
- * first time are reported to add_change(), directories seen for the first
- * time are descended into via parse_dir(), and the noderev counters and
- * sizes in REVISION_INFO are updated.
- *
- * Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
- */
-static svn_error_t *
-read_noderev(fs_fs_t *fs,
- svn_stringbuf_t *file_content,
- apr_size_t offset,
- revision_info_t *revision_info,
- apr_pool_t *pool,
- apr_pool_t *scratch_pool)
-{
- svn_string_t *line;
- representation_t *text = NULL;
- representation_t *props = NULL;
- apr_size_t start_offset = offset;
- svn_boolean_t is_dir = FALSE;
- const char *path = "???";
-
- scratch_pool = svn_pool_create(scratch_pool); /* private subpool, destroyed before the normal return */
-
- /* parse the noderev line-by-line until we find an empty line */
- while (1)
- {
- /* for this line, extract key and value. Ignore invalid values */
- svn_string_t key;
- svn_string_t value;
- char *sep;
- const char *start = file_content->data + offset;
- const char *end = strchr(start, '\n'); /* NOTE(review): not checked for NULL before use */
-
- line = svn_string_ncreate(start, end - start, scratch_pool);
- offset += end - start + 1; /* step over the line and its newline */
-
- /* empty line -> end of noderev data */
- if (line->len == 0)
- break;
-
- sep = strchr(line->data, ':');
- if (sep == NULL)
- continue;
-
- key.data = line->data;
- key.len = sep - key.data;
- *sep = 0; /* terminate the key inside our private copy of the line */
-
- if (key.len + 2 > line->len)
- continue;
-
- value.data = sep + 2; /* skip the ':' and the one character following it */
- value.len = line->len - (key.len + 2);
-
- /* translate (key, value) into noderev elements.
- * IS_DIR is only known once the "type" line has been seen, so the
- * kind assigned below depends on "type" preceding "text" / "props". */
- if (key_matches(&key, "type"))
- is_dir = strcmp(value.data, "dir") == 0;
- else if (key_matches(&key, "text"))
- {
- SVN_ERR(parse_representation(&text, fs, file_content,
- &value, revision_info,
- pool, scratch_pool));
-
- /* if we are the first to use this rep, mark it as "text rep" */
- if (++text->ref_count == 1)
- text->kind = is_dir ? dir_rep : file_rep;
- }
- else if (key_matches(&key, "props"))
- {
- SVN_ERR(parse_representation(&props, fs, file_content,
- &value, revision_info,
- pool, scratch_pool));
-
- /* if we are the first to use this rep, mark it as "prop rep" */
- if (++props->ref_count == 1)
- props->kind = is_dir ? dir_property_rep : file_property_rep;
- }
- else if (key_matches(&key, "cpath"))
- path = value.data; /* points into LINE, which lives in the private scratch pool */
- }
-
- /* record largest changes */
- if (text && text->ref_count == 1)
- add_change(fs, (apr_int64_t)text->size, (apr_int64_t)text->expanded_size,
- text->revision, path, text->kind);
- if (props && props->ref_count == 1)
- add_change(fs, (apr_int64_t)props->size, (apr_int64_t)props->expanded_size,
- props->revision, path, props->kind);
-
- /* if this is a directory and has not been processed, yet, read and
- * process it recursively */
- if (is_dir && text && text->ref_count == 1)
- SVN_ERR(parse_dir(fs, file_content, text, revision_info,
- pool, scratch_pool));
-
- /* update stats: the noderev size is the number of bytes consumed above */
- if (is_dir)
- {
- revision_info->dir_noderev_size += offset - start_offset;
- revision_info->dir_noderev_count++;
- }
- else
- {
- revision_info->file_noderev_size += offset - start_offset;
- revision_info->file_noderev_count++;
- }
- svn_pool_destroy(scratch_pool);
-
- return SVN_NO_ERROR;
-}
-
-/* Given the unparsed changes list in CHANGES with LEN chars, return the
- * number of changed paths encoded in it. The result is the number of
- * newline characters divided by two (rounded down), i.e. the list is
- * assumed to use exactly two lines per change.
- */
-static apr_size_t
-get_change_count(const char *changes,
- apr_size_t len)
-{
- apr_size_t lines = 0;
- const char *end = changes + len;
-
- /* line count */
- for (; changes < end; ++changes)
- if (*changes == '\n')
- ++lines;
-
- /* two lines per change */
- return lines / 2;
-}
-
-/* Simple utility to print a REVISION number and make it appear immediately.
- * The number is right-aligned in an 8 character column without a trailing
- * newline, and stdout is flushed afterwards.
- */
-static void
-print_progress(svn_revnum_t revision)
-{
- printf("%8ld", revision);
- fflush(stdout);
-}
-
-/* Read the content of the pack file starting at revision BASE and store it
- * in FS: one revision_info_t per manifest entry is appended to
- * FS->REVISIONS. Returns SVN_ERR_FS_CORRUPT if the manifest does not have
- * exactly FS->MAX_FILES_PER_DIR entries. Use POOL for allocations.
- */
-static svn_error_t *
-read_pack_file(fs_fs_t *fs,
- svn_revnum_t base,
- apr_pool_t *pool)
-{
- apr_array_header_t *manifest = NULL;
- apr_pool_t *local_pool = svn_pool_create(pool);
- apr_pool_t *iter_pool = svn_pool_create(local_pool);
- int i;
- apr_off_t file_size = 0;
- apr_file_t *file;
- const char *pack_folder = get_pack_folder(fs, base, local_pool);
-
- /* parse the manifest file */
- SVN_ERR(read_manifest(&manifest, fs, pack_folder, local_pool));
- if (manifest->nelts != fs->max_files_per_dir)
- return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, NULL);
-
- SVN_ERR(open_rev_or_pack_file(&file, fs, base, local_pool));
- SVN_ERR(get_file_size(&file_size, file, local_pool));
-
- /* process each revision in the pack file */
- for (i = 0; i < manifest->nelts; ++i)
- {
- apr_size_t root_node_offset;
- svn_stringbuf_t *rev_content;
-
- /* Create the revision info for the current rev. The representations
- * array starts out in ITER_POOL and is copied into POOL further down,
- * before ITER_POOL gets cleared. */
- revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
- info->representations = apr_array_make(iter_pool, 4, sizeof(representation_t*));
-
- info->revision = base + i;
- info->offset = APR_ARRAY_IDX(manifest, i, apr_size_t);
- info->end = i+1 < manifest->nelts /* a revision ends where the next one starts, or at EOF */
- ? APR_ARRAY_IDX(manifest, i+1 , apr_size_t)
- : file_size;
-
- SVN_ERR(get_content(&rev_content, file, fs, info->revision,
- info->offset,
- info->end - info->offset,
- iter_pool));
-
- SVN_ERR(read_revision_header(&info->changes,
- &info->changes_len,
- &root_node_offset,
- rev_content,
- iter_pool));
-
- info->change_count
- = get_change_count(rev_content->data + info->changes,
- info->changes_len);
- SVN_ERR(read_noderev(fs, rev_content,
- root_node_offset, info, pool, iter_pool));
-
- info->representations = apr_array_copy(pool, info->representations);
- APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
-
- /* destroy temps */
- svn_pool_clear(iter_pool);
- }
-
- /* one more pack file processed */
- print_progress(base);
- svn_pool_destroy(local_pool); /* also destroys ITER_POOL, which is its subpool */
-
- return SVN_NO_ERROR;
-}
-
-/* Read the content of the file for REVISION and store its contents in FS:
- * a new revision_info_t covering the whole file is appended to
- * FS->REVISIONS and filled by read_noderev().
- * Use POOL for allocations.
- */
-static svn_error_t *
-read_revision_file(fs_fs_t *fs,
- svn_revnum_t revision,
- apr_pool_t *pool)
-{
- apr_size_t root_node_offset;
- apr_pool_t *local_pool = svn_pool_create(pool);
- svn_stringbuf_t *rev_content;
- revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
- apr_off_t file_size = 0;
- apr_file_t *file;
-
- /* open the file holding this revision and determine its size */
- SVN_ERR(open_rev_or_pack_file(&file, fs, revision, local_pool));
- SVN_ERR(get_file_size(&file_size, file, local_pool));
-
- /* create the revision info for the current rev */
- info->representations = apr_array_make(pool, 4, sizeof(representation_t*));
-
- info->revision = revision;
- info->offset = 0;
- info->end = file_size;
-
- SVN_ERR(get_content(&rev_content, file, fs, revision, 0, file_size,
- local_pool)); /* the whole file is read into memory */
-
- SVN_ERR(read_revision_header(&info->changes,
- &info->changes_len,
- &root_node_offset,
- rev_content,
- local_pool));
-
- /* put it into our containers */
- APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
-
- info->change_count
- = get_change_count(rev_content->data + info->changes,
- info->changes_len);
-
- /* parse the revision content recursively. */
- SVN_ERR(read_noderev(fs, rev_content,
- root_node_offset, info,
- pool, local_pool));
-
- /* show progress once every FS->MAX_FILES_PER_DIR revisions */
- if (revision % fs->max_files_per_dir == 0)
- print_progress(revision);
-
- svn_pool_destroy(local_pool);
-
- return SVN_NO_ERROR;
-}
-
-/* Read the repository at PATH beginning with revision START_REVISION and
- * return the result in *FS. Allocate caches with MEMSIZE megabytes total
- * capacity; values below 100 are raised to 100. Use POOL for non-cache
- * allocations.
- *
- * Packed revisions are read one pack file at a time through
- * read_pack_file(), the remaining ones through read_revision_file().
- */
-static svn_error_t *
-read_revisions(fs_fs_t **fs,
- const char *path,
- svn_revnum_t start_revision,
- apr_size_t memsize,
- apr_pool_t *pool)
-{
- svn_revnum_t revision;
- svn_cache_config_t cache_config = *svn_cache_config_get();
-
- /* determine cache sizes */
-
- if (memsize < 100)
- memsize = 100;
-
- cache_config.cache_size = memsize * 1024 * 1024; /* MB -> bytes */
- svn_cache_config_set(&cache_config);
-
- SVN_ERR(fs_open(fs, path, pool));
-
- /* Create data containers and caches. The stored start revision is
- * START_REVISION rounded down to a multiple of MAX_FILES_PER_DIR. */
- (*fs)->start_revision = start_revision
- - (start_revision % (*fs)->max_files_per_dir);
- (*fs)->revisions = apr_array_make(pool,
- (*fs)->max_revision + 1 - (*fs)->start_revision,
- sizeof(revision_info_t *));
- (*fs)->null_base = apr_pcalloc(pool, sizeof(*(*fs)->null_base));
- initialize_largest_changes(*fs, 64, pool);
- (*fs)->by_extension = apr_hash_make(pool);
-
- SVN_ERR(svn_cache__create_membuffer_cache(&(*fs)->window_cache,
- svn_cache__get_global_membuffer_cache(),
- NULL, NULL,
- sizeof(window_cache_key_t),
- "", FALSE, pool));
-
- /* Read all packed revs.
- * NOTE(review): the loop starts at the START_REVISION argument, not at
- * the rounded-down (*FS)->START_REVISION computed above. The two only
- * coincide when START_REVISION is a multiple of MAX_FILES_PER_DIR. */
- for ( revision = start_revision
- ; revision < (*fs)->min_unpacked_rev
- ; revision += (*fs)->max_files_per_dir)
- SVN_ERR(read_pack_file(*fs, revision, pool));
-
- /* read non-packed revs */
- for ( ; revision <= (*fs)->max_revision; ++revision)
- SVN_ERR(read_revision_file(*fs, revision, pool));
-
- return SVN_NO_ERROR;
-}
-
-/* Compression statistics we collect over a given set of representations.
- * All members are running sums that add_rep_pack_stats() updates once
- * per representation.
- */
-typedef struct rep_pack_stats_t
-{
- /* number of representations */
- apr_int64_t count;
-
- /* total size after deltification (i.e. on disk size) */
- apr_int64_t packed_size;
-
- /* total size after de-deltification (i.e. plain text size) */
- apr_int64_t expanded_size;
-
- /* total on-disk header size, plus 7 bytes of trailer per representation */
- apr_int64_t overhead_size;
-} rep_pack_stats_t;
-
-/* Statistics we collect over a given set of representations.
- * We group them into shared and non-shared ("unique") reps.
- * Every representation is counted in TOTAL and in exactly one of
- * UNIQUES and SHARED.
- */
-typedef struct representation_stats_t
-{
- /* stats over all representations */
- rep_pack_stats_t total;
-
- /* stats over those representations with ref_count == 1 */
- rep_pack_stats_t uniques;
-
- /* stats over those representations with ref_count != 1 */
- rep_pack_stats_t shared;
-
- /* sum of all ref_counts */
- apr_int64_t references;
-
- /* sum of ref_count * expanded_size,
- * i.e. total plaintext content if there was no rep sharing */
- apr_int64_t expanded_size;
-} representation_stats_t;
-
-/* Basic statistics we collect over a given set of noderevs.
- * print_stats() fills these from the per-revision noderev counters.
- */
-typedef struct node_stats_t
-{
- /* number of noderev structs */
- apr_int64_t count;
-
- /* their total size on disk (structs only) */
- apr_int64_t size;
-} node_stats_t;
-
-/* Accumulate stats of REP in STATS: bump the count and add REP's on-disk
- * size, expanded size and per-rep overhead (header plus trailer) to the
- * respective totals.
- */
-static void
-add_rep_pack_stats(rep_pack_stats_t *stats,
- representation_t *rep)
-{
- stats->count++;
-
- stats->packed_size += rep->size;
- stats->expanded_size += rep->expanded_size;
- stats->overhead_size += rep->header_size + 7 /* ENDREP\n */;
-}
-
-/* Accumulate stats of REP in STATS. REP always goes into the TOTAL
- * bucket and, depending on whether its ref_count is 1, into either the
- * UNIQUES or the SHARED bucket. The reference count and the size REP
- * would occupy without rep-sharing are added as well.
- */
-static void
-add_rep_stats(representation_stats_t *stats,
- representation_t *rep)
-{
- add_rep_pack_stats(&stats->total, rep);
- if (rep->ref_count == 1)
- add_rep_pack_stats(&stats->uniques, rep);
- else
- add_rep_pack_stats(&stats->shared, rep);
-
- stats->references += rep->ref_count;
- stats->expanded_size += rep->ref_count * rep->expanded_size;
-}
-
-/* Print statistics for the given group of representations to console.
- * The last line shows the number of references beyond the first one per
- * representation (REFERENCES minus the total rep count).
- * Use POOL for allocations.
- */
-static void
-print_rep_stats(representation_stats_t *stats,
- apr_pool_t *pool)
-{
- printf(_("%20s bytes in %12s reps\n"
- "%20s bytes in %12s shared reps\n"
- "%20s bytes expanded size\n"
- "%20s bytes expanded shared size\n"
- "%20s bytes with rep-sharing off\n"
- "%20s shared references\n"),
- svn__i64toa_sep(stats->total.packed_size, ',', pool),
- svn__i64toa_sep(stats->total.count, ',', pool),
- svn__i64toa_sep(stats->shared.packed_size, ',', pool),
- svn__i64toa_sep(stats->shared.count, ',', pool),
- svn__i64toa_sep(stats->total.expanded_size, ',', pool),
- svn__i64toa_sep(stats->shared.expanded_size, ',', pool),
- svn__i64toa_sep(stats->expanded_size, ',', pool),
- svn__i64toa_sep(stats->references - stats->total.count, ',', pool));
-}
-
-/* Print the (used) contents of CHANGES, one line per entry with size,
- * revision and path. Printing stops at the first entry whose size is 0.
- * Use POOL for allocations.
- */
-static void
-print_largest_reps(largest_changes_t *changes,
- apr_pool_t *pool)
-{
- apr_size_t i;
- for (i = 0; i < changes->count && changes->changes[i]->size; ++i)
- printf(_("%12s r%-8ld %s\n"),
- svn__i64toa_sep(changes->changes[i]->size, ',', pool),
- changes->changes[i]->revision,
- changes->changes[i]->path->data);
-}
-
-/* Print the non-zero section of HISTOGRAM to console, largest bucket
- * first. Leading and trailing buckets with a zero count are omitted;
- * nothing is printed if all 64 buckets are empty.
- * Use POOL for allocations.
- */
-static void
-print_histogram(histogram_t *histogram,
- apr_pool_t *pool)
-{
- int first = 0;
- int last = 63;
- int i;
-
- /* identify non-zero range */
- while (last > 0 && histogram->lines[last].count == 0)
- --last;
-
- while (first <= last && histogram->lines[first].count == 0)
- ++first;
-
- /* Display histogram lines, each with its share of the totals.
- * NOTE(review): the percentages divide by TOTAL.SUM and TOTAL.COUNT
- * without a zero check - confirm that TOTAL.SUM cannot be 0 while some
- * bucket has a non-zero count. */
- for (i = last; i >= first; --i)
- printf(_(" [2^%2d, 2^%2d) %15s (%2d%%) bytes in %12s (%2d%%) items\n"),
- i-1, i,
- svn__i64toa_sep(histogram->lines[i].sum, ',', pool),
- (int)(histogram->lines[i].sum * 100 / histogram->total.sum),
- svn__i64toa_sep(histogram->lines[i].count, ',', pool),
- (int)(histogram->lines[i].count * 100 / histogram->total.count));
-}
-
-/* COMPARISON_FUNC for svn_sort__hash.
- * Sort extension_info_t values by total count in descending order,
- * i.e. return -1 if A has the larger NODE_HISTOGRAM.TOTAL.COUNT.
- */
-static int
-compare_count(const svn_sort__item_t *a,
- const svn_sort__item_t *b)
-{
- const extension_info_t *lhs = a->value;
- const extension_info_t *rhs = b->value;
- apr_int64_t diff = lhs->node_histogram.total.count
- - rhs->node_histogram.total.count;
-
- return diff > 0 ? -1 : (diff < 0 ? 1 : 0);
-}
-
-/* COMPARISON_FUNC for svn_sort__hash.
- * Sort extension_info_t values by total uncompressed size in descending
- * order, i.e. return -1 if A has the larger NODE_HISTOGRAM.TOTAL.SUM.
- */
-static int
-compare_node_size(const svn_sort__item_t *a,
- const svn_sort__item_t *b)
-{
- const extension_info_t *lhs = a->value;
- const extension_info_t *rhs = b->value;
- apr_int64_t diff = lhs->node_histogram.total.sum
- - rhs->node_histogram.total.sum;
-
- return diff > 0 ? -1 : (diff < 0 ? 1 : 0);
-}
-
-/* COMPARISON_FUNC for svn_sort__hash.
- * Sort extension_info_t values by total representation size in descending
- * order, i.e. return -1 if A has the larger REP_HISTOGRAM.TOTAL.SUM.
- */
-static int
-compare_rep_size(const svn_sort__item_t *a,
- const svn_sort__item_t *b)
-{
- const extension_info_t *lhs = a->value;
- const extension_info_t *rhs = b->value;
- apr_int64_t diff = lhs->rep_histogram.total.sum
- - rhs->rep_histogram.total.sum;
-
- return diff > 0 ? -1 : (diff < 0 ? 1 : 0);
-}
-
-/* Return an array of extension_info_t* for the (up to) 16 most prominent
- * extensions in FS according to the sort criterion COMPARISON_FUNC.
- * The entries are the values of FS->BY_EXTENSION, in sorted order.
- * Allocate results in POOL.
- */
-static apr_array_header_t *
-get_by_extensions(fs_fs_t *fs,
- int (*comparison_func)(const svn_sort__item_t *,
- const svn_sort__item_t *),
- apr_pool_t *pool)
-{
- /* sort all data by extension */
- apr_array_header_t *sorted
- = svn_sort__hash(fs->by_extension, comparison_func, pool);
-
- /* select the top (first) 16 entries */
- int count = MIN(sorted->nelts, 16);
- apr_array_header_t *result
- = apr_array_make(pool, count, sizeof(extension_info_t*));
- int i;
-
- for (i = 0; i < count; ++i)
- APR_ARRAY_PUSH(result, extension_info_t*)
- = APR_ARRAY_IDX(sorted, i, svn_sort__item_t).value;
-
- return result;
-}
-
-/* Add all extension_info_t* entries of TO_ADD not already in TARGET to
- * TARGET. Entries are compared by pointer, and only the elements that
- * were in TARGET on entry are searched.
- */
-static void
-merge_by_extension(apr_array_header_t *target,
- apr_array_header_t *to_add)
-{
- int i, k, count;
-
- count = target->nelts; /* snapshot: entries appended below are not searched */
- for (i = 0; i < to_add->nelts; ++i)
- {
- extension_info_t *info = APR_ARRAY_IDX(to_add, i, extension_info_t *);
- for (k = 0; k < count; ++k)
- if (info == APR_ARRAY_IDX(target, k, extension_info_t *))
- break;
-
- if (k == count) /* loop ran to completion, i.e. INFO was not found */
- APR_ARRAY_PUSH(target, extension_info_t*) = info;
- }
-}
-
-/* Print the (up to) 16 extensions in FS with the most changes, followed
- * by an "(others)" line covering whatever these do not account for.
- * Percentages are relative to FS->FILE_HISTOGRAM.TOTAL.COUNT.
- * NOTE(review): that total is used as a divisor without a zero check and
- * the "(others)" line is always printed - confirm the behaviour for a
- * repository without any file changes.
- * Use POOL for allocations.
- */
-static void
-print_extensions_by_changes(fs_fs_t *fs,
- apr_pool_t *pool)
-{
- apr_array_header_t *data = get_by_extensions(fs, compare_count, pool);
- apr_int64_t sum = 0;
- int i;
-
- for (i = 0; i < data->nelts; ++i)
- {
- extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *);
- sum += info->node_histogram.total.count;
- printf(_(" %9s %12s (%2d%%) changes\n"),
- info->extension,
- svn__i64toa_sep(info->node_histogram.total.count, ',', pool),
- (int)(info->node_histogram.total.count * 100 /
- fs->file_histogram.total.count));
- }
-
- printf(_(" %9s %12s (%2d%%) changes\n"),
- "(others)",
- svn__i64toa_sep(fs->file_histogram.total.count - sum, ',', pool),
- (int)((fs->file_histogram.total.count - sum) * 100 /
- fs->file_histogram.total.count));
-}
-
-/* Print the (up to) 16 extensions in FS with the largest total size of
- * changed file content, followed by an "(others)" line covering whatever
- * these do not account for.
- * Percentages are relative to FS->FILE_HISTOGRAM.TOTAL.SUM.
- * NOTE(review): that total is used as a divisor without a zero check and
- * the "(others)" line is always printed - confirm the behaviour for a
- * repository without any file content.
- * Use POOL for allocations.
- */
-static void
-print_extensions_by_nodes(fs_fs_t *fs,
- apr_pool_t *pool)
-{
- apr_array_header_t *data = get_by_extensions(fs, compare_node_size, pool);
- apr_int64_t sum = 0;
- int i;
-
- for (i = 0; i < data->nelts; ++i)
- {
- extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *);
- sum += info->node_histogram.total.sum;
- printf(_(" %9s %20s (%2d%%) bytes\n"),
- info->extension,
- svn__i64toa_sep(info->node_histogram.total.sum, ',', pool),
- (int)(info->node_histogram.total.sum * 100 /
- fs->file_histogram.total.sum));
- }
-
- printf(_(" %9s %20s (%2d%%) bytes\n"),
- "(others)",
- svn__i64toa_sep(fs->file_histogram.total.sum - sum, ',', pool),
- (int)((fs->file_histogram.total.sum - sum) * 100 /
- fs->file_histogram.total.sum));
-}
-
-/* Print the (up to) 16 extensions in FS with the largest total
- * representation size, followed by an "(others)" line covering whatever
- * these do not account for.
- * Percentages are relative to FS->REP_SIZE_HISTOGRAM.TOTAL.SUM.
- * NOTE(review): that total is used as a divisor without a zero check and
- * the "(others)" line is always printed - confirm the behaviour for a
- * repository without any representation data.
- * Use POOL for allocations.
- */
-static void
-print_extensions_by_reps(fs_fs_t *fs,
- apr_pool_t *pool)
-{
- apr_array_header_t *data = get_by_extensions(fs, compare_rep_size, pool);
- apr_int64_t sum = 0;
- int i;
-
- for (i = 0; i < data->nelts; ++i)
- {
- extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *);
- sum += info->rep_histogram.total.sum;
- printf(_(" %9s %20s (%2d%%) bytes\n"),
- info->extension,
- svn__i64toa_sep(info->rep_histogram.total.sum, ',', pool),
- (int)(info->rep_histogram.total.sum * 100 /
- fs->rep_size_histogram.total.sum));
- }
-
- printf(_(" %9s %20s (%2d%%) bytes\n"),
- "(others)",
- svn__i64toa_sep(fs->rep_size_histogram.total.sum - sum, ',', pool),
- (int)((fs->rep_size_histogram.total.sum - sum) * 100 /
- fs->rep_size_histogram.total.sum));
-}
-
-/* Print per-extension histograms for the most frequent extensions in FS.
- * The set of extensions is the union of the top entries by change count,
- * by node size and by representation size; for each of them the node
- * histogram and the representation histogram are printed.
- * Use POOL for allocations. */
-static void
-print_histograms_by_extension(fs_fs_t *fs,
- apr_pool_t *pool)
-{
- apr_array_header_t *data = get_by_extensions(fs, compare_count, pool);
- int i;
-
- merge_by_extension(data, get_by_extensions(fs, compare_node_size, pool));
- merge_by_extension(data, get_by_extensions(fs, compare_rep_size, pool));
-
- for (i = 0; i < data->nelts; ++i)
- {
- extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *);
- printf("\nHistogram of '%s' file sizes:\n", info->extension);
- print_histogram(&info->node_histogram, pool);
- printf("\nHistogram of '%s' file representation sizes:\n",
- info->extension);
- print_histogram(&info->rep_histogram, pool);
- }
-}
-
-/* Post-process stats for FS and print them to the console.
- * First, the per-revision counters and all representations recorded in
- * FS->REVISIONS are aggregated into local totals; then the report is
- * printed section by section: global, noderev and representation
- * statistics, largest representations, per-extension rankings and the
- * various histograms.
- * Use POOL for allocations.
- */
-static void
-print_stats(fs_fs_t *fs,
- apr_pool_t *pool)
-{
- int i, k;
-
- /* initialize stats to collect */
- representation_stats_t file_rep_stats = { { 0 } };
- representation_stats_t dir_rep_stats = { { 0 } };
- representation_stats_t file_prop_rep_stats = { { 0 } };
- representation_stats_t dir_prop_rep_stats = { { 0 } };
- representation_stats_t total_rep_stats = { { 0 } };
-
- node_stats_t dir_node_stats = { 0 };
- node_stats_t file_node_stats = { 0 };
- node_stats_t total_node_stats = { 0 };
-
- apr_int64_t total_size = 0;
- apr_int64_t change_count = 0;
- apr_int64_t change_len = 0;
-
- /* aggregate info from all revisions */
- for (i = 0; i < fs->revisions->nelts; ++i)
- {
- revision_info_t *revision = APR_ARRAY_IDX(fs->revisions, i,
- revision_info_t *);
-
- /* data gathered on a revision level */
- change_count += revision->change_count;
- change_len += revision->changes_len;
- total_size += revision->end - revision->offset;
-
- dir_node_stats.count += revision->dir_noderev_count;
- dir_node_stats.size += revision->dir_noderev_size;
- file_node_stats.count += revision->file_noderev_count;
- file_node_stats.size += revision->file_noderev_size;
- total_node_stats.count += revision->dir_noderev_count
- + revision->file_noderev_count;
- total_node_stats.size += revision->dir_noderev_size
- + revision->file_noderev_size;
-
- /* process representations */
- for (k = 0; k < revision->representations->nelts; ++k)
- {
- representation_t *rep = APR_ARRAY_IDX(revision->representations,
- k, representation_t *);
-
- /* accumulate in the right bucket; reps of any other kind are
- * only counted in the overall total below */
- switch(rep->kind)
- {
- case file_rep:
- add_rep_stats(&file_rep_stats, rep);
- break;
- case dir_rep:
- add_rep_stats(&dir_rep_stats, rep);
- break;
- case file_property_rep:
- add_rep_stats(&file_prop_rep_stats, rep);
- break;
- case dir_property_rep:
- add_rep_stats(&dir_prop_rep_stats, rep);
- break;
- default:
- break;
- }
-
- add_rep_stats(&total_rep_stats, rep);
- }
- }
-
- /* print results */
- printf("\nGlobal statistics:\n");
- printf(_("%20s bytes in %12s revisions\n"
- "%20s bytes in %12s changes\n"
- "%20s bytes in %12s node revision records\n"
- "%20s bytes in %12s representations\n"
- "%20s bytes expanded representation size\n"
- "%20s bytes with rep-sharing off\n"),
- svn__i64toa_sep(total_size, ',', pool),
- svn__i64toa_sep(fs->revisions->nelts, ',', pool),
- svn__i64toa_sep(change_len, ',', pool),
- svn__i64toa_sep(change_count, ',', pool),
- svn__i64toa_sep(total_node_stats.size, ',', pool),
- svn__i64toa_sep(total_node_stats.count, ',', pool),
- svn__i64toa_sep(total_rep_stats.total.packed_size, ',', pool),
- svn__i64toa_sep(total_rep_stats.total.count, ',', pool),
- svn__i64toa_sep(total_rep_stats.total.expanded_size, ',', pool),
- svn__i64toa_sep(total_rep_stats.expanded_size, ',', pool));
-
- printf("\nNoderev statistics:\n");
- printf(_("%20s bytes in %12s nodes total\n"
- "%20s bytes in %12s directory noderevs\n"
- "%20s bytes in %12s file noderevs\n"),
- svn__i64toa_sep(total_node_stats.size, ',', pool),
- svn__i64toa_sep(total_node_stats.count, ',', pool),
- svn__i64toa_sep(dir_node_stats.size, ',', pool),
- svn__i64toa_sep(dir_node_stats.count, ',', pool),
- svn__i64toa_sep(file_node_stats.size, ',', pool),
- svn__i64toa_sep(file_node_stats.count, ',', pool));
-
- printf("\nRepresentation statistics:\n");
- printf(_("%20s bytes in %12s representations total\n"
- "%20s bytes in %12s directory representations\n"
- "%20s bytes in %12s file representations\n"
- "%20s bytes in %12s directory property representations\n"
- "%20s bytes in %12s file property representations\n"
- "%20s bytes in header & footer overhead\n"),
- svn__i64toa_sep(total_rep_stats.total.packed_size, ',', pool),
- svn__i64toa_sep(total_rep_stats.total.count, ',', pool),
- svn__i64toa_sep(dir_rep_stats.total.packed_size, ',', pool),
- svn__i64toa_sep(dir_rep_stats.total.count, ',', pool),
- svn__i64toa_sep(file_rep_stats.total.packed_size, ',', pool),
- svn__i64toa_sep(file_rep_stats.total.count, ',', pool),
- svn__i64toa_sep(dir_prop_rep_stats.total.packed_size, ',', pool),
- svn__i64toa_sep(dir_prop_rep_stats.total.count, ',', pool),
- svn__i64toa_sep(file_prop_rep_stats.total.packed_size, ',', pool),
- svn__i64toa_sep(file_prop_rep_stats.total.count, ',', pool),
- svn__i64toa_sep(total_rep_stats.total.overhead_size, ',', pool));
-
- printf("\nDirectory representation statistics:\n");
- print_rep_stats(&dir_rep_stats, pool);
- printf("\nFile representation statistics:\n");
- print_rep_stats(&file_rep_stats, pool);
- printf("\nDirectory property representation statistics:\n");
- print_rep_stats(&dir_prop_rep_stats, pool);
- printf("\nFile property representation statistics:\n");
- print_rep_stats(&file_prop_rep_stats, pool);
-
- printf("\nLargest representations:\n");
- print_largest_reps(fs->largest_changes, pool);
- printf("\nExtensions by number of changes:\n");
- print_extensions_by_changes(fs, pool);
- printf("\nExtensions by size of changed files:\n");
- print_extensions_by_nodes(fs, pool);
- printf("\nExtensions by size of representations:\n");
- print_extensions_by_reps(fs, pool);
-
- printf("\nHistogram of expanded node sizes:\n");
- print_histogram(&fs->node_size_histogram, pool);
- printf("\nHistogram of representation sizes:\n");
- print_histogram(&fs->rep_size_histogram, pool);
- printf("\nHistogram of file sizes:\n");
- print_histogram(&fs->file_histogram, pool);
- printf("\nHistogram of file representation sizes:\n");
- print_histogram(&fs->file_rep_histogram, pool);
- printf("\nHistogram of file property sizes:\n");
- print_histogram(&fs->file_prop_histogram, pool);
- printf("\nHistogram of file property representation sizes:\n");
- print_histogram(&fs->file_prop_rep_histogram, pool);
- printf("\nHistogram of directory sizes:\n");
- print_histogram(&fs->dir_histogram, pool);
- printf("\nHistogram of directory representation sizes:\n");
- print_histogram(&fs->dir_rep_histogram, pool);
- printf("\nHistogram of directory property sizes:\n");
- print_histogram(&fs->dir_prop_histogram, pool);
- printf("\nHistogram of directory property representation sizes:\n");
- print_histogram(&fs->dir_prop_rep_histogram, pool);
-
- print_histograms_by_extension(fs, pool);
-}
-
-/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and
- * POOL for allocations. Any error from writing to OSTREAM is cleared
- * and otherwise ignored.
- */
-static void
-print_usage(svn_stream_t *ostream, const char *progname,
- apr_pool_t *pool)
-{
- svn_error_clear(svn_stream_printf(ostream, pool,
- "\n"
- "Usage: %s <repo> [cachesize]\n"
- "\n"
- "Read the repository at local path <repo> starting at revision 0,\n"
- "count statistical information and write that data to stdout.\n"
- "Use up to [cachesize] MB of memory for caching. This does not include\n"
- "temporary representation of the repository structure, i.e. the actual\n"
- "memory may be considerably higher. If not given, defaults to 100 MB.\n",
- progname));
-}
-
-/* Program entry point with linear control flow: set up APR and a root
- * pool, parse "<repo> [cachesize]" from the command line, read all
- * revisions starting at revision 0 and print the statistics.
- * Returns 0 on success and 2 on a usage error or any Subversion error.
- */
-int main(int argc, const char *argv[])
-{
- apr_pool_t *pool;
- svn_stream_t *ostream;
- svn_error_t *svn_err;
- const char *repo_path = NULL;
- svn_revnum_t start_revision = 0;
- apr_size_t memsize = 100; /* default cache size in MB, see print_usage() */
- apr_uint64_t temp = 0;
- fs_fs_t *fs;
-
- apr_initialize();
- atexit(apr_terminate);
-
- pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE));
-
- svn_err = svn_stream_for_stdout(&ostream, pool);
- if (svn_err)
- {
- svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG);
- return 2;
- }
-
- if (argc < 2 || argc > 3)
- {
- print_usage(ostream, argv[0], pool);
- return 2;
- }
-
- if (argc == 3)
- {
- /* optional second argument: cache size, parsed as a decimal number */
- svn_err = svn_cstring_strtoui64(&temp, argv[2], 0, APR_SIZE_MAX, 10);
- if (svn_err)
- {
- print_usage(ostream, argv[0], pool);
- svn_error_clear(svn_err);
- return 2;
- }
-
- memsize = (apr_size_t)temp;
- }
-
- repo_path = svn_dirent_canonicalize(argv[1], pool);
- start_revision = 0; /* repeats the initializer above; the start revision is not configurable */
-
- printf("Reading revisions\n");
- svn_err = read_revisions(&fs, repo_path, start_revision, memsize, pool);
- printf("\n"); /* ends the line of progress numbers written while reading */
-
- if (svn_err)
- {
- svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG);
- return 2;
- }
-
- print_stats(fs, pool);
-
- return 0;
-}
diff --git a/tools/server-side/mod_dontdothat/mod_dontdothat.c b/tools/server-side/mod_dontdothat/mod_dontdothat.c
index b4801ed..b939ca7 100644
--- a/tools/server-side/mod_dontdothat/mod_dontdothat.c
+++ b/tools/server-side/mod_dontdothat/mod_dontdothat.c
@@ -40,7 +40,15 @@
#include "svn_path.h"
#include "private/svn_fspath.h"
-module AP_MODULE_DECLARE_DATA dontdothat_module;
+extern module AP_MODULE_DECLARE_DATA dontdothat_module;
+
+#ifndef XML_VERSION_AT_LEAST
+#define XML_VERSION_AT_LEAST(major,minor,patch) \
+(((major) < XML_MAJOR_VERSION) \
+ || ((major) == XML_MAJOR_VERSION && (minor) < XML_MINOR_VERSION) \
+ || ((major) == XML_MAJOR_VERSION && (minor) == XML_MINOR_VERSION && \
+ (patch) <= XML_MICRO_VERSION))
+#endif /* XML_VERSION_AT_LEAST */
typedef struct dontdothat_config_rec {
const char *config_file;
@@ -551,6 +559,31 @@ end_element(void *baton, const char *name)
}
}
+#if XML_VERSION_AT_LEAST(1, 95, 8)
+static void
+expat_entity_declaration(void *userData,
+ const XML_Char *entityName,
+ int is_parameter_entity,
+ const XML_Char *value,
+ int value_length,
+ const XML_Char *base,
+ const XML_Char *systemId,
+ const XML_Char *publicId,
+ const XML_Char *notationName)
+{
+ dontdothat_filter_ctx *ctx = userData;
+
+ /* Stop the parser if an entity declaration is hit. */
+ XML_StopParser(ctx->xmlp, 0 /* resumable */);
+}
+#else
+/* A noop default_handler. */
+static void
+expat_default_handler(void *userData, const XML_Char *s, int len)
+{
+}
+#endif
+
static svn_boolean_t
is_valid_wildcard(const char *wc)
{
@@ -696,6 +729,12 @@ dontdothat_insert_filters(request_rec *r)
XML_SetElementHandler(ctx->xmlp, start_element, end_element);
XML_SetCharacterDataHandler(ctx->xmlp, cdata);
+#if XML_VERSION_AT_LEAST(1, 95, 8)
+ XML_SetEntityDeclHandler(ctx->xmlp, expat_entity_declaration);
+#else
+ XML_SetDefaultHandler(ctx->xmlp, expat_default_handler);
+#endif
+
ap_add_input_filter("DONTDOTHAT_FILTER", ctx, r, r->connection);
}
}
diff --git a/tools/server-side/svn-populate-node-origins-index.c b/tools/server-side/svn-populate-node-origins-index.c
index b9762c4..5d74c0c 100644
--- a/tools/server-side/svn-populate-node-origins-index.c
+++ b/tools/server-side/svn-populate-node-origins-index.c
@@ -122,7 +122,7 @@ build_index(const char *repos_path, apr_pool_t *pool)
apr_pool_t *subpool;
/* Open the repository. */
- SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, pool));
+ SVN_ERR(svn_repos_open3(&repos, repos_path, NULL, pool, pool));
/* Get a filesystem object. */
fs = svn_repos_fs(repos);
diff --git a/tools/server-side/svn-rep-sharing-stats.c b/tools/server-side/svn-rep-sharing-stats.c
deleted file mode 100644
index f610409..0000000
--- a/tools/server-side/svn-rep-sharing-stats.c
+++ /dev/null
@@ -1,530 +0,0 @@
-/*
- * ====================================================================
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- * ====================================================================
- */
-
-#include <apr_signal.h>
-
-#include "svn_cmdline.h"
-#include "svn_dirent_uri.h"
-#include "svn_pools.h"
-#include "svn_repos.h"
-#include "svn_opt.h"
-#include "svn_utf.h"
-#include "svn_version.h"
-
-#include "../../subversion/libsvn_fs_fs/fs.h"
-#include "../../subversion/libsvn_fs_fs/fs_fs.h"
-/* for svn_fs_fs__id_* (used in assertions only) */
-#include "../../subversion/libsvn_fs_fs/id.h"
-
-#include "private/svn_cmdline_private.h"
-
-#include "svn_private_config.h"
-
-
-/** Help messages and version checking. **/
-
-static svn_error_t *
-version(apr_pool_t *pool)
-{
- return svn_opt_print_help4(NULL, "svn-rep-sharing-stats", TRUE, FALSE, FALSE,
- NULL, NULL, NULL, NULL, NULL, NULL, pool);
-}
-
-static void
-usage(apr_pool_t *pool)
-{
- svn_error_clear(svn_cmdline_fprintf
- (stderr, pool,
- _("Type 'svn-rep-sharing-stats --help' for usage.\n")));
-}
-
-
-static void
-help(const apr_getopt_option_t *options, apr_pool_t *pool)
-{
- svn_error_clear
- (svn_cmdline_fprintf
- (stdout, pool,
- _("usage: svn-rep-sharing-stats [OPTIONS] REPOS_PATH\n\n"
- " Prints the reference count statistics for representations\n"
- " in an FSFS repository.\n"
- "\n"
- " At least one of the options --data/--prop/--both must be specified.\n"
- "\n"
- "Valid options:\n")));
- while (options->description)
- {
- const char *optstr;
- svn_opt_format_option(&optstr, options, TRUE, pool);
- svn_error_clear(svn_cmdline_fprintf(stdout, pool, " %s\n", optstr));
- ++options;
- }
- svn_error_clear(svn_cmdline_fprintf(stdout, pool, "\n"));
- exit(0);
-}
-
-
-/* Version compatibility check */
-static svn_error_t *
-check_lib_versions(void)
-{
- static const svn_version_checklist_t checklist[] =
- {
- /* ### check FSFS version */
- { "svn_subr", svn_subr_version },
- { "svn_fs", svn_fs_version },
- { NULL, NULL }
- };
- SVN_VERSION_DEFINE(my_version);
-
- return svn_error_trace(svn_ver_check_list(&my_version, checklist));
-}
-
-
-
-/** Cancellation stuff, ### copied from subversion/svn/main.c */
-
-/* A flag to see if we've been cancelled by the client or not. */
-static volatile sig_atomic_t cancelled = FALSE;
-
-/* A signal handler to support cancellation. */
-static void
-signal_handler(int signum)
-{
- apr_signal(signum, SIG_IGN);
- cancelled = TRUE;
-}
-
-/* Our cancellation callback. */
-static svn_error_t *
-svn_cl__check_cancel(void *baton)
-{
- if (cancelled)
- return svn_error_create(SVN_ERR_CANCELLED, NULL, _("Caught signal"));
- else
- return SVN_NO_ERROR;
-}
-
-static svn_cancel_func_t cancel_func = svn_cl__check_cancel;
-
-static void set_up_cancellation(void)
-{
- /* Set up our cancellation support. */
- apr_signal(SIGINT, signal_handler);
-#ifdef SIGBREAK
- /* SIGBREAK is a Win32 specific signal generated by ctrl-break. */
- apr_signal(SIGBREAK, signal_handler);
-#endif
-#ifdef SIGHUP
- apr_signal(SIGHUP, signal_handler);
-#endif
-#ifdef SIGTERM
- apr_signal(SIGTERM, signal_handler);
-#endif
-
-#ifdef SIGPIPE
- /* Disable SIGPIPE generation for the platforms that have it. */
- apr_signal(SIGPIPE, SIG_IGN);
-#endif
-
-#ifdef SIGXFSZ
- /* Disable SIGXFSZ generation for the platforms that have it, otherwise
- * working with large files when compiled against an APR that doesn't have
- * large file support will crash the program, which is uncool. */
- apr_signal(SIGXFSZ, SIG_IGN);
-#endif
-}
-
-
-/** Program-specific code. **/
-enum {
- OPT_VERSION = SVN_OPT_FIRST_LONGOPT_ID,
- OPT_DATA,
- OPT_PROP,
- OPT_BOTH
-};
-
-static svn_error_t *check_experimental(void)
-{
- if (getenv("SVN_REP_SHARING_STATS_IS_EXPERIMENTAL"))
- return SVN_NO_ERROR;
-
- return svn_error_create(APR_EGENERAL, NULL,
- "This code is experimental and should not "
- "be used on live data.");
-}
-
-/* The parts of a rep that determine whether it's being shared. */
-struct key_t
-{
- svn_revnum_t revision;
- apr_off_t offset;
-};
-
-/* What we need to know about a rep. */
-struct value_t
-{
- svn_checksum_t *sha1_checksum;
- apr_uint64_t refcount;
-};
-
-/* Increment records[rep] if both are non-NULL and REP contains a sha1.
- * Allocate keys and values in RESULT_POOL.
- */
-static svn_error_t *record(apr_hash_t *records,
- representation_t *rep,
- apr_pool_t *result_pool)
-{
- struct key_t *key;
- struct value_t *value;
-
- /* Skip if we ignore this particular kind of reps, or if the rep doesn't
- * exist or doesn't have the checksum we are after. (The latter case
- * often corresponds to node_rev->kind == svn_node_dir.)
- */
- if (records == NULL || rep == NULL || rep->sha1_checksum == NULL)
- return SVN_NO_ERROR;
-
- /* Construct the key.
- *
- * Must use calloc() because apr_hash_* pay attention to padding bytes too.
- */
- key = apr_pcalloc(result_pool, sizeof(*key));
- key->revision = rep->revision;
- key->offset = rep->offset;
-
- /* Update or create the value. */
- if ((value = apr_hash_get(records, key, sizeof(*key))))
- {
- /* Paranoia. */
- SVN_ERR_ASSERT(value->sha1_checksum != NULL);
- SVN_ERR_ASSERT(svn_checksum_match(value->sha1_checksum,
- rep->sha1_checksum));
- /* Real work. */
- value->refcount++;
- }
- else
- {
- value = apr_palloc(result_pool, sizeof(*value));
- value->sha1_checksum = svn_checksum_dup(rep->sha1_checksum, result_pool);
- value->refcount = 1;
- }
-
- /* Store them. */
- apr_hash_set(records, key, sizeof(*key), value);
-
- return SVN_NO_ERROR;
-}
-
-/* Inspect the data and/or prop reps of revision REVNUM in FS. Store
- * reference count tallies in passed hashes (allocated in RESULT_POOL).
- *
- * If PROP_REPS or DATA_REPS is NULL, the respective kind of reps are not
- * tallied.
- *
- * Print progress report to STDERR unless QUIET is true.
- *
- * Use SCRATCH_POOL for temporary allocations.
- */
-static svn_error_t *
-process_one_revision(svn_fs_t *fs,
- svn_revnum_t revnum,
- svn_boolean_t quiet,
- apr_hash_t *prop_reps,
- apr_hash_t *data_reps,
- apr_hash_t *both_reps,
- apr_pool_t *result_pool,
- apr_pool_t *scratch_pool)
-{
- svn_fs_root_t *rev_root;
- apr_hash_t *paths_changed;
- apr_hash_index_t *hi;
-
- if (! quiet)
- SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool,
- "processing r%ld\n", revnum));
-
- /* Get the changed paths. */
- SVN_ERR(svn_fs_revision_root(&rev_root, fs, revnum, scratch_pool));
- SVN_ERR(svn_fs_paths_changed2(&paths_changed, rev_root, scratch_pool));
-
- /* Iterate them. */
- /* ### use iterpool? */
- for (hi = apr_hash_first(scratch_pool, paths_changed);
- hi; hi = apr_hash_next(hi))
- {
- const char *path;
- const svn_fs_path_change2_t *change;
- const svn_fs_id_t *node_rev_id1, *node_rev_id2;
- const svn_fs_id_t *the_id;
-
- node_revision_t *node_rev;
-
- path = svn__apr_hash_index_key(hi);
- change = svn__apr_hash_index_val(hi);
- if (! quiet)
- SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool,
- "processing r%ld:%s\n", revnum, path));
-
- if (change->change_kind == svn_fs_path_change_delete)
- /* Can't ask for reps of PATH at REVNUM if the path no longer exists
- * at that revision! */
- continue;
-
- /* Okay, we have two node_rev id's for this change: the txn one and
- * the revision one. We'll use the latter. */
- node_rev_id1 = change->node_rev_id;
- SVN_ERR(svn_fs_node_id(&node_rev_id2, rev_root, path, scratch_pool));
-
- SVN_ERR_ASSERT(svn_fs_fs__id_txn_id(node_rev_id1) != NULL);
- SVN_ERR_ASSERT(svn_fs_fs__id_rev(node_rev_id2) != SVN_INVALID_REVNUM);
-
- the_id = node_rev_id2;
-
- /* Get the node_rev using the chosen node_rev_id. */
- SVN_ERR(svn_fs_fs__get_node_revision(&node_rev, fs, the_id, scratch_pool));
-
- /* Maybe record the sha1's. */
- SVN_ERR(record(prop_reps, node_rev->prop_rep, result_pool));
- SVN_ERR(record(data_reps, node_rev->data_rep, result_pool));
- SVN_ERR(record(both_reps, node_rev->prop_rep, result_pool));
- SVN_ERR(record(both_reps, node_rev->data_rep, result_pool));
- }
-
- return SVN_NO_ERROR;
-}
-
-/* Print REPS_REF_COUNT (a hash as for process_one_revision())
- * to stdout in "refcount => sha1" format. A sha1 may appear
- * more than once if not all its instances are shared. Prepend
- * each line by NAME.
- *
- * Use SCRATCH_POOL for temporary allocations.
- */
-static svn_error_t *
-pretty_print(const char *name,
- apr_hash_t *reps_ref_counts,
- apr_pool_t *scratch_pool)
-{
- apr_hash_index_t *hi;
-
- if (reps_ref_counts == NULL)
- return SVN_NO_ERROR;
-
- for (hi = apr_hash_first(scratch_pool, reps_ref_counts);
- hi; hi = apr_hash_next(hi))
- {
- struct value_t *value;
-
- SVN_ERR(cancel_func(NULL));
-
- value = svn__apr_hash_index_val(hi);
- SVN_ERR(svn_cmdline_printf(scratch_pool, "%s %" APR_UINT64_T_FMT " %s\n",
- name, value->refcount,
- svn_checksum_to_cstring_display(
- value->sha1_checksum,
- scratch_pool)));
- }
-
- return SVN_NO_ERROR;
-}
-
-/* Return an error unless FS is an fsfs fs. */
-static svn_error_t *is_fs_fsfs(svn_fs_t *fs, apr_pool_t *scratch_pool)
-{
- const char *actual, *expected, *path;
-
- path = svn_fs_path(fs, scratch_pool);
-
- expected = SVN_FS_TYPE_FSFS;
- SVN_ERR(svn_fs_type(&actual, path, scratch_pool));
-
- if (strcmp(actual, expected) != 0)
- return svn_error_createf(SVN_ERR_FS_UNKNOWN_FS_TYPE, NULL,
- "Filesystem '%s' is not of type '%s'",
- svn_dirent_local_style(path, scratch_pool),
- actual);
-
- return SVN_NO_ERROR;
-}
-
-/* The core logic. This function iterates the repository REPOS_PATH
- * and sends all the (DATA and/or PROP) reps in each revision for counting
- * by process_one_revision(). QUIET is passed to process_one_revision().
- */
-static svn_error_t *process(const char *repos_path,
- svn_boolean_t prop,
- svn_boolean_t data,
- svn_boolean_t quiet,
- apr_pool_t *scratch_pool)
-{
- apr_hash_t *prop_reps = NULL;
- apr_hash_t *data_reps = NULL;
- apr_hash_t *both_reps = NULL;
- svn_revnum_t rev, youngest;
- apr_pool_t *iterpool;
- svn_repos_t *repos;
- svn_fs_t *fs;
-
- if (prop)
- prop_reps = apr_hash_make(scratch_pool);
- if (data)
- data_reps = apr_hash_make(scratch_pool);
- if (prop && data)
- both_reps = apr_hash_make(scratch_pool);
-
- /* Open the FS. */
- SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, scratch_pool));
- fs = svn_repos_fs(repos);
-
- SVN_ERR(is_fs_fsfs(fs, scratch_pool));
-
- SVN_ERR(svn_fs_youngest_rev(&youngest, fs, scratch_pool));
-
- /* Iterate the revisions. */
- iterpool = svn_pool_create(scratch_pool);
- for (rev = 0; rev <= youngest; rev++)
- {
- svn_pool_clear(iterpool);
- SVN_ERR(cancel_func(NULL));
- SVN_ERR(process_one_revision(fs, rev, quiet,
- prop_reps, data_reps, both_reps,
- scratch_pool, iterpool));
- }
- svn_pool_destroy(iterpool);
-
- /* Print stats. */
- SVN_ERR(pretty_print("prop", prop_reps, scratch_pool));
- SVN_ERR(pretty_print("data", data_reps, scratch_pool));
- SVN_ERR(pretty_print("both", both_reps, scratch_pool));
-
- return SVN_NO_ERROR;
-}
-
-int
-main(int argc, const char *argv[])
-{
- const char *repos_path;
- apr_pool_t *pool;
- svn_boolean_t prop = FALSE, data = FALSE;
- svn_boolean_t quiet = FALSE;
- svn_error_t *err;
- apr_getopt_t *os;
- const apr_getopt_option_t options[] =
- {
- {"data", OPT_DATA, 0, N_("display data reps stats")},
- {"prop", OPT_PROP, 0, N_("display prop reps stats")},
- {"both", OPT_BOTH, 0, N_("display combined (data+prop) reps stats")},
- {"quiet", 'q', 0, N_("no progress (only errors) to stderr")},
- {"help", 'h', 0, N_("display this help")},
- {"version", OPT_VERSION, 0,
- N_("show program version information")},
- {0, 0, 0, 0}
- };
-
- /* Initialize the app. */
- if (svn_cmdline_init("svn-rep-sharing-stats", stderr) != EXIT_SUCCESS)
- return EXIT_FAILURE;
-
- /* Create our top-level pool. Use a separate mutexless allocator,
- * given this application is single threaded.
- */
- pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE));
-
- /* Check library versions */
- err = check_lib_versions();
- if (err)
- return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: ");
-
- err = svn_cmdline__getopt_init(&os, argc, argv, pool);
- if (err)
- return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: ");
-
- SVN_INT_ERR(check_experimental());
-
- os->interleave = 1;
- while (1)
- {
- int opt;
- const char *arg;
- apr_status_t status = apr_getopt_long(os, options, &opt, &arg);
- if (APR_STATUS_IS_EOF(status))
- break;
- if (status != APR_SUCCESS)
- {
- usage(pool);
- return EXIT_FAILURE;
- }
- switch (opt)
- {
- case OPT_DATA:
- data = TRUE;
- break;
- /* It seems we don't actually rep-share props yet. */
- case OPT_PROP:
- prop = TRUE;
- break;
- case OPT_BOTH:
- data = TRUE;
- prop = TRUE;
- break;
- case 'q':
- quiet = TRUE;
- break;
- case 'h':
- help(options, pool);
- break;
- case OPT_VERSION:
- SVN_INT_ERR(version(pool));
- exit(0);
- break;
- default:
- usage(pool);
- return EXIT_FAILURE;
- }
- }
-
- /* Exactly 1 non-option argument,
- * and at least one of "--data"/"--prop"/"--both".
- */
- if (os->ind + 1 != argc || (!data && !prop))
- {
- usage(pool);
- return EXIT_FAILURE;
- }
-
- /* Grab REPOS_PATH from argv. */
- SVN_INT_ERR(svn_utf_cstring_to_utf8(&repos_path, os->argv[os->ind], pool));
- repos_path = svn_dirent_internal_style(repos_path, pool);
-
- set_up_cancellation();
-
- /* Do something. */
- SVN_INT_ERR(process(repos_path, prop, data, quiet, pool));
-
- /* We're done. */
-
- svn_pool_destroy(pool);
- /* Flush stdout to make sure that the user will see any printing errors. */
- SVN_INT_ERR(svn_cmdline_fflush(stdout));
-
- return EXIT_SUCCESS;
-}
diff --git a/tools/server-side/svnauthz.c b/tools/server-side/svnauthz.c
index ab8c62d..3fadd23 100644
--- a/tools/server-side/svnauthz.c
+++ b/tools/server-side/svnauthz.c
@@ -234,7 +234,7 @@ get_authz_from_txn(svn_authz_t **authz, const char *repos_path,
svn_error_t *err;
/* Open up the repository and find the transaction root */
- SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, pool));
+ SVN_ERR(svn_repos_open3(&repos, repos_path, NULL, pool, pool));
fs = svn_repos_fs(repos);
SVN_ERR(svn_fs_open_txn(&txn, fs, txn_name, pool));
SVN_ERR(svn_fs_txn_root(&root, txn, pool));
@@ -382,42 +382,6 @@ subcommand_accessof(apr_getopt_t *os, void *baton, apr_pool_t *pool)
#undef EXIT_FAILURE
#define EXIT_FAILURE 2
-/* Similar to svn_cmdline_handle_exit_error but with an exit_code argument
- so we can comply with our contract and exit with 2 for internal failures.
- Also is missing the pool argument since we don't need it given
- main/sub_main. */
-static int
-handle_exit_error(svn_error_t *err, const char *prefix, int exit_code)
-{
- /* Issue #3014:
- * Don't print anything on broken pipes. The pipe was likely
- * closed by the process at the other end. We expect that
- * process to perform error reporting as necessary.
- *
- * ### This assumes that there is only one error in a chain for
- * ### SVN_ERR_IO_PIPE_WRITE_ERROR. See svn_cmdline_fputs(). */
- if (err->apr_err != SVN_ERR_IO_PIPE_WRITE_ERROR)
- svn_handle_error2(err, stderr, FALSE, prefix);
- svn_error_clear(err);
- return exit_code;
-}
-
-/* Report and clear the error ERR, and return EXIT_FAILURE. */
-#define EXIT_ERROR(err, exit_code) \
- handle_exit_error(err, "svnauthz: ", exit_code)
-
-/* A redefinition of the public SVN_INT_ERR macro, that suppresses the
- * error message if it is SVN_ERR_IO_PIPE_WRITE_ERROR, amd with the
- * program name 'svnauthz' instead of 'svn'. */
-#undef SVN_INT_ERR
-#define SVN_INT_ERR(expr) \
- do { \
- svn_error_t *svn_err__temp = (expr); \
- if (svn_err__temp) \
- return EXIT_ERROR(svn_err__temp, EXIT_FAILURE); \
- } while (0)
-
-
/* Return TRUE if the UI of 'svnauthz-validate' (svn 1.7 and earlier)
should be emulated, given argv[0]. */
static svn_boolean_t
@@ -485,8 +449,13 @@ canonicalize_access_file(const char **canonicalized_access_file,
return SVN_NO_ERROR;
}
-static int
-sub_main(int argc, const char *argv[], apr_pool_t *pool)
+/*
+ * On success, leave *EXIT_CODE untouched and return SVN_NO_ERROR. On error,
+ * either return an error to be displayed, or set *EXIT_CODE to non-zero and
+ * return SVN_NO_ERROR.
+ */
+static svn_error_t *
+sub_main(int *exit_code, int argc, const char *argv[], apr_pool_t *pool)
{
svn_error_t *err;
@@ -497,7 +466,7 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
int i;
/* Initialize the FS library. */
- SVN_INT_ERR(svn_fs_initialize(pool));
+ SVN_ERR(svn_fs_initialize(pool));
received_opts = apr_array_make(pool, SVN_OPT_MAX_OPTIONS, sizeof(int));
@@ -506,7 +475,7 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
opt_state.txn = opt_state.repos_path = opt_state.groups_file = NULL;
/* Parse options. */
- SVN_INT_ERR(svn_cmdline__getopt_init(&os, argc, argv, pool));
+ SVN_ERR(svn_cmdline__getopt_init(&os, argc, argv, pool));
os->interleave = 1;
if (!use_compat_mode(argv[0], pool))
@@ -521,8 +490,9 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
break;
if (status != APR_SUCCESS)
{
- SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
- return EXIT_FAILURE;
+ SVN_ERR(subcommand_help(NULL, NULL, pool));
+ *exit_code = EXIT_FAILURE;
+ return SVN_NO_ERROR;
}
/* Stash the option code in an array before parsing it. */
@@ -535,7 +505,7 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
opt_state.help = TRUE;
break;
case 't':
- SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.txn, arg, pool));
+ SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.txn, arg, pool));
break;
case 'R':
opt_state.recursive = TRUE;
@@ -544,28 +514,29 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
opt_state.version = TRUE;
break;
case svnauthz__username:
- SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.username, arg, pool));
+ SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.username, arg, pool));
break;
case svnauthz__path:
- SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.fspath, arg, pool));
+ SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.fspath, arg, pool));
opt_state.fspath = svn_fspath__canonicalize(opt_state.fspath,
pool);
break;
case svnauthz__repos:
- SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_name, arg, pool));
+ SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_name, arg, pool));
break;
case svnauthz__is:
- SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.is, arg, pool));
+ SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.is, arg, pool));
break;
case svnauthz__groups_file:
- SVN_INT_ERR(
+ SVN_ERR(
svn_utf_cstring_to_utf8(&opt_state.groups_file,
arg, pool));
break;
default:
{
- SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
- return EXIT_FAILURE;
+ SVN_ERR(subcommand_help(NULL, NULL, pool));
+ *exit_code = EXIT_FAILURE;
+ return SVN_NO_ERROR;
}
}
}
@@ -603,8 +574,9 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
{
svn_error_clear(svn_cmdline_fprintf(stderr, pool,
("subcommand argument required\n")));
- SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
- return EXIT_FAILURE;
+ SVN_ERR(subcommand_help(NULL, NULL, pool));
+ *exit_code = EXIT_FAILURE;
+ return SVN_NO_ERROR;
}
}
else
@@ -616,14 +588,15 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
const char *first_arg_utf8;
os->ind++;
- SVN_INT_ERR(svn_utf_cstring_to_utf8(&first_arg_utf8,
+ SVN_ERR(svn_utf_cstring_to_utf8(&first_arg_utf8,
first_arg, pool));
svn_error_clear(
svn_cmdline_fprintf(stderr, pool,
("Unknown subcommand: '%s'\n"),
first_arg_utf8));
- SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
- return EXIT_FAILURE;
+ SVN_ERR(subcommand_help(NULL, NULL, pool));
+ *exit_code = EXIT_FAILURE;
+ return SVN_NO_ERROR;
}
}
}
@@ -637,13 +610,12 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
{
if (os->ind +2 != argc)
{
- err = svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
- ("Repository and authz file arguments "
- "required"));
- return EXIT_ERROR(err, EXIT_FAILURE);
+ return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
+ ("Repository and authz file arguments "
+ "required"));
}
- SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_path, os->argv[os->ind],
+ SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_path, os->argv[os->ind],
pool));
os->ind++;
@@ -653,24 +625,23 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
/* Exactly 1 non-option argument */
if (os->ind + 1 != argc)
{
- err = svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
- ("Authz file argument required"));
- return EXIT_ERROR(err, EXIT_FAILURE);
+ return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
+ ("Authz file argument required"));
}
/* Grab AUTHZ_FILE from argv. */
- SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.authz_file, os->argv[os->ind],
+ SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.authz_file, os->argv[os->ind],
pool));
/* Canonicalize opt_state.authz_file appropriately. */
- SVN_INT_ERR(canonicalize_access_file(&opt_state.authz_file,
+ SVN_ERR(canonicalize_access_file(&opt_state.authz_file,
opt_state.authz_file,
opt_state.txn != NULL, pool));
/* Same for opt_state.groups_file if it is present. */
if (opt_state.groups_file)
{
- SVN_INT_ERR(canonicalize_access_file(&opt_state.groups_file,
+ SVN_ERR(canonicalize_access_file(&opt_state.groups_file,
opt_state.groups_file,
opt_state.txn != NULL, pool));
}
@@ -696,13 +667,14 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
pool);
svn_opt_format_option(&optstr, badopt, FALSE, pool);
if (subcommand->name[0] == '-')
- SVN_INT_ERR(subcommand_help(NULL, NULL, pool));
+ SVN_ERR(subcommand_help(NULL, NULL, pool));
else
svn_error_clear(svn_cmdline_fprintf(stderr, pool,
("Subcommand '%s' doesn't accept option '%s'\n"
"Type 'svnauthz help %s' for usage.\n"),
subcommand->name, optstr, subcommand->name));
- return EXIT_FAILURE;
+ *exit_code = EXIT_FAILURE;
+ return SVN_NO_ERROR;
}
}
@@ -724,7 +696,8 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
{
/* Follow our contract that says we exit with 1 if the file does not
validate. */
- return EXIT_ERROR(err, 1);
+ *exit_code = 1;
+ return err;
}
else if (err->apr_err == SVN_ERR_AUTHZ_UNREADABLE
|| err->apr_err == SVN_ERR_AUTHZ_UNWRITABLE
@@ -732,31 +705,22 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool)
{
/* Follow our contract that says we exit with 3 if --is does not
* match. */
- return EXIT_ERROR(err, 3);
+ *exit_code = 3;
+ return err;
}
-
- return EXIT_ERROR(err, EXIT_FAILURE);
- }
- else
- {
- /* Ensure that everything is written to stdout, so the user will
- see any print errors. */
- err = svn_cmdline_fflush(stdout);
- if (err)
- {
- return EXIT_ERROR(err, EXIT_FAILURE);
- }
- return EXIT_SUCCESS;
+ return err;
}
+ return SVN_NO_ERROR;
}
int
main(int argc, const char *argv[])
{
apr_pool_t *pool;
- int exit_code;
+ int exit_code = EXIT_SUCCESS;
+ svn_error_t *err;
/* Initialize the app. Send all error messages to 'stderr'. */
if (svn_cmdline_init(argv[0], stderr) != EXIT_SUCCESS)
@@ -764,7 +728,18 @@ main(int argc, const char *argv[])
pool = svn_pool_create(NULL);
- exit_code = sub_main(argc, argv, pool);
+ err = sub_main(&exit_code, argc, argv, pool);
+
+ /* Flush stdout and report if it fails. It would be flushed on exit anyway
+ but this makes sure that output is not silently lost if it fails. */
+ err = svn_error_compose_create(err, svn_cmdline_fflush(stdout));
+
+ if (err)
+ {
+ if (exit_code == 0)
+ exit_code = EXIT_FAILURE;
+ svn_cmdline_handle_exit_error(err, NULL, "svnauthz: ");
+ }
svn_pool_destroy(pool);
return exit_code;
diff --git a/tools/server-side/svnpredumpfilter.py b/tools/server-side/svnpredumpfilter.py
index 5a74755..f6a97c2 100755
--- a/tools/server-side/svnpredumpfilter.py
+++ b/tools/server-side/svnpredumpfilter.py
@@ -38,6 +38,10 @@ Use the default ordering of revisions (that is, '-r HEAD:0').
Return errorcode 0 if there are no additional dependencies found, 1 if
there were; any other errorcode indicates a fatal error.
+Paths in mergeinfo are not considered as additional dependencies so the
+--skip-missing-merge-sources option of 'svndumpfilter' may be required
+for successful filtering with the resulting path list.
+
Options:
--help (-h) Show this usage message and exit.
@@ -68,7 +72,7 @@ def sanitize_path(path):
def subsumes(path, maybe_child):
if path == maybe_child:
return True
- if maybe_child.find(path + '/') == 0:
+ if maybe_child.startswith(path + '/'):
return True
return False
@@ -117,20 +121,35 @@ def log(msg, min_verbosity):
class DependencyTracker:
def __init__(self, include_paths):
- self.include_paths = include_paths[:]
- self.dependent_paths = []
+ self.include_paths = set(include_paths)
+ self.dependent_paths = set()
def path_included(self, path):
- for include_path in self.include_paths + self.dependent_paths:
+ for include_path in self.include_paths | self.dependent_paths:
if subsumes(include_path, path):
return True
return False
- def handle_changes(self, path_copies):
- for path, copyfrom_path in path_copies.items():
- if self.path_included(path) and copyfrom_path:
- if not self.path_included(copyfrom_path):
- self.dependent_paths.append(copyfrom_path)
+ def include_missing_copies(self, path_copies):
+ while True:
+ log("Cross-checking %d included paths with %d copies "
+ "for missing path dependencies..." % (
+ len(self.include_paths) + len(self.dependent_paths),
+ len(path_copies)),
+ 1)
+ included_copies = []
+ for path, copyfrom_path in path_copies:
+ if self.path_included(path):
+ log("Adding copy '%s' -> '%s'" % (copyfrom_path, path), 1)
+ self.dependent_paths.add(copyfrom_path)
+ included_copies.append((path, copyfrom_path))
+ if not included_copies:
+ log("Found all missing path dependencies", 1)
+ break
+ for path, copyfrom_path in included_copies:
+ path_copies.remove((path, copyfrom_path))
+ log("Found %d new copy dependencies, need to re-check for more"
+ % len(included_copies), 1)
def readline(stream):
line = stream.readline()
@@ -151,7 +170,7 @@ def svn_log_stream_get_dependencies(stream, included_paths):
line_buf = None
last_revision = 0
eof = False
- path_copies = {}
+ path_copies = set()
found_changed_path = False
while not eof:
@@ -195,16 +214,15 @@ def svn_log_stream_get_dependencies(stream, included_paths):
except EOFError:
eof = True
break
- match = action_re.search(line)
+ match = copy_action_re.search(line)
if match:
found_changed_path = True
- match = copy_action_re.search(line)
- if match:
- path_copies[sanitize_path(match.group(1))] = \
- sanitize_path(match.group(2))
+ path_copies.add((sanitize_path(match.group(1)),
+ sanitize_path(match.group(2))))
+ elif action_re.search(line):
+ found_changed_path = True
else:
break
- dt.handle_changes(path_copies)
# Finally, skip any log message lines. (If there are none,
# remember the last line we read, because it probably has
@@ -221,6 +239,7 @@ def svn_log_stream_get_dependencies(stream, included_paths):
"'svn log' with the --verbose (-v) option when "
"generating the input to this script?")
+ dt.include_missing_copies(path_copies)
return dt
def analyze_logs(included_paths):
diff --git a/tools/server-side/svnpubsub/commit-hook.py b/tools/server-side/svnpubsub/commit-hook.py
index 4a1a3f3..4e6a1cc 100755
--- a/tools/server-side/svnpubsub/commit-hook.py
+++ b/tools/server-side/svnpubsub/commit-hook.py
@@ -23,7 +23,6 @@ HOST="127.0.0.1"
PORT=2069
import sys
-import subprocess
try:
import simplejson as json
except ImportError:
@@ -31,32 +30,32 @@ except ImportError:
import urllib2
-def svncmd(cmd):
- return subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
+import svnpubsub.util
-def svncmd_uuid(repo):
- cmd = "%s uuid %s" % (SVNLOOK, repo)
- p = svncmd(cmd)
- return p.stdout.read().strip()
+def svnlook(cmd, **kwargs):
+ args = [SVNLOOK] + cmd
+ return svnpubsub.util.check_output(args, **kwargs)
-def svncmd_info(repo, revision):
- cmd = "%s info -r %s %s" % (SVNLOOK, revision, repo)
- p = svncmd(cmd)
- data = p.stdout.read().split("\n")
+def svnlook_uuid(repo):
+ cmd = ["uuid", "--", repo]
+ return svnlook(cmd).strip()
+
+def svnlook_info(repo, revision):
+ cmd = ["info", "-r", revision, "--", repo]
+ data = svnlook(cmd, universal_newlines=True).split("\n")
#print data
return {'author': data[0].strip(),
'date': data[1].strip(),
'log': "\n".join(data[3:]).strip()}
-def svncmd_changed(repo, revision):
- cmd = "%s changed -r %s %s" % (SVNLOOK, revision, repo)
- p = svncmd(cmd)
+def svnlook_changed(repo, revision):
+ cmd = ["changed", "-r", revision, "--", repo]
+ lines = svnlook(cmd, universal_newlines=True).split("\n")
changed = {}
- while True:
- line = p.stdout.readline()
- if not line:
- break
+ for line in lines:
line = line.strip()
+ if not line:
+ continue
(flags, filename) = (line[0:3], line[4:])
changed[filename] = {'flags': flags}
return changed
@@ -71,23 +70,23 @@ def do_put(body):
def main(repo, revision):
revision = revision.lstrip('r')
- i = svncmd_info(repo, revision)
+ i = svnlook_info(repo, revision)
data = {'type': 'svn',
'format': 1,
'id': int(revision),
'changed': {},
- 'repository': svncmd_uuid(repo),
+ 'repository': svnlook_uuid(repo),
'committer': i['author'],
'log': i['log'],
'date': i['date'],
}
- data['changed'].update(svncmd_changed(repo, revision))
+ data['changed'].update(svnlook_changed(repo, revision))
body = json.dumps(data)
do_put(body)
if __name__ == "__main__":
if len(sys.argv) not in (3, 4):
sys.stderr.write("invalid args\n")
- sys.exit(0)
+ sys.exit(1)
main(*sys.argv[1:3])
diff --git a/tools/server-side/svnpubsub/daemonize.py b/tools/server-side/svnpubsub/daemonize.py
index 8b85258..41b1bec 100644
--- a/tools/server-side/svnpubsub/daemonize.py
+++ b/tools/server-side/svnpubsub/daemonize.py
@@ -24,6 +24,7 @@ import os
import signal
import sys
import time
+import multiprocessing # requires Python 2.6
# possible return values from Daemon.daemonize()
@@ -50,11 +51,11 @@ class Daemon(object):
def daemonize_exit(self):
try:
result = self.daemonize()
- except (ChildFailed, DaemonFailed) as e:
+ except (ChildFailed, DaemonFailed), e:
# duplicate the exit code
sys.exit(e.code)
except (ChildTerminatedAbnormally, ChildForkFailed,
- DaemonTerminatedAbnormally, DaemonForkFailed) as e:
+ DaemonTerminatedAbnormally, DaemonForkFailed), e:
sys.stderr.write('ERROR: %s\n' % e)
sys.exit(1)
except ChildResumedIncorrectly:
@@ -71,29 +72,41 @@ class Daemon(object):
# in original process. daemon is up and running. we're done.
def daemonize(self):
- # fork off a child that can detach itself from this process.
- try:
- pid = os.fork()
- except OSError as e:
- raise ChildForkFailed(e.errno, e.strerror)
-
- if pid > 0:
- # we're in the parent. let's wait for the child to finish setting
- # things up -- on our exit, we want to ensure the child is accepting
- # connections.
- cpid, status = os.waitpid(pid, 0)
- assert pid == cpid
- if os.WIFEXITED(status):
- code = os.WEXITSTATUS(status)
- if code:
- raise ChildFailed(code)
- return DAEMON_RUNNING
-
- # the child did not exit cleanly.
- raise ChildTerminatedAbnormally(status)
-
+ ### review error situations. map to backwards compat. ??
+ ### be mindful of daemonize_exit().
+ ### we should try and raise ChildFailed / ChildTerminatedAbnormally.
+ ### ref: older revisions. OR: remove exceptions.
+
+ child_is_ready = multiprocessing.Event()
+ child_completed = multiprocessing.Event()
+
+ p = multiprocessing.Process(target=self._first_child,
+ args=(child_is_ready, child_completed))
+ p.start()
+
+ # Wait for the child to finish setting things up (in case we need
+ # to communicate with it). It will only exit when ready.
+ ### use a timeout here! (parameterized, of course)
+ p.join()
+
+ ### need to propagate errors, to adjust the return codes
+ if child_completed.is_set():
+ ### what was the exit status?
+ return DAEMON_COMPLETE
+ if child_is_ready.is_set():
+ return DAEMON_RUNNING
+
+ ### how did we get here?! the immediate child should not exit without
+ ### signalling ready/complete. some kind of error.
+ return DAEMON_STARTED
+
+ def _first_child(self, child_is_ready, child_completed):
# we're in the child.
+ ### NOTE: the original design was a bit bunk. Exceptions raised from
+ ### this point are within the child processes. We need to signal the
+ ### errors to the parent in other ways.
+
# decouple from the parent process
os.chdir('/')
os.umask(0)
@@ -102,63 +115,86 @@ class Daemon(object):
# remember this pid so the second child can signal it.
thispid = os.getpid()
- # register a signal handler so the SIGUSR1 doesn't stop the process.
- # this object will also record whether if got signalled.
- daemon_accepting = SignalCatcher(signal.SIGUSR1)
-
- # if the daemon process exits before sending SIGUSR1, then we need to see
- # the problem. trap SIGCHLD with a SignalCatcher.
+ # if the daemon process exits before signalling readiness, then we
+ # need to see the problem. trap SIGCHLD with a SignalCatcher.
daemon_exit = SignalCatcher(signal.SIGCHLD)
# perform the second fork
try:
pid = os.fork()
- except OSError as e:
+ except OSError, e:
+ ### this won't make it to the parent process
raise DaemonForkFailed(e.errno, e.strerror)
if pid > 0:
# in the parent.
- # we want to wait for the daemon to signal that it has created and
- # bound the socket, and is (thus) ready for connections. if the
- # daemon improperly exits before serving, we'll see SIGCHLD and the
- # .pause will return.
- ### we should add a timeout to this. allow an optional parameter to
- ### specify the timeout, in case it takes a long time to start up.
- signal.pause()
+
+ # Wait for the child to be ready for operation.
+ while True:
+ # The readiness event will invariably be signalled early/first.
+ # If it *doesn't* get signalled because the child has prematurely
+ # exited, then we will pause 10ms before noticing the exit. The
+ # pause is acceptable since that is aberrant/unexpected behavior.
+ ### is there a way to break this wait() on a signal such as SIGCHLD?
+ ### parameterize this wait, in case the app knows children may
+ ### fail quickly?
+ if child_is_ready.wait(timeout=0.010):
+ # The child signalled readiness. Yay!
+ break
+ if daemon_exit.signalled:
+ # Whoops. The child exited without signalling :-(
+ break
+ # Python 2.6 compat: .wait() may exit when set, but return None
+ if child_is_ready.is_set():
+ break
+ # A simple timeout. The child is taking a while to prepare. Go
+ # back and wait for readiness.
if daemon_exit.signalled:
+ # Tell the parent that the child has exited.
+ ### we need to communicate the exit status, if possible.
+ child_completed.set()
+
# reap the daemon process, getting its exit code. bubble it up.
cpid, status = os.waitpid(pid, 0)
assert pid == cpid
if os.WIFEXITED(status):
code = os.WEXITSTATUS(status)
if code:
+ ### this won't make it to the parent process
raise DaemonFailed(code)
+ ### this return value is ignored
return DAEMON_NOT_RUNNING
# the daemon did not exit cleanly.
+ ### this won't make it to the parent process
raise DaemonTerminatedAbnormally(status)
- if daemon_accepting.signalled:
- # the daemon is up and running, so save the pid and return success.
- if self.pidfile:
- # Be wary of symlink attacks
- try:
- os.remove(self.pidfile)
- except OSError:
- pass
- fd = os.open(self.pidfile, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0444)
- os.write(fd, '%d\n' % pid)
- os.close(fd)
- return DAEMON_STARTED
-
+ # child_is_ready got asserted. the daemon is up and running, so
+ # save the pid and return success.
+ if self.pidfile:
+ # Be wary of symlink attacks
+ try:
+ os.remove(self.pidfile)
+ except OSError:
+ pass
+ fd = os.open(self.pidfile, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0444)
+ os.write(fd, '%d\n' % pid)
+ os.close(fd)
+
+ ### this return value is ignored
+ return DAEMON_STARTED
+
+ ### old code. what to do with this? throw ChildResumedIncorrectly
+ ### or just toss this and the exception.
# some other signal popped us out of the pause. the daemon might not
# be running.
+ ### this won't make it to the parent process
raise ChildResumedIncorrectly()
- # we're a deamon now. get rid of the final remnants of the parent.
- # start by restoring default signal handlers
+ # we're a daemon now. get rid of the final remnants of the parent:
+ # restore the signal handlers and switch std* to the proper files.
signal.signal(signal.SIGUSR1, signal.SIG_DFL)
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
sys.stdout.flush()
@@ -176,30 +212,31 @@ class Daemon(object):
so.close()
se.close()
- # TEST: don't release the parent immediately. the whole parent stack
- # should pause along with this sleep.
+ ### TEST: don't release the parent immediately. the whole parent stack
+ ### should pause along with this sleep.
#time.sleep(10)
# everything is set up. call the initialization function.
self.setup()
- # sleep for one second before signalling. we want to make sure the
- # parent has called signal.pause()
- ### we should think of a better wait around the race condition.
- time.sleep(1)
+ ### TEST: exit before signalling.
+ #sys.exit(0)
+ #sys.exit(1)
- # okay. the daemon is ready. signal the parent to tell it we're set.
- os.kill(thispid, signal.SIGUSR1)
+ # the child is now ready for parent/anyone to communicate with it.
+ child_is_ready.set()
# start the daemon now.
self.run()
# The daemon is shutting down, so toss the pidfile.
- try:
- os.remove(self.pidfile)
- except OSError:
- pass
+ if self.pidfile:
+ try:
+ os.remove(self.pidfile)
+ except OSError:
+ pass
+ ### this return value is ignored
return DAEMON_COMPLETE
def setup(self):
@@ -209,6 +246,34 @@ class Daemon(object):
raise NotImplementedError
+class _Detacher(Daemon):
+ def __init__(self, target, logfile='/dev/null', pidfile=None,
+ args=(), kwargs={}):
+ Daemon.__init__(self, logfile, pidfile)
+ self.target = target
+ self.args = args
+ self.kwargs = kwargs
+
+ def setup(self):
+ pass
+
+ def run(self):
+ self.target(*self.args, **self.kwargs)
+
+
+def run_detached(target, *args, **kwargs):
+ """Simple function to run TARGET as a detached daemon.
+
+ The additional arguments/keywords will be passed along. This function
+ does not return -- sys.exit() will be called as appropriate.
+
+ (capture SystemExit if logging/reporting is necessary)
+ ### if needed, a variant of this func could be written to not exit
+ """
+ d = _Detacher(target, args=args, kwargs=kwargs)
+ d.daemonize_exit()
+
+
class SignalCatcher(object):
def __init__(self, signum):
self.signalled = False
diff --git a/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd b/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd
index 71fc8c8..79b5901 100755
--- a/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd
+++ b/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd
@@ -26,7 +26,7 @@ pidfile="${svnpubsub_pidfile}"
export PYTHON_EGG_CACHE="/home/svn/.python-eggs"
command="/usr/local/bin/twistd"
-command_interpreter="/usr/local/bin/${svnwcsub_cmd_int}"
+command_interpreter="/usr/local/bin/${svnpubsub_cmd_int}"
command_args="-y /usr/local/svnpubsub/svnpubsub.tac \
--logfile=/var/log/vc/svnpubsub.log \
--pidfile=${pidfile} \
diff --git a/tools/server-side/svnpubsub/revprop-change-hook.py b/tools/server-side/svnpubsub/revprop-change-hook.py
new file mode 100755
index 0000000..3aa857b
--- /dev/null
+++ b/tools/server-side/svnpubsub/revprop-change-hook.py
@@ -0,0 +1,90 @@
+#!/usr/local/bin/python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+SVNLOOK="/usr/local/svn-install/current/bin/svnlook"
+#SVNLOOK="/usr/local/bin/svnlook"
+
+HOST="127.0.0.1"
+PORT=2069
+
+import sys
+try:
+ import simplejson as json
+except ImportError:
+ import json
+
+import urllib2
+
+
+import svnpubsub.util
+
+def svnlook(cmd, **kwargs):
+ args = [SVNLOOK] + cmd
+ return svnpubsub.util.check_output(args, **kwargs)
+
+def svnlook_uuid(repo):
+ cmd = ["uuid", "--", repo]
+ return svnlook(cmd).strip()
+
+def svnlook_revprop(repo, revision, propname):
+ cmd = ["propget", "-r", revision, "--revprop", "--", repo, propname]
+ data = svnlook(cmd)
+ #print data
+ return data
+
+def do_put(body):
+ opener = urllib2.build_opener(urllib2.HTTPHandler)
+ request = urllib2.Request("http://%s:%d/metadata" %(HOST, PORT), data=body)
+ request.add_header('Content-Type', 'application/json')
+ request.get_method = lambda: 'PUT'
+ url = opener.open(request)
+
+
+def main(repo, revision, author, propname, action):
+ revision = revision.lstrip('r')
+ if action in ('A', 'M'):
+ new_value = svnlook_revprop(repo, revision, propname)
+ elif action == 'D':
+ new_value = None
+ else:
+ sys.stderr.write('Unknown revprop change action "%s"\n' % action)
+ sys.exit(1)
+ if action in ('D', 'M'):
+ old_value = sys.stdin.read()
+ else:
+ old_value = None
+ data = {'type': 'svn',
+ 'format': 1,
+ 'id': int(revision),
+ 'repository': svnlook_uuid(repo),
+ 'revprop': {
+ 'name': propname,
+ 'committer': author,
+ 'value': new_value,
+ 'old_value': old_value,
+ }
+ }
+ body = json.dumps(data)
+ do_put(body)
+
+if __name__ == "__main__":
+ if len(sys.argv) != 6:
+ sys.stderr.write("invalid args\n")
+ sys.exit(1)
+
+ main(*sys.argv[1:6])
diff --git a/tools/server-side/svnpubsub/svnpubsub/client.py b/tools/server-side/svnpubsub/svnpubsub/client.py
index c1631d6..871a5e9 100644
--- a/tools/server-side/svnpubsub/svnpubsub/client.py
+++ b/tools/server-side/svnpubsub/svnpubsub/client.py
@@ -62,7 +62,8 @@ class SvnpubsubClientException(Exception):
class Client(asynchat.async_chat):
- def __init__(self, url, commit_callback, event_callback):
+ def __init__(self, url, commit_callback, event_callback,
+ metadata_callback = None):
asynchat.async_chat.__init__(self)
self.last_activity = time.time()
@@ -82,7 +83,8 @@ class Client(asynchat.async_chat):
self.event_callback = event_callback
- self.parser = JSONRecordHandler(commit_callback, event_callback)
+ self.parser = JSONRecordHandler(commit_callback, event_callback,
+ metadata_callback)
# Wait for the end of headers. Then we start parsing JSON.
self.set_terminator(b'\r\n\r\n')
@@ -126,36 +128,50 @@ class Client(asynchat.async_chat):
self.ibuffer.append(data)
+class Notification(object):
+ def __init__(self, data):
+ self.__dict__.update(data)
+
+class Commit(Notification):
+ KIND = 'COMMIT'
+
+class Metadata(Notification):
+ KIND = 'METADATA'
+
+
class JSONRecordHandler:
- def __init__(self, commit_callback, event_callback):
+ def __init__(self, commit_callback, event_callback, metadata_callback):
self.commit_callback = commit_callback
self.event_callback = event_callback
+ self.metadata_callback = metadata_callback
+
+ EXPECTED_VERSION = 1
def feed(self, record):
obj = json.loads(record)
if 'svnpubsub' in obj:
actual_version = obj['svnpubsub'].get('version')
- EXPECTED_VERSION = 1
- if actual_version != EXPECTED_VERSION:
- raise SvnpubsubClientException("Unknown svnpubsub format: %r != %d"
- % (actual_format, expected_format))
+ if actual_version != self.EXPECTED_VERSION:
+ raise SvnpubsubClientException(
+ "Unknown svnpubsub format: %r != %d"
+ % (actual_version, self.EXPECTED_VERSION))
self.event_callback('version', obj['svnpubsub']['version'])
elif 'commit' in obj:
commit = Commit(obj['commit'])
self.commit_callback(commit)
elif 'stillalive' in obj:
self.event_callback('ping', obj['stillalive'])
-
-
-class Commit(object):
- def __init__(self, commit):
- self.__dict__.update(commit)
+ elif 'metadata' in obj and self.metadata_callback:
+ metadata = Metadata(obj['metadata'])
+ self.metadata_callback(metadata)
class MultiClient(object):
- def __init__(self, urls, commit_callback, event_callback):
+ def __init__(self, urls, commit_callback, event_callback,
+ metadata_callback = None):
self.commit_callback = commit_callback
self.event_callback = event_callback
+ self.metadata_callback = metadata_callback
# No target time, as no work to do
self.target_time = 0
@@ -185,9 +201,15 @@ class MultiClient(object):
def _add_channel(self, url):
# Simply instantiating the client will install it into the global map
# for processing in the main event loop.
- Client(url,
- functools.partial(self.commit_callback, url),
- functools.partial(self._reconnect, url))
+ if self.metadata_callback:
+ Client(url,
+ functools.partial(self.commit_callback, url),
+ functools.partial(self._reconnect, url),
+ functools.partial(self.metadata_callback, url))
+ else:
+ Client(url,
+ functools.partial(self.commit_callback, url),
+ functools.partial(self._reconnect, url))
def _check_stale(self):
now = time.time()
diff --git a/tools/server-side/svnpubsub/svnpubsub/server.py b/tools/server-side/svnpubsub/svnpubsub/server.py
index faee423..d0cdff9 100644
--- a/tools/server-side/svnpubsub/svnpubsub/server.py
+++ b/tools/server-side/svnpubsub/svnpubsub/server.py
@@ -25,20 +25,27 @@
# Instead of using a complicated XMPP/AMQP/JMS/super messaging service,
# we have simple HTTP GETs and PUTs to get data in and out.
#
-# Currently supports both XML and JSON serialization.
+# Currently supports JSON serialization.
#
# Example Sub clients:
-# curl -sN http://127.0.0.1:2069/commits
-# curl -sN http://127.0.0.1:2069/commits/svn/*
-# curl -sN http://127.0.0.1:2069/commits/svn
-# curl -sN http://127.0.0.1:2069/commits/*/13f79535-47bb-0310-9956-ffa450edef68
-# curl -sN http://127.0.0.1:2069/commits/svn/13f79535-47bb-0310-9956-ffa450edef68
+# curl -sN http://127.0.0.1:2069/commits
+# curl -sN 'http://127.0.0.1:2069/commits/svn/*'
+# curl -sN http://127.0.0.1:2069/commits/svn
+# curl -sN 'http://127.0.0.1:2069/commits/*/13f79535-47bb-0310-9956-ffa450edef68'
+# curl -sN http://127.0.0.1:2069/commits/svn/13f79535-47bb-0310-9956-ffa450edef68
#
-# URL is built into 2 parts:
-# /commits/${optional_type}/${optional_repository}
+# curl -sN http://127.0.0.1:2069/metadata
+# curl -sN 'http://127.0.0.1:2069/metadata/svn/*'
+# curl -sN http://127.0.0.1:2069/metadata/svn
+# curl -sN 'http://127.0.0.1:2069/metadata/*/13f79535-47bb-0310-9956-ffa450edef68'
+# curl -sN http://127.0.0.1:2069/metadata/svn/13f79535-47bb-0310-9956-ffa450edef68
#
-# If the type is included in the URL, you will only get commits of that type.
-# The type can be * and then you will receive commits of any type.
+# URLs are constructed from 3 parts:
+# /${notification}/${optional_type}/${optional_repository}
+#
+# Notifications can be sent for commits or metadata (e.g., revprop) changes.
+# If the type is included in the URL, you will only get notifications of that type.
+# The type can be * and then you will receive notifications of any type.
#
# If the repository is included in the URL, you will only receive
# messages about that repository. The repository can be * and then you
@@ -71,7 +78,7 @@ from twisted.python import log
import time
-class Commit:
+class Notification(object):
def __init__(self, r):
self.__dict__.update(r)
if not self.check_value('repository'):
@@ -86,7 +93,16 @@ class Commit:
def check_value(self, k):
return hasattr(self, k) and self.__dict__[k]
- def render_commit(self):
+ def render(self):
+ raise NotImplementedError
+
+ def render_log(self):
+ raise NotImplementedError
+
+class Commit(Notification):
+ KIND = 'COMMIT'
+
+ def render(self):
obj = {'commit': {}}
obj['commit'].update(self.__dict__)
return json.dumps(obj)
@@ -96,20 +112,32 @@ class Commit:
paths_changed = " %d paths changed" % len(self.changed)
except:
paths_changed = ""
- return "%s:%s repo '%s' id '%s'%s" % (self.type,
- self.format,
- self.repository,
- self.id,
- paths_changed)
+ return "commit %s:%s repo '%s' id '%s'%s" % (
+ self.type, self.format, self.repository, self.id,
+ paths_changed)
+
+class Metadata(Notification):
+ KIND = 'METADATA'
+
+ def render(self):
+ obj = {'metadata': {}}
+ obj['metadata'].update(self.__dict__)
+ return json.dumps(obj)
+
+ def render_log(self):
+ return "metadata %s:%s repo '%s' id '%s' revprop '%s'" % (
+ self.type, self.format, self.repository, self.id,
+ self.revprop['name'])
HEARTBEAT_TIME = 15
class Client(object):
- def __init__(self, pubsub, r, type, repository):
+ def __init__(self, pubsub, r, kind, type, repository):
self.pubsub = pubsub
r.notifyFinish().addErrback(self.finished)
self.r = r
+ self.kind = kind
self.type = type
self.repository = repository
self.alive = True
@@ -123,11 +151,14 @@ class Client(object):
except ValueError:
pass
- def interested_in(self, commit):
- if self.type and self.type != commit.type:
+ def interested_in(self, notification):
+ if self.kind != notification.KIND:
+ return False
+
+ if self.type and self.type != notification.type:
return False
- if self.repository and self.repository != commit.repository:
+ if self.repository and self.repository != notification.repository:
return False
return True
@@ -164,6 +195,13 @@ class SvnPubSub(resource.Resource):
isLeaf = True
clients = []
+ __notification_uri_map = {'commits': Commit.KIND,
+ 'metadata': Metadata.KIND}
+
+ def __init__(self, notification_class):
+ resource.Resource.__init__(self)
+ self.__notification_class = notification_class
+
def cc(self):
return len(self.clients)
@@ -183,6 +221,11 @@ class SvnPubSub(resource.Resource):
request.setResponseCode(400)
return "Invalid path\n"
+ kind = self.__notification_uri_map.get(uri[1], None)
+ if kind is None:
+ request.setResponseCode(400)
+ return "Invalid path\n"
+
if uri_len >= 3:
type = uri[2]
@@ -195,17 +238,18 @@ class SvnPubSub(resource.Resource):
if repository == '*':
repository = None
- c = Client(self, request, type, repository)
+ c = Client(self, request, kind, type, repository)
self.clients.append(c)
c.start()
return twisted.web.server.NOT_DONE_YET
- def notifyAll(self, commit):
- data = commit.render_commit()
+ def notifyAll(self, notification):
+ data = notification.render()
- log.msg("COMMIT: %s (%d clients)" % (commit.render_log(), self.cc()))
+ log.msg("%s: %s (%d clients)"
+ % (notification.KIND, notification.render_log(), self.cc()))
for client in self.clients:
- if client.interested_in(commit):
+ if client.interested_in(notification):
client.write_data(data)
def render_PUT(self, request):
@@ -218,19 +262,23 @@ class SvnPubSub(resource.Resource):
#import pdb;pdb.set_trace()
#print "input: %s" % (input)
try:
- c = json.loads(input)
- commit = Commit(c)
+ data = json.loads(input)
+ notification = self.__notification_class(data)
except ValueError as e:
request.setResponseCode(400)
- log.msg("COMMIT: failed due to: %s" % str(e))
- return str(e)
- self.notifyAll(commit)
+ errstr = str(e)
+ log.msg("%s: failed due to: %s" % (notification.KIND, errstr))
+ return errstr
+ self.notifyAll(notification)
return "Ok"
+
def svnpubsub_server():
root = resource.Resource()
- s = SvnPubSub()
- root.putChild("commits", s)
+ c = SvnPubSub(Commit)
+ m = SvnPubSub(Metadata)
+ root.putChild('commits', c)
+ root.putChild('metadata', m)
return server.Site(root)
if __name__ == "__main__":
diff --git a/tools/server-side/svnpubsub/svnpubsub/util.py b/tools/server-side/svnpubsub/svnpubsub/util.py
new file mode 100644
index 0000000..e254f8b
--- /dev/null
+++ b/tools/server-side/svnpubsub/svnpubsub/util.py
@@ -0,0 +1,36 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import subprocess as __subprocess
+
+# check_output() is only available in Python 2.7 and later. Allow us to run
+# with earlier versions.
+try:
+ __check_output = __subprocess.check_output
+ def check_output(args, env=None, universal_newlines=False):
+ return __check_output(args, shell=False, env=env,
+ universal_newlines=universal_newlines)
+except AttributeError:
+ def check_output(args, env=None, universal_newlines=False):
+ # note: we only use these three args
+ pipe = __subprocess.Popen(args, shell=False, env=env,
+ stdout=__subprocess.PIPE,
+ universal_newlines=universal_newlines)
+ output, _ = pipe.communicate()
+ if pipe.returncode:
+ raise subprocess.CalledProcessError(pipe.returncode, args)
+ return output
diff --git a/tools/server-side/svnpubsub/svnwcsub.py b/tools/server-side/svnpubsub/svnwcsub.py
index 366df7c..8105d87 100755
--- a/tools/server-side/svnpubsub/svnwcsub.py
+++ b/tools/server-side/svnpubsub/svnwcsub.py
@@ -69,27 +69,22 @@ except ImportError:
import daemonize
import svnpubsub.client
-
-# check_output() is only available in Python 2.7. Allow us to run with
-# earlier versions
-try:
- check_output = subprocess.check_output
-except AttributeError:
- def check_output(args, env): # note: we only use these two args
- pipe = subprocess.Popen(args, stdout=subprocess.PIPE, env=env)
- output, _ = pipe.communicate()
- if pipe.returncode:
- raise subprocess.CalledProcessError(pipe.returncode, args)
- return output
+import svnpubsub.util
assert hasattr(subprocess, 'check_call')
def check_call(*args, **kwds):
- """Wrapper around subprocess.check_call() that logs stderr upon failure."""
+ """Wrapper around subprocess.check_call() that logs stderr upon failure,
+ with an optional list of exit codes to consider non-failure."""
assert 'stderr' not in kwds
+ if '__okayexits' in kwds:
+ __okayexits = kwds['__okayexits']
+ del kwds['__okayexits']
+ else:
+ __okayexits = set([0]) # EXIT_SUCCESS
kwds.update(stderr=subprocess.PIPE)
pipe = subprocess.Popen(*args, **kwds)
output, errput = pipe.communicate()
- if pipe.returncode:
+ if pipe.returncode not in __okayexits:
cmd = args[0] if len(args) else kwds.get('args', '(no command)')
# TODO: log stdout too?
logging.error('Command failed: returncode=%d command=%r stderr=%r',
@@ -103,7 +98,7 @@ def check_call(*args, **kwds):
def svn_info(svnbin, env, path):
"Run 'svn info' on the target path, returning a dict of info data."
args = [svnbin, "info", "--non-interactive", "--", path]
- output = check_output(args, env=env).strip()
+ output = svnpubsub.util.check_output(args, env=env).strip()
info = { }
for line in output.split('\n'):
idx = line.index(':')
@@ -303,6 +298,21 @@ class BackgroundWorker(threading.Thread):
logging.info("updating: %s", wc.path)
+ ## Run the hook
+ HEAD = svn_info(self.svnbin, self.env, wc.url)['Revision']
+ if self.hook:
+ hook_mode = ['pre-update', 'pre-boot'][boot]
+ logging.info('running hook: %s at %s',
+ wc.path, hook_mode)
+ args = [self.hook, hook_mode, wc.path, HEAD, wc.url]
+ rc = check_call(args, env=self.env, __okayexits=[0, 1])
+ if rc == 1:
+ # TODO: log stderr
+ logging.warn('hook denied update of %s at %s',
+ wc.path, hook_mode)
+ return
+ del rc
+
### we need to move some of these args into the config. these are
### still specific to the ASF setup.
args = [self.svnbin, 'switch',
@@ -313,12 +323,13 @@ class BackgroundWorker(threading.Thread):
'--config-option',
'config:miscellany:use-commit-times=on',
'--',
- wc.url,
+ wc.url + '@' + HEAD,
wc.path]
check_call(args, env=self.env)
### check the loglevel before running 'svn info'?
info = svn_info(self.svnbin, self.env, wc.path)
+ assert info['Revision'] == HEAD
logging.info("updated: %s now at r%s", wc.path, info['Revision'])
## Run the hook
@@ -533,7 +544,8 @@ def main(args):
# We manage the logfile ourselves (along with possible rotation). The
# daemon process can just drop stdout/stderr into /dev/null.
- d = Daemon('/dev/null', options.pidfile, options.umask, bdec)
+ d = Daemon('/dev/null', os.path.abspath(options.pidfile),
+ options.umask, bdec)
if options.daemon:
# Daemonize the process and call sys.exit() with appropriate code
d.daemonize_exit()
diff --git a/tools/server-side/svnpubsub/watcher.py b/tools/server-side/svnpubsub/watcher.py
index 340b100..11bf066 100755
--- a/tools/server-side/svnpubsub/watcher.py
+++ b/tools/server-side/svnpubsub/watcher.py
@@ -35,6 +35,9 @@ def _commit(url, commit):
print('COMMIT: from %s' % url)
pprint.pprint(vars(commit), indent=2)
+def _metadata(url, metadata):
+ print('METADATA: from %s' % url)
+ pprint.pprint(vars(metadata), indent=2)
def _event(url, event_name, event_arg):
if event_arg:
@@ -44,7 +47,7 @@ def _event(url, event_name, event_arg):
def main(urls):
- mc = svnpubsub.client.MultiClient(urls, _commit, _event)
+ mc = svnpubsub.client.MultiClient(urls, _commit, _event, _metadata)
mc.run_forever()