diff options
Diffstat (limited to 'tools/server-side')
-rwxr-xr-x | tools/server-side/fsfs-reshard.py | 2 | ||||
-rw-r--r-- | tools/server-side/fsfs-stats.c | 2181 | ||||
-rw-r--r-- | tools/server-side/mod_dontdothat/mod_dontdothat.c | 41 | ||||
-rw-r--r-- | tools/server-side/svn-populate-node-origins-index.c | 2 | ||||
-rw-r--r-- | tools/server-side/svn-rep-sharing-stats.c | 530 | ||||
-rw-r--r-- | tools/server-side/svnauthz.c | 147 | ||||
-rwxr-xr-x | tools/server-side/svnpredumpfilter.py | 51 | ||||
-rwxr-xr-x | tools/server-side/svnpubsub/commit-hook.py | 43 | ||||
-rw-r--r-- | tools/server-side/svnpubsub/daemonize.py | 193 | ||||
-rwxr-xr-x | tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd | 2 | ||||
-rwxr-xr-x | tools/server-side/svnpubsub/revprop-change-hook.py | 90 | ||||
-rw-r--r-- | tools/server-side/svnpubsub/svnpubsub/client.py | 54 | ||||
-rw-r--r-- | tools/server-side/svnpubsub/svnpubsub/server.py | 114 | ||||
-rw-r--r-- | tools/server-side/svnpubsub/svnpubsub/util.py | 36 | ||||
-rwxr-xr-x | tools/server-side/svnpubsub/svnwcsub.py | 46 | ||||
-rwxr-xr-x | tools/server-side/svnpubsub/watcher.py | 5 |
16 files changed, 567 insertions, 2970 deletions
diff --git a/tools/server-side/fsfs-reshard.py b/tools/server-side/fsfs-reshard.py index 16d2fcd..bd82080 100755 --- a/tools/server-side/fsfs-reshard.py +++ b/tools/server-side/fsfs-reshard.py @@ -46,7 +46,7 @@ # under the License. # ==================================================================== # -# $HeadURL: http://svn.apache.org/repos/asf/subversion/branches/1.8.x/tools/server-side/fsfs-reshard.py $ +# $HeadURL: https://svn.apache.org/repos/asf/subversion/branches/1.9.x/tools/server-side/fsfs-reshard.py $ # $LastChangedDate: 2009-11-16 19:07:17 +0000 (Mon, 16 Nov 2009) $ # $LastChangedBy: hwright $ # $LastChangedRevision: 880911 $ diff --git a/tools/server-side/fsfs-stats.c b/tools/server-side/fsfs-stats.c deleted file mode 100644 index 80a09f9..0000000 --- a/tools/server-side/fsfs-stats.c +++ /dev/null @@ -1,2181 +0,0 @@ -/* fsfs-stats.c -- gather size statistics on FSFS repositories - * - * ==================================================================== - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * ==================================================================== - */ - - -#include <assert.h> - -#include <apr.h> -#include <apr_general.h> -#include <apr_file_io.h> -#include <apr_poll.h> - -#include "svn_pools.h" -#include "svn_diff.h" -#include "svn_io.h" -#include "svn_utf.h" -#include "svn_dirent_uri.h" -#include "svn_sorts.h" -#include "svn_delta.h" -#include "svn_hash.h" -#include "svn_cache_config.h" - -#include "private/svn_string_private.h" -#include "private/svn_subr_private.h" -#include "private/svn_dep_compat.h" -#include "private/svn_cache.h" - -#ifndef _ -#define _(x) x -#endif - -#define ERROR_TAG "fsfs-stats: " - -/* We group representations into 2x2 different kinds plus one default: - * [dir / file] x [text / prop]. The assignment is done by the first node - * that references the respective representation. - */ -typedef enum rep_kind_t -{ - /* The representation is _directly_ unused, i.e. not referenced by any - * noderev. However, some other representation may use it as delta base. - * null value. Should not occur in real-word repositories. */ - unused_rep, - - /* a properties on directory rep */ - dir_property_rep, - - /* a properties on file rep */ - file_property_rep, - - /* a directory rep */ - dir_rep, - - /* a file rep */ - file_rep -} rep_kind_t; - -/* A representation fragment. - */ -typedef struct representation_t -{ - /* absolute offset in the file */ - apr_size_t offset; - - /* item length in bytes */ - apr_size_t size; - - /* item length after de-deltification */ - apr_size_t expanded_size; - - /* deltification base, or NULL if there is none */ - struct representation_t *delta_base; - - /* revision that contains this representation - * (may be referenced by other revisions, though) */ - svn_revnum_t revision; - - /* number of nodes that reference this representation */ - apr_uint32_t ref_count; - - /* length of the PLAIN / DELTA line in the source file in bytes */ - apr_uint16_t header_size; - - /* classification of the representation. values of rep_kind_t */ - char kind; - - /* the source content has a PLAIN header, so we may simply copy the - * source content into the target */ - char is_plain; - -} representation_t; - -/* Represents a single revision. - * There will be only one instance per revision. */ -typedef struct revision_info_t -{ - /* number of this revision */ - svn_revnum_t revision; - - /* pack file offset (manifest value), 0 for non-packed files */ - apr_size_t offset; - - /* offset of the changes list relative to OFFSET */ - apr_size_t changes; - - /* length of the changes list on bytes */ - apr_size_t changes_len; - - /* offset of the changes list relative to OFFSET */ - apr_size_t change_count; - - /* first offset behind the revision data in the pack file (file length - * for non-packed revs) */ - apr_size_t end; - - /* number of directory noderevs in this revision */ - apr_size_t dir_noderev_count; - - /* number of file noderevs in this revision */ - apr_size_t file_noderev_count; - - /* total size of directory noderevs (i.e. the structs - not the rep) */ - apr_size_t dir_noderev_size; - - /* total size of file noderevs (i.e. the structs - not the rep) */ - apr_size_t file_noderev_size; - - /* all representation_t of this revision (in no particular order), - * i.e. those that point back to this struct */ - apr_array_header_t *representations; -} revision_info_t; - -/* Data type to identify a representation. It will be used to address - * cached combined (un-deltified) windows. - */ -typedef struct window_cache_key_t -{ - /* revision of the representation */ - svn_revnum_t revision; - - /* its offset */ - apr_size_t offset; -} window_cache_key_t; - -/* Description of one large representation. It's content will be reused / - * overwritten when it gets replaced by an even larger representation. - */ -typedef struct large_change_info_t -{ - /* size of the (deltified) representation */ - apr_size_t size; - - /* revision of the representation */ - svn_revnum_t revision; - - /* node path. "" for unused instances */ - svn_stringbuf_t *path; -} large_change_info_t; - -/* Container for the largest representations found so far. The capacity - * is fixed and entries will be inserted by reusing the last one and - * reshuffling the entry pointers. - */ -typedef struct largest_changes_t -{ - /* number of entries allocated in CHANGES */ - apr_size_t count; - - /* size of the smallest change */ - apr_size_t min_size; - - /* changes kept in this struct */ - large_change_info_t **changes; -} largest_changes_t; - -/* Information we gather per size bracket. - */ -typedef struct histogram_line_t -{ - /* number of item that fall into this bracket */ - apr_int64_t count; - - /* sum of values in this bracket */ - apr_int64_t sum; -} histogram_line_t; - -/* A histogram of 64 bit integer values. - */ -typedef struct histogram_t -{ - /* total sum over all brackets */ - histogram_line_t total; - - /* one bracket per binary step. - * line[i] is the 2^(i-1) <= x < 2^i bracket */ - histogram_line_t lines[64]; -} histogram_t; - -/* Information we collect per file ending. - */ -typedef struct extension_info_t -{ - /* file extension, including leading "." - * "(none)" in the container for files w/o extension. */ - const char *extension; - - /* histogram of representation sizes */ - histogram_t rep_histogram; - - /* histogram of sizes of changed files */ - histogram_t node_histogram; -} extension_info_t; - -/* Root data structure containing all information about a given repository. - */ -typedef struct fs_fs_t -{ - /* repository to reorg */ - const char *path; - - /* revision to start at (must be 0, ATM) */ - svn_revnum_t start_revision; - - /* FSFS format number */ - int format; - - /* highest revision number in the repo */ - svn_revnum_t max_revision; - - /* first non-packed revision */ - svn_revnum_t min_unpacked_rev; - - /* sharing size*/ - int max_files_per_dir; - - /* all revisions */ - apr_array_header_t *revisions; - - /* empty representation. - * Used as a dummy base for DELTA reps without base. */ - representation_t *null_base; - - /* undeltified txdelta window cache */ - svn_cache__t *window_cache; - - /* track the biggest contributors to repo size */ - largest_changes_t *largest_changes; - - /* history of representation sizes */ - histogram_t rep_size_histogram; - - /* history of sizes of changed nodes */ - histogram_t node_size_histogram; - - /* history of unused representations */ - histogram_t unused_rep_histogram; - - /* history of sizes of changed files */ - histogram_t file_histogram; - - /* history of sizes of file representations */ - histogram_t file_rep_histogram; - - /* history of sizes of changed file property sets */ - histogram_t file_prop_histogram; - - /* history of sizes of file property representations */ - histogram_t file_prop_rep_histogram; - - /* history of sizes of changed directories (in bytes) */ - histogram_t dir_histogram; - - /* history of sizes of directories representations */ - histogram_t dir_rep_histogram; - - /* history of sizes of changed directories property sets */ - histogram_t dir_prop_histogram; - - /* history of sizes of directories property representations */ - histogram_t dir_prop_rep_histogram; - - /* extension -> extension_info_t* map */ - apr_hash_t *by_extension; -} fs_fs_t; - -/* Return the rev pack folder for revision REV in FS. - */ -static const char * -get_pack_folder(fs_fs_t *fs, - svn_revnum_t rev, - apr_pool_t *pool) -{ - return apr_psprintf(pool, "%s/db/revs/%ld.pack", - fs->path, rev / fs->max_files_per_dir); -} - -/* Return the path of the file containing revision REV in FS. - */ -static const char * -rev_or_pack_file_name(fs_fs_t *fs, - svn_revnum_t rev, - apr_pool_t *pool) -{ - return fs->min_unpacked_rev > rev - ? svn_dirent_join(get_pack_folder(fs, rev, pool), "pack", pool) - : apr_psprintf(pool, "%s/db/revs/%ld/%ld", fs->path, - rev / fs->max_files_per_dir, rev); -} - -/* Open the file containing revision REV in FS and return it in *FILE. - */ -static svn_error_t * -open_rev_or_pack_file(apr_file_t **file, - fs_fs_t *fs, - svn_revnum_t rev, - apr_pool_t *pool) -{ - return svn_io_file_open(file, - rev_or_pack_file_name(fs, rev, pool), - APR_READ | APR_BUFFERED, - APR_OS_DEFAULT, - pool); -} - -/* Return the length of FILE in *FILE_SIZE. Use POOL for allocations. -*/ -static svn_error_t * -get_file_size(apr_off_t *file_size, - apr_file_t *file, - apr_pool_t *pool) -{ - apr_finfo_t finfo; - - SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, file, pool)); - - *file_size = finfo.size; - return SVN_NO_ERROR; -} - -/* Get the file content of revision REVISION in FS and return it in *CONTENT. - * Read the LEN bytes starting at file OFFSET. When provided, use FILE as - * packed or plain rev file. - * Use POOL for temporary allocations. - */ -static svn_error_t * -get_content(svn_stringbuf_t **content, - apr_file_t *file, - fs_fs_t *fs, - svn_revnum_t revision, - apr_off_t offset, - apr_size_t len, - apr_pool_t *pool) -{ - apr_pool_t * file_pool = svn_pool_create(pool); - apr_size_t large_buffer_size = 0x10000; - - if (file == NULL) - SVN_ERR(open_rev_or_pack_file(&file, fs, revision, file_pool)); - - *content = svn_stringbuf_create_ensure(len, pool); - (*content)->len = len; - -#if APR_VERSION_AT_LEAST(1,3,0) - /* for better efficiency use larger buffers on large reads */ - if ( (len >= large_buffer_size) - && (apr_file_buffer_size_get(file) < large_buffer_size)) - apr_file_buffer_set(file, - apr_palloc(apr_file_pool_get(file), - large_buffer_size), - large_buffer_size); -#endif - - SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool)); - SVN_ERR(svn_io_file_read_full2(file, (*content)->data, len, - NULL, NULL, pool)); - svn_pool_destroy(file_pool); - - return SVN_NO_ERROR; -} - -/* In *RESULT, return the cached txdelta window stored in REPRESENTATION - * within FS. If that has not been found in cache, return NULL. - * Allocate the result in POOL. - */ -static svn_error_t * -get_cached_window(svn_stringbuf_t **result, - fs_fs_t *fs, - representation_t *representation, - apr_pool_t *pool) -{ - svn_boolean_t found = FALSE; - window_cache_key_t key; - key.revision = representation->revision; - key.offset = representation->offset; - - *result = NULL; - return svn_error_trace(svn_cache__get((void**)result, &found, - fs->window_cache, - &key, pool)); -} - -/* Cache the undeltified txdelta WINDOW for REPRESENTATION within FS. - * Use POOL for temporaries. - */ -static svn_error_t * -set_cached_window(fs_fs_t *fs, - representation_t *representation, - svn_stringbuf_t *window, - apr_pool_t *pool) -{ - /* select entry */ - window_cache_key_t key; - key.revision = representation->revision; - key.offset = representation->offset; - - return svn_error_trace(svn_cache__set(fs->window_cache, &key, window, - pool)); -} - -/* Initialize the LARGEST_CHANGES member in FS with a capacity of COUNT - * entries. Use POOL for allocations. - */ -static void -initialize_largest_changes(fs_fs_t *fs, - apr_size_t count, - apr_pool_t *pool) -{ - apr_size_t i; - - fs->largest_changes = apr_pcalloc(pool, sizeof(*fs->largest_changes)); - fs->largest_changes->count = count; - fs->largest_changes->min_size = 1; - fs->largest_changes->changes - = apr_palloc(pool, count * sizeof(*fs->largest_changes->changes)); - - /* allocate *all* entries before the path stringbufs. This increases - * cache locality and enhances performance significantly. */ - for (i = 0; i < count; ++i) - fs->largest_changes->changes[i] - = apr_palloc(pool, sizeof(**fs->largest_changes->changes)); - - /* now initialize them and allocate the stringbufs */ - for (i = 0; i < count; ++i) - { - fs->largest_changes->changes[i]->size = 0; - fs->largest_changes->changes[i]->revision = SVN_INVALID_REVNUM; - fs->largest_changes->changes[i]->path - = svn_stringbuf_create_ensure(1024, pool); - } -} - -/* Add entry for SIZE to HISTOGRAM. - */ -static void -add_to_histogram(histogram_t *histogram, - apr_int64_t size) -{ - apr_int64_t shift = 0; - - while (((apr_int64_t)(1) << shift) <= size) - shift++; - - histogram->total.count++; - histogram->total.sum += size; - histogram->lines[(apr_size_t)shift].count++; - histogram->lines[(apr_size_t)shift].sum += size; -} - -/* Update data aggregators in FS with this representation of type KIND, on- - * disk REP_SIZE and expanded node size EXPANDED_SIZE for PATH in REVSION. - */ -static void -add_change(fs_fs_t *fs, - apr_int64_t rep_size, - apr_int64_t expanded_size, - svn_revnum_t revision, - const char *path, - rep_kind_t kind) -{ - /* identify largest reps */ - if (rep_size >= fs->largest_changes->min_size) - { - apr_size_t i; - large_change_info_t *info - = fs->largest_changes->changes[fs->largest_changes->count - 1]; - info->size = rep_size; - info->revision = revision; - svn_stringbuf_set(info->path, path); - - /* linear insertion but not too bad since count is low and insertions - * near the end are more likely than close to front */ - for (i = fs->largest_changes->count - 1; i > 0; --i) - if (fs->largest_changes->changes[i-1]->size >= rep_size) - break; - else - fs->largest_changes->changes[i] = fs->largest_changes->changes[i-1]; - - fs->largest_changes->changes[i] = info; - fs->largest_changes->min_size - = fs->largest_changes->changes[fs->largest_changes->count-1]->size; - } - - /* global histograms */ - add_to_histogram(&fs->rep_size_histogram, rep_size); - add_to_histogram(&fs->node_size_histogram, expanded_size); - - /* specific histograms by type */ - switch (kind) - { - case unused_rep: add_to_histogram(&fs->unused_rep_histogram, - rep_size); - break; - case dir_property_rep: add_to_histogram(&fs->dir_prop_rep_histogram, - rep_size); - add_to_histogram(&fs->dir_prop_histogram, - expanded_size); - break; - case file_property_rep: add_to_histogram(&fs->file_prop_rep_histogram, - rep_size); - add_to_histogram(&fs->file_prop_histogram, - expanded_size); - break; - case dir_rep: add_to_histogram(&fs->dir_rep_histogram, - rep_size); - add_to_histogram(&fs->dir_histogram, - expanded_size); - break; - case file_rep: add_to_histogram(&fs->file_rep_histogram, - rep_size); - add_to_histogram(&fs->file_histogram, - expanded_size); - break; - } - - /* by extension */ - if (kind == file_rep) - { - /* determine extension */ - extension_info_t *info; - const char * file_name = strrchr(path, '/'); - const char * extension = file_name ? strrchr(file_name, '.') : NULL; - - if (extension == NULL || extension == file_name + 1) - extension = "(none)"; - - /* get / auto-insert entry for this extension */ - info = apr_hash_get(fs->by_extension, extension, APR_HASH_KEY_STRING); - if (info == NULL) - { - apr_pool_t *pool = apr_hash_pool_get(fs->by_extension); - info = apr_pcalloc(pool, sizeof(*info)); - info->extension = apr_pstrdup(pool, extension); - - apr_hash_set(fs->by_extension, info->extension, - APR_HASH_KEY_STRING, info); - } - - /* update per-extension histogram */ - add_to_histogram(&info->node_histogram, expanded_size); - add_to_histogram(&info->rep_histogram, rep_size); - } -} - -/* Given rev pack PATH in FS, read the manifest file and return the offsets - * in *MANIFEST. Use POOL for allocations. - */ -static svn_error_t * -read_manifest(apr_array_header_t **manifest, - fs_fs_t *fs, - const char *path, - apr_pool_t *pool) -{ - svn_stream_t *manifest_stream; - apr_pool_t *iterpool; - - /* Open the manifest file. */ - SVN_ERR(svn_stream_open_readonly(&manifest_stream, - svn_dirent_join(path, "manifest", pool), - pool, pool)); - - /* While we're here, let's just read the entire manifest file into an array, - so we can cache the entire thing. */ - iterpool = svn_pool_create(pool); - *manifest = apr_array_make(pool, fs->max_files_per_dir, sizeof(apr_size_t)); - while (1) - { - svn_stringbuf_t *sb; - svn_boolean_t eof; - apr_uint64_t val; - svn_error_t *err; - - svn_pool_clear(iterpool); - SVN_ERR(svn_stream_readline(manifest_stream, &sb, "\n", &eof, iterpool)); - if (eof) - break; - - err = svn_cstring_strtoui64(&val, sb->data, 0, APR_SIZE_MAX, 10); - if (err) - return svn_error_createf(SVN_ERR_FS_CORRUPT, err, - _("Manifest offset '%s' too large"), - sb->data); - APR_ARRAY_PUSH(*manifest, apr_size_t) = (apr_size_t)val; - } - svn_pool_destroy(iterpool); - - return svn_stream_close(manifest_stream); -} - -/* Read header information for the revision stored in FILE_CONTENT (one - * whole revision). Return the offsets within FILE_CONTENT for the - * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN. - * Use POOL for temporary allocations. */ -static svn_error_t * -read_revision_header(apr_size_t *changes, - apr_size_t *changes_len, - apr_size_t *root_noderev, - svn_stringbuf_t *file_content, - apr_pool_t *pool) -{ - char buf[64]; - const char *line; - char *space; - apr_uint64_t val; - apr_size_t len; - - /* Read in this last block, from which we will identify the last line. */ - len = sizeof(buf); - if (len > file_content->len) - len = file_content->len; - - memcpy(buf, file_content->data + file_content->len - len, len); - - /* The last byte should be a newline. */ - if (buf[(apr_ssize_t)len - 1] != '\n') - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Revision lacks trailing newline")); - - /* Look for the next previous newline. */ - buf[len - 1] = 0; - line = strrchr(buf, '\n'); - if (line == NULL) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Final line in revision file longer " - "than 64 characters")); - - space = strchr(line, ' '); - if (space == NULL) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Final line in revision file missing space")); - - /* terminate the header line */ - *space = 0; - - /* extract information */ - SVN_ERR(svn_cstring_strtoui64(&val, line+1, 0, APR_SIZE_MAX, 10)); - *root_noderev = (apr_size_t)val; - SVN_ERR(svn_cstring_strtoui64(&val, space+1, 0, APR_SIZE_MAX, 10)); - *changes = (apr_size_t)val; - *changes_len = file_content->len - *changes - (buf + len - line) + 1; - - return SVN_NO_ERROR; -} - -/* Read the FSFS format number and sharding size from the format file at - * PATH and return it in *PFORMAT and *MAX_FILES_PER_DIR respectively. - * Use POOL for temporary allocations. - */ -static svn_error_t * -read_format(int *pformat, int *max_files_per_dir, - const char *path, apr_pool_t *pool) -{ - svn_error_t *err; - apr_file_t *file; - char buf[80]; - apr_size_t len; - - /* open format file and read the first line */ - err = svn_io_file_open(&file, path, APR_READ | APR_BUFFERED, - APR_OS_DEFAULT, pool); - if (err && APR_STATUS_IS_ENOENT(err->apr_err)) - { - /* Treat an absent format file as format 1. Do not try to - create the format file on the fly, because the repository - might be read-only for us, or this might be a read-only - operation, and the spirit of FSFS is to make no changes - whatseover in read-only operations. See thread starting at - http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=97600 - for more. */ - svn_error_clear(err); - *pformat = 1; - *max_files_per_dir = 0; - - return SVN_NO_ERROR; - } - SVN_ERR(err); - - len = sizeof(buf); - err = svn_io_read_length_line(file, buf, &len, pool); - if (err && APR_STATUS_IS_EOF(err->apr_err)) - { - /* Return a more useful error message. */ - svn_error_clear(err); - return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, - _("Can't read first line of format file '%s'"), - svn_dirent_local_style(path, pool)); - } - SVN_ERR(err); - - /* Check that the first line contains only digits. */ - SVN_ERR(svn_cstring_atoi(pformat, buf)); - - /* Set the default values for anything that can be set via an option. */ - *max_files_per_dir = 0; - - /* Read any options. */ - while (1) - { - len = sizeof(buf); - err = svn_io_read_length_line(file, buf, &len, pool); - if (err && APR_STATUS_IS_EOF(err->apr_err)) - { - /* No more options; that's okay. */ - svn_error_clear(err); - break; - } - SVN_ERR(err); - - if (strncmp(buf, "layout ", 7) == 0) - { - if (strcmp(buf+7, "linear") == 0) - { - *max_files_per_dir = 0; - continue; - } - - if (strncmp(buf+7, "sharded ", 8) == 0) - { - /* Check that the argument is numeric. */ - SVN_ERR(svn_cstring_atoi(max_files_per_dir, buf + 15)); - continue; - } - } - - return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL, - _("'%s' contains invalid filesystem format option '%s'"), - svn_dirent_local_style(path, pool), buf); - } - - return svn_io_file_close(file, pool); -} - -/* Read the content of the file at PATH and return it in *RESULT. - * Use POOL for temporary allocations. - */ -static svn_error_t * -read_number(svn_revnum_t *result, const char *path, apr_pool_t *pool) -{ - svn_stringbuf_t *content; - apr_uint64_t number; - - SVN_ERR(svn_stringbuf_from_file2(&content, path, pool)); - - content->data[content->len-1] = 0; - SVN_ERR(svn_cstring_strtoui64(&number, content->data, 0, LONG_MAX, 10)); - *result = (svn_revnum_t)number; - - return SVN_NO_ERROR; -} - -/* Create *FS for the repository at PATH and read the format and size info. - * Use POOL for temporary allocations. - */ -static svn_error_t * -fs_open(fs_fs_t **fs, const char *path, apr_pool_t *pool) -{ - *fs = apr_pcalloc(pool, sizeof(**fs)); - (*fs)->path = apr_pstrdup(pool, path); - (*fs)->max_files_per_dir = 1000; - - /* Read the FS format number. */ - SVN_ERR(read_format(&(*fs)->format, - &(*fs)->max_files_per_dir, - svn_dirent_join(path, "db/format", pool), - pool)); - if (((*fs)->format != 4) && ((*fs)->format != 6)) - return svn_error_create(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL, NULL); - - /* read size (HEAD) info */ - SVN_ERR(read_number(&(*fs)->min_unpacked_rev, - svn_dirent_join(path, "db/min-unpacked-rev", pool), - pool)); - return read_number(&(*fs)->max_revision, - svn_dirent_join(path, "db/current", pool), - pool); -} - -/* Utility function that returns true if STRING->DATA matches KEY. - */ -static svn_boolean_t -key_matches(svn_string_t *string, const char *key) -{ - return strcmp(string->data, key) == 0; -} - -/* Comparator used for binary search comparing the absolute file offset - * of a representation to some other offset. DATA is a *representation_t, - * KEY is a pointer to an apr_size_t. - */ -static int -compare_representation_offsets(const void *data, const void *key) -{ - apr_ssize_t diff = (*(const representation_t *const *)data)->offset - - *(const apr_size_t *)key; - - /* sizeof(int) may be < sizeof(ssize_t) */ - if (diff < 0) - return -1; - return diff > 0 ? 1 : 0; -} - -/* Find the revision_info_t object to the given REVISION in FS and return - * it in *REVISION_INFO. For performance reasons, we skip the lookup if - * the info is already provided. - * - * In that revision, look for the representation_t object for offset OFFSET. - * If it already exists, set *IDX to its index in *REVISION_INFO's - * representations list and return the representation object. Otherwise, - * set the index to where it must be inserted and return NULL. - */ -static representation_t * -find_representation(int *idx, - fs_fs_t *fs, - revision_info_t **revision_info, - svn_revnum_t revision, - apr_size_t offset) -{ - revision_info_t *info; - *idx = -1; - - /* first let's find the revision */ - info = revision_info ? *revision_info : NULL; - if (info == NULL || info->revision != revision) - { - info = APR_ARRAY_IDX(fs->revisions, - revision - fs->start_revision, - revision_info_t*); - if (revision_info) - *revision_info = info; - } - - /* not found -> no result */ - if (info == NULL) - return NULL; - - assert(revision == info->revision); - - /* look for the representation */ - *idx = svn_sort__bsearch_lower_bound(&offset, - info->representations, - compare_representation_offsets); - if (*idx < info->representations->nelts) - { - /* return the representation, if this is the one we were looking for */ - representation_t *result - = APR_ARRAY_IDX(info->representations, *idx, representation_t *); - if (result->offset == offset) - return result; - } - - /* not parsed, yet */ - return NULL; -} - -/* Read the representation header in FILE_CONTENT at OFFSET. Return its - * size in *HEADER_SIZE, set *IS_PLAIN if no deltification was used and - * return the deltification base representation in *REPRESENTATION. If - * there is none, set it to NULL. Use FS to it look up. - * - * Use POOL for allocations and SCRATCH_POOL for temporaries. - */ -static svn_error_t * -read_rep_base(representation_t **representation, - apr_size_t *header_size, - svn_boolean_t *is_plain, - fs_fs_t *fs, - svn_stringbuf_t *file_content, - apr_size_t offset, - apr_pool_t *pool, - apr_pool_t *scratch_pool) -{ - char *str, *last_str; - int idx; - svn_revnum_t revision; - apr_uint64_t temp; - - /* identify representation header (1 line) */ - const char *buffer = file_content->data + offset; - const char *line_end = strchr(buffer, '\n'); - *header_size = line_end - buffer + 1; - - /* check for PLAIN rep */ - if (strncmp(buffer, "PLAIN\n", *header_size) == 0) - { - *is_plain = TRUE; - *representation = NULL; - return SVN_NO_ERROR; - } - - /* check for DELTA against empty rep */ - *is_plain = FALSE; - if (strncmp(buffer, "DELTA\n", *header_size) == 0) - { - /* This is a delta against the empty stream. */ - *representation = fs->null_base; - return SVN_NO_ERROR; - } - - str = apr_pstrndup(scratch_pool, buffer, line_end - buffer); - last_str = str; - - /* parse it. */ - str = svn_cstring_tokenize(" ", &last_str); - str = svn_cstring_tokenize(" ", &last_str); - SVN_ERR(svn_revnum_parse(&revision, str, NULL)); - - str = svn_cstring_tokenize(" ", &last_str); - SVN_ERR(svn_cstring_strtoui64(&temp, str, 0, APR_SIZE_MAX, 10)); - - /* it should refer to a rep in an earlier revision. Look it up */ - *representation = find_representation(&idx, fs, NULL, revision, (apr_size_t)temp); - return SVN_NO_ERROR; -} - -/* Parse the representation reference (text: or props:) in VALUE, look - * it up in FS and return it in *REPRESENTATION. To be able to parse the - * base rep, we pass the FILE_CONTENT as well. - * - * If necessary, allocate the result in POOL; use SCRATCH_POOL for temp. - * allocations. - */ -static svn_error_t * -parse_representation(representation_t **representation, - fs_fs_t *fs, - svn_stringbuf_t *file_content, - svn_string_t *value, - revision_info_t *revision_info, - apr_pool_t *pool, - apr_pool_t *scratch_pool) -{ - representation_t *result; - svn_revnum_t revision; - - apr_uint64_t offset; - apr_uint64_t size; - apr_uint64_t expanded_size; - int idx; - - /* read location (revision, offset) and size */ - char *c = (char *)value->data; - SVN_ERR(svn_revnum_parse(&revision, svn_cstring_tokenize(" ", &c), NULL)); - SVN_ERR(svn_cstring_strtoui64(&offset, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); - SVN_ERR(svn_cstring_strtoui64(&size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); - SVN_ERR(svn_cstring_strtoui64(&expanded_size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); - - /* look it up */ - result = find_representation(&idx, fs, &revision_info, revision, (apr_size_t)offset); - if (!result) - { - /* not parsed, yet (probably a rep in the same revision). - * Create a new rep object and determine its base rep as well. - */ - apr_size_t header_size; - svn_boolean_t is_plain; - - result = apr_pcalloc(pool, sizeof(*result)); - result->revision = revision; - result->expanded_size = (apr_size_t)(expanded_size ? expanded_size : size); - result->offset = (apr_size_t)offset; - result->size = (apr_size_t)size; - SVN_ERR(read_rep_base(&result->delta_base, &header_size, - &is_plain, fs, file_content, - (apr_size_t)offset, - pool, scratch_pool)); - - result->header_size = header_size; - result->is_plain = is_plain; - svn_sort__array_insert(&result, revision_info->representations, idx); - } - - *representation = result; - - return SVN_NO_ERROR; -} - -/* Get the unprocessed (i.e. still deltified) content of REPRESENTATION in - * FS and return it in *CONTENT. If no NULL, FILE_CONTENT must contain - * the contents of the revision that also contains the representation. - * Use POOL for allocations. - */ -static svn_error_t * -get_rep_content(svn_stringbuf_t **content, - fs_fs_t *fs, - representation_t *representation, - svn_stringbuf_t *file_content, - apr_pool_t *pool) -{ - apr_off_t offset; - svn_revnum_t revision = representation->revision; - revision_info_t *revision_info = APR_ARRAY_IDX(fs->revisions, - revision - fs->start_revision, - revision_info_t*); - - /* not in cache. Is the revision valid at all? */ - if (revision - fs->start_revision > fs->revisions->nelts) - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Unknown revision %ld"), revision); - - if (file_content) - { - offset = representation->offset - + representation->header_size; - *content = svn_stringbuf_ncreate(file_content->data + offset, - representation->size, pool); - } - else - { - offset = revision_info->offset - + representation->offset - + representation->header_size; - SVN_ERR(get_content(content, NULL, fs, revision, offset, - representation->size, pool)); - } - - return SVN_NO_ERROR; -} - - -/* Read the delta window contents of all windows in REPRESENTATION in FS. - * If no NULL, FILE_CONTENT must contain the contents of the revision that - * also contains the representation. - * Return the data as svn_txdelta_window_t* instances in *WINDOWS. - * Use POOL for allocations. - */ -static svn_error_t * -read_windows(apr_array_header_t **windows, - fs_fs_t *fs, - representation_t *representation, - svn_stringbuf_t *file_content, - apr_pool_t *pool) -{ - svn_stringbuf_t *content; - svn_stream_t *stream; - char version; - apr_size_t len = sizeof(version); - - *windows = apr_array_make(pool, 0, sizeof(svn_txdelta_window_t *)); - - /* get the whole revision content */ - SVN_ERR(get_rep_content(&content, fs, representation, file_content, pool)); - - /* create a read stream and position it directly after the rep header */ - content->data += 3; - content->len -= 3; - stream = svn_stream_from_stringbuf(content, pool); - SVN_ERR(svn_stream_read(stream, &version, &len)); - - /* read the windows from that stream */ - while (TRUE) - { - svn_txdelta_window_t *window; - svn_stream_mark_t *mark; - char dummy; - - len = sizeof(dummy); - SVN_ERR(svn_stream_mark(stream, &mark, pool)); - SVN_ERR(svn_stream_read(stream, &dummy, &len)); - if (len == 0) - break; - - SVN_ERR(svn_stream_seek(stream, mark)); - SVN_ERR(svn_txdelta_read_svndiff_window(&window, stream, version, pool)); - APR_ARRAY_PUSH(*windows, svn_txdelta_window_t *) = window; - } - - return SVN_NO_ERROR; -} - -/* Get the undeltified representation that is a result of combining all - * deltas from the current desired REPRESENTATION in FS with its base - * representation. If no NULL, FILE_CONTENT must contain the contents of - * the revision that also contains the representation. Store the result - * in *CONTENT. Use POOL for allocations. - */ -static svn_error_t * -get_combined_window(svn_stringbuf_t **content, - fs_fs_t *fs, - representation_t *representation, - svn_stringbuf_t *file_content, - apr_pool_t *pool) -{ - int i; - apr_array_header_t *windows; - svn_stringbuf_t *base_content, *result; - const char *source; - apr_pool_t *sub_pool; - apr_pool_t *iter_pool; - - /* special case: no un-deltification necessary */ - if (representation->is_plain) - { - SVN_ERR(get_rep_content(content, fs, representation, file_content, - pool)); - SVN_ERR(set_cached_window(fs, representation, *content, pool)); - return SVN_NO_ERROR; - } - - /* special case: data already in cache */ - SVN_ERR(get_cached_window(content, fs, representation, pool)); - if (*content) - return SVN_NO_ERROR; - - /* read the delta windows for this representation */ - sub_pool = svn_pool_create(pool); - iter_pool = svn_pool_create(pool); - SVN_ERR(read_windows(&windows, fs, representation, file_content, sub_pool)); - - /* fetch the / create a base content */ - if (representation->delta_base && representation->delta_base->revision) - SVN_ERR(get_combined_window(&base_content, fs, - representation->delta_base, NULL, sub_pool)); - else - base_content = svn_stringbuf_create_empty(sub_pool); - - /* apply deltas */ - result = svn_stringbuf_create_empty(pool); - source = base_content->data; - - for (i = 0; i < windows->nelts; ++i) - { - svn_txdelta_window_t *window - = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *); - svn_stringbuf_t *buf - = svn_stringbuf_create_ensure(window->tview_len, iter_pool); - - buf->len = window->tview_len; - svn_txdelta_apply_instructions(window, window->src_ops ? source : NULL, - buf->data, &buf->len); - - svn_stringbuf_appendbytes(result, buf->data, buf->len); - source += window->sview_len; - - svn_pool_clear(iter_pool); - } - - /* cache result and return it */ - SVN_ERR(set_cached_window(fs, representation, result, sub_pool)); - *content = result; - - svn_pool_destroy(iter_pool); - svn_pool_destroy(sub_pool); - - return SVN_NO_ERROR; -} - -/* forward declaration */ -static svn_error_t * -read_noderev(fs_fs_t *fs, - svn_stringbuf_t *file_content, - apr_size_t offset, - revision_info_t *revision_info, - apr_pool_t *pool, - apr_pool_t *scratch_pool); - -/* Starting at the directory in REPRESENTATION in FILE_CONTENT, read all - * DAG nodes, directories and representations linked in that tree structure. - * Store them in FS and REVISION_INFO. Also, read them only once. - * - * Use POOL for persistent allocations and SCRATCH_POOL for temporaries. - */ -static svn_error_t * -parse_dir(fs_fs_t *fs, - svn_stringbuf_t *file_content, - representation_t *representation, - revision_info_t *revision_info, - apr_pool_t *pool, - apr_pool_t *scratch_pool) -{ - svn_stringbuf_t *text; - apr_pool_t *iter_pool; - apr_pool_t *text_pool; - const char *current; - const char *revision_key; - apr_size_t key_len; - - /* special case: empty dir rep */ - if (representation == NULL) - return SVN_NO_ERROR; - - /* get the directory as unparsed string */ - iter_pool = svn_pool_create(scratch_pool); - text_pool = svn_pool_create(scratch_pool); - - SVN_ERR(get_combined_window(&text, fs, representation, file_content, - text_pool)); - current = text->data; - - /* calculate some invariants */ - revision_key = apr_psprintf(text_pool, "r%ld/", representation->revision); - key_len = strlen(revision_key); - - /* Parse and process all directory entries. */ - while (*current != 'E') - { - char *next; - - /* skip "K ???\n<name>\nV ???\n" lines*/ - current = strchr(current, '\n'); - if (current) - current = strchr(current+1, '\n'); - if (current) - current = strchr(current+1, '\n'); - next = current ? strchr(++current, '\n') : NULL; - if (next == NULL) - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Corrupt directory representation in r%ld at offset %ld"), - representation->revision, - (long)representation->offset); - - /* iff this entry refers to a node in the same revision as this dir, - * recurse into that node */ - *next = 0; - current = strstr(current, revision_key); - if (current) - { - /* recurse */ - apr_uint64_t offset; - - SVN_ERR(svn_cstring_strtoui64(&offset, current + key_len, 0, - APR_SIZE_MAX, 10)); - SVN_ERR(read_noderev(fs, file_content, (apr_size_t)offset, - revision_info, pool, iter_pool)); - - svn_pool_clear(iter_pool); - } - current = next+1; - } - - svn_pool_destroy(iter_pool); - svn_pool_destroy(text_pool); - return SVN_NO_ERROR; -} - -/* Starting at the noderev at OFFSET in FILE_CONTENT, read all DAG nodes, - * directories and representations linked in that tree structure. Store - * them in FS and REVISION_INFO. Also, read them only once. Return the - * result in *NODEREV. - * - * Use POOL for persistent allocations and SCRATCH_POOL for temporaries. - */ -static svn_error_t * -read_noderev(fs_fs_t *fs, - svn_stringbuf_t *file_content, - apr_size_t offset, - revision_info_t *revision_info, - apr_pool_t *pool, - apr_pool_t *scratch_pool) -{ - svn_string_t *line; - representation_t *text = NULL; - representation_t *props = NULL; - apr_size_t start_offset = offset; - svn_boolean_t is_dir = FALSE; - const char *path = "???"; - - scratch_pool = svn_pool_create(scratch_pool); - - /* parse the noderev line-by-line until we find an empty line */ - while (1) - { - /* for this line, extract key and value. Ignore invalid values */ - svn_string_t key; - svn_string_t value; - char *sep; - const char *start = file_content->data + offset; - const char *end = strchr(start, '\n'); - - line = svn_string_ncreate(start, end - start, scratch_pool); - offset += end - start + 1; - - /* empty line -> end of noderev data */ - if (line->len == 0) - break; - - sep = strchr(line->data, ':'); - if (sep == NULL) - continue; - - key.data = line->data; - key.len = sep - key.data; - *sep = 0; - - if (key.len + 2 > line->len) - continue; - - value.data = sep + 2; - value.len = line->len - (key.len + 2); - - /* translate (key, value) into noderev elements */ - if (key_matches(&key, "type")) - is_dir = strcmp(value.data, "dir") == 0; - else if (key_matches(&key, "text")) - { - SVN_ERR(parse_representation(&text, fs, file_content, - &value, revision_info, - pool, scratch_pool)); - - /* if we are the first to use this rep, mark it as "text rep" */ - if (++text->ref_count == 1) - text->kind = is_dir ? dir_rep : file_rep; - } - else if (key_matches(&key, "props")) - { - SVN_ERR(parse_representation(&props, fs, file_content, - &value, revision_info, - pool, scratch_pool)); - - /* if we are the first to use this rep, mark it as "prop rep" */ - if (++props->ref_count == 1) - props->kind = is_dir ? dir_property_rep : file_property_rep; - } - else if (key_matches(&key, "cpath")) - path = value.data; - } - - /* record largest changes */ - if (text && text->ref_count == 1) - add_change(fs, (apr_int64_t)text->size, (apr_int64_t)text->expanded_size, - text->revision, path, text->kind); - if (props && props->ref_count == 1) - add_change(fs, (apr_int64_t)props->size, (apr_int64_t)props->expanded_size, - props->revision, path, props->kind); - - /* if this is a directory and has not been processed, yet, read and - * process it recursively */ - if (is_dir && text && text->ref_count == 1) - SVN_ERR(parse_dir(fs, file_content, text, revision_info, - pool, scratch_pool)); - - /* update stats */ - if (is_dir) - { - revision_info->dir_noderev_size += offset - start_offset; - revision_info->dir_noderev_count++; - } - else - { - revision_info->file_noderev_size += offset - start_offset; - revision_info->file_noderev_count++; - } - svn_pool_destroy(scratch_pool); - - return SVN_NO_ERROR; -} - -/* Given the unparsed changes list in CHANGES with LEN chars, return the - * number of changed paths encoded in it. - */ -static apr_size_t -get_change_count(const char *changes, - apr_size_t len) -{ - apr_size_t lines = 0; - const char *end = changes + len; - - /* line count */ - for (; changes < end; ++changes) - if (*changes == '\n') - ++lines; - - /* two lines per change */ - return lines / 2; -} - -/* Simple utility to print a REVISION number and make it appear immediately. - */ -static void -print_progress(svn_revnum_t revision) -{ - printf("%8ld", revision); - fflush(stdout); -} - -/* Read the content of the pack file staring at revision BASE and store it - * in FS. Use POOL for allocations. - */ -static svn_error_t * -read_pack_file(fs_fs_t *fs, - svn_revnum_t base, - apr_pool_t *pool) -{ - apr_array_header_t *manifest = NULL; - apr_pool_t *local_pool = svn_pool_create(pool); - apr_pool_t *iter_pool = svn_pool_create(local_pool); - int i; - apr_off_t file_size = 0; - apr_file_t *file; - const char *pack_folder = get_pack_folder(fs, base, local_pool); - - /* parse the manifest file */ - SVN_ERR(read_manifest(&manifest, fs, pack_folder, local_pool)); - if (manifest->nelts != fs->max_files_per_dir) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, NULL); - - SVN_ERR(open_rev_or_pack_file(&file, fs, base, local_pool)); - SVN_ERR(get_file_size(&file_size, file, local_pool)); - - /* process each revision in the pack file */ - for (i = 0; i < manifest->nelts; ++i) - { - apr_size_t root_node_offset; - svn_stringbuf_t *rev_content; - - /* create the revision info for the current rev */ - revision_info_t *info = apr_pcalloc(pool, sizeof(*info)); - info->representations = apr_array_make(iter_pool, 4, sizeof(representation_t*)); - - info->revision = base + i; - info->offset = APR_ARRAY_IDX(manifest, i, apr_size_t); - info->end = i+1 < manifest->nelts - ? APR_ARRAY_IDX(manifest, i+1 , apr_size_t) - : file_size; - - SVN_ERR(get_content(&rev_content, file, fs, info->revision, - info->offset, - info->end - info->offset, - iter_pool)); - - SVN_ERR(read_revision_header(&info->changes, - &info->changes_len, - &root_node_offset, - rev_content, - iter_pool)); - - info->change_count - = get_change_count(rev_content->data + info->changes, - info->changes_len); - SVN_ERR(read_noderev(fs, rev_content, - root_node_offset, info, pool, iter_pool)); - - info->representations = apr_array_copy(pool, info->representations); - APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info; - - /* destroy temps */ - svn_pool_clear(iter_pool); - } - - /* one more pack file processed */ - print_progress(base); - svn_pool_destroy(local_pool); - - return SVN_NO_ERROR; -} - -/* Read the content of the file for REVSION and store its contents in FS. - * Use POOL for allocations. - */ -static svn_error_t * -read_revision_file(fs_fs_t *fs, - svn_revnum_t revision, - apr_pool_t *pool) -{ - apr_size_t root_node_offset; - apr_pool_t *local_pool = svn_pool_create(pool); - svn_stringbuf_t *rev_content; - revision_info_t *info = apr_pcalloc(pool, sizeof(*info)); - apr_off_t file_size = 0; - apr_file_t *file; - - /* read the whole pack file into memory */ - SVN_ERR(open_rev_or_pack_file(&file, fs, revision, local_pool)); - SVN_ERR(get_file_size(&file_size, file, local_pool)); - - /* create the revision info for the current rev */ - info->representations = apr_array_make(pool, 4, sizeof(representation_t*)); - - info->revision = revision; - info->offset = 0; - info->end = file_size; - - SVN_ERR(get_content(&rev_content, file, fs, revision, 0, file_size, - local_pool)); - - SVN_ERR(read_revision_header(&info->changes, - &info->changes_len, - &root_node_offset, - rev_content, - local_pool)); - - /* put it into our containers */ - APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info; - - info->change_count - = get_change_count(rev_content->data + info->changes, - info->changes_len); - - /* parse the revision content recursively. */ - SVN_ERR(read_noderev(fs, rev_content, - root_node_offset, info, - pool, local_pool)); - - /* show progress every 1000 revs or so */ - if (revision % fs->max_files_per_dir == 0) - print_progress(revision); - - svn_pool_destroy(local_pool); - - return SVN_NO_ERROR; -} - -/* Read the repository at PATH beginning with revision START_REVISION and - * return the result in *FS. Allocate caches with MEMSIZE bytes total - * capacity. Use POOL for non-cache allocations. - */ -static svn_error_t * -read_revisions(fs_fs_t **fs, - const char *path, - svn_revnum_t start_revision, - apr_size_t memsize, - apr_pool_t *pool) -{ - svn_revnum_t revision; - svn_cache_config_t cache_config = *svn_cache_config_get(); - - /* determine cache sizes */ - - if (memsize < 100) - memsize = 100; - - cache_config.cache_size = memsize * 1024 * 1024; - svn_cache_config_set(&cache_config); - - SVN_ERR(fs_open(fs, path, pool)); - - /* create data containers and caches */ - (*fs)->start_revision = start_revision - - (start_revision % (*fs)->max_files_per_dir); - (*fs)->revisions = apr_array_make(pool, - (*fs)->max_revision + 1 - (*fs)->start_revision, - sizeof(revision_info_t *)); - (*fs)->null_base = apr_pcalloc(pool, sizeof(*(*fs)->null_base)); - initialize_largest_changes(*fs, 64, pool); - (*fs)->by_extension = apr_hash_make(pool); - - SVN_ERR(svn_cache__create_membuffer_cache(&(*fs)->window_cache, - svn_cache__get_global_membuffer_cache(), - NULL, NULL, - sizeof(window_cache_key_t), - "", FALSE, pool)); - - /* read all packed revs */ - for ( revision = start_revision - ; revision < (*fs)->min_unpacked_rev - ; revision += (*fs)->max_files_per_dir) - SVN_ERR(read_pack_file(*fs, revision, pool)); - - /* read non-packed revs */ - for ( ; revision <= (*fs)->max_revision; ++revision) - SVN_ERR(read_revision_file(*fs, revision, pool)); - - return SVN_NO_ERROR; -} - -/* Compression statistics we collect over a given set of representations. - */ -typedef struct rep_pack_stats_t -{ - /* number of representations */ - apr_int64_t count; - - /* total size after deltification (i.e. on disk size) */ - apr_int64_t packed_size; - - /* total size after de-deltification (i.e. plain text size) */ - apr_int64_t expanded_size; - - /* total on-disk header size */ - apr_int64_t overhead_size; -} rep_pack_stats_t; - -/* Statistics we collect over a given set of representations. - * We group them into shared and non-shared ("unique") reps. - */ -typedef struct representation_stats_t -{ - /* stats over all representations */ - rep_pack_stats_t total; - - /* stats over those representations with ref_count == 1 */ - rep_pack_stats_t uniques; - - /* stats over those representations with ref_count > 1 */ - rep_pack_stats_t shared; - - /* sum of all ref_counts */ - apr_int64_t references; - - /* sum of ref_count * expanded_size, - * i.e. total plaintext content if there was no rep sharing */ - apr_int64_t expanded_size; -} representation_stats_t; - -/* Basic statistics we collect over a given set of noderevs. - */ -typedef struct node_stats_t -{ - /* number of noderev structs */ - apr_int64_t count; - - /* their total size on disk (structs only) */ - apr_int64_t size; -} node_stats_t; - -/* Accumulate stats of REP in STATS. - */ -static void -add_rep_pack_stats(rep_pack_stats_t *stats, - representation_t *rep) -{ - stats->count++; - - stats->packed_size += rep->size; - stats->expanded_size += rep->expanded_size; - stats->overhead_size += rep->header_size + 7 /* ENDREP\n */; -} - -/* Accumulate stats of REP in STATS. - */ -static void -add_rep_stats(representation_stats_t *stats, - representation_t *rep) -{ - add_rep_pack_stats(&stats->total, rep); - if (rep->ref_count == 1) - add_rep_pack_stats(&stats->uniques, rep); - else - add_rep_pack_stats(&stats->shared, rep); - - stats->references += rep->ref_count; - stats->expanded_size += rep->ref_count * rep->expanded_size; -} - -/* Print statistics for the given group of representations to console. - * Use POOL for allocations. - */ -static void -print_rep_stats(representation_stats_t *stats, - apr_pool_t *pool) -{ - printf(_("%20s bytes in %12s reps\n" - "%20s bytes in %12s shared reps\n" - "%20s bytes expanded size\n" - "%20s bytes expanded shared size\n" - "%20s bytes with rep-sharing off\n" - "%20s shared references\n"), - svn__i64toa_sep(stats->total.packed_size, ',', pool), - svn__i64toa_sep(stats->total.count, ',', pool), - svn__i64toa_sep(stats->shared.packed_size, ',', pool), - svn__i64toa_sep(stats->shared.count, ',', pool), - svn__i64toa_sep(stats->total.expanded_size, ',', pool), - svn__i64toa_sep(stats->shared.expanded_size, ',', pool), - svn__i64toa_sep(stats->expanded_size, ',', pool), - svn__i64toa_sep(stats->references - stats->total.count, ',', pool)); -} - -/* Print the (used) contents of CHANGES. Use POOL for allocations. - */ -static void -print_largest_reps(largest_changes_t *changes, - apr_pool_t *pool) -{ - apr_size_t i; - for (i = 0; i < changes->count && changes->changes[i]->size; ++i) - printf(_("%12s r%-8ld %s\n"), - svn__i64toa_sep(changes->changes[i]->size, ',', pool), - changes->changes[i]->revision, - changes->changes[i]->path->data); -} - -/* Print the non-zero section of HISTOGRAM to console. - * Use POOL for allocations. - */ -static void -print_histogram(histogram_t *histogram, - apr_pool_t *pool) -{ - int first = 0; - int last = 63; - int i; - - /* identify non-zero range */ - while (last > 0 && histogram->lines[last].count == 0) - --last; - - while (first <= last && histogram->lines[first].count == 0) - ++first; - - /* display histogram lines */ - for (i = last; i >= first; --i) - printf(_(" [2^%2d, 2^%2d) %15s (%2d%%) bytes in %12s (%2d%%) items\n"), - i-1, i, - svn__i64toa_sep(histogram->lines[i].sum, ',', pool), - (int)(histogram->lines[i].sum * 100 / histogram->total.sum), - svn__i64toa_sep(histogram->lines[i].count, ',', pool), - (int)(histogram->lines[i].count * 100 / histogram->total.count)); -} - -/* COMPARISON_FUNC for svn_sort__hash. - * Sort extension_info_t values by total count in descending order. - */ -static int -compare_count(const svn_sort__item_t *a, - const svn_sort__item_t *b) -{ - const extension_info_t *lhs = a->value; - const extension_info_t *rhs = b->value; - apr_int64_t diff = lhs->node_histogram.total.count - - rhs->node_histogram.total.count; - - return diff > 0 ? -1 : (diff < 0 ? 1 : 0); -} - -/* COMPARISON_FUNC for svn_sort__hash. - * Sort extension_info_t values by total uncompressed size in descending order. - */ -static int -compare_node_size(const svn_sort__item_t *a, - const svn_sort__item_t *b) -{ - const extension_info_t *lhs = a->value; - const extension_info_t *rhs = b->value; - apr_int64_t diff = lhs->node_histogram.total.sum - - rhs->node_histogram.total.sum; - - return diff > 0 ? -1 : (diff < 0 ? 1 : 0); -} - -/* COMPARISON_FUNC for svn_sort__hash. - * Sort extension_info_t values by total prep count in descending order. - */ -static int -compare_rep_size(const svn_sort__item_t *a, - const svn_sort__item_t *b) -{ - const extension_info_t *lhs = a->value; - const extension_info_t *rhs = b->value; - apr_int64_t diff = lhs->rep_histogram.total.sum - - rhs->rep_histogram.total.sum; - - return diff > 0 ? -1 : (diff < 0 ? 1 : 0); -} - -/* Return an array of extension_info_t* for the (up to) 16 most prominent - * extensions in FS according to the sort criterion COMPARISON_FUNC. - * Allocate results in POOL. - */ -static apr_array_header_t * -get_by_extensions(fs_fs_t *fs, - int (*comparison_func)(const svn_sort__item_t *, - const svn_sort__item_t *), - apr_pool_t *pool) -{ - /* sort all data by extension */ - apr_array_header_t *sorted - = svn_sort__hash(fs->by_extension, comparison_func, pool); - - /* select the top (first) 16 entries */ - int count = MIN(sorted->nelts, 16); - apr_array_header_t *result - = apr_array_make(pool, count, sizeof(extension_info_t*)); - int i; - - for (i = 0; i < count; ++i) - APR_ARRAY_PUSH(result, extension_info_t*) - = APR_ARRAY_IDX(sorted, i, svn_sort__item_t).value; - - return result; -} - -/* Add all extension_info_t* entries of TO_ADD not already in TARGET to - * TARGET. - */ -static void -merge_by_extension(apr_array_header_t *target, - apr_array_header_t *to_add) -{ - int i, k, count; - - count = target->nelts; - for (i = 0; i < to_add->nelts; ++i) - { - extension_info_t *info = APR_ARRAY_IDX(to_add, i, extension_info_t *); - for (k = 0; k < count; ++k) - if (info == APR_ARRAY_IDX(target, k, extension_info_t *)) - break; - - if (k == count) - APR_ARRAY_PUSH(target, extension_info_t*) = info; - } -} - -/* Print the (up to) 16 extensions in FS with the most changes. - * Use POOL for allocations. - */ -static void -print_extensions_by_changes(fs_fs_t *fs, - apr_pool_t *pool) -{ - apr_array_header_t *data = get_by_extensions(fs, compare_count, pool); - apr_int64_t sum = 0; - int i; - - for (i = 0; i < data->nelts; ++i) - { - extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *); - sum += info->node_histogram.total.count; - printf(_(" %9s %12s (%2d%%) changes\n"), - info->extension, - svn__i64toa_sep(info->node_histogram.total.count, ',', pool), - (int)(info->node_histogram.total.count * 100 / - fs->file_histogram.total.count)); - } - - printf(_(" %9s %12s (%2d%%) changes\n"), - "(others)", - svn__i64toa_sep(fs->file_histogram.total.count - sum, ',', pool), - (int)((fs->file_histogram.total.count - sum) * 100 / - fs->file_histogram.total.count)); -} - -/* Print the (up to) 16 extensions in FS with the largest total size of - * changed file content. Use POOL for allocations. - */ -static void -print_extensions_by_nodes(fs_fs_t *fs, - apr_pool_t *pool) -{ - apr_array_header_t *data = get_by_extensions(fs, compare_node_size, pool); - apr_int64_t sum = 0; - int i; - - for (i = 0; i < data->nelts; ++i) - { - extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *); - sum += info->node_histogram.total.sum; - printf(_(" %9s %20s (%2d%%) bytes\n"), - info->extension, - svn__i64toa_sep(info->node_histogram.total.sum, ',', pool), - (int)(info->node_histogram.total.sum * 100 / - fs->file_histogram.total.sum)); - } - - printf(_(" %9s %20s (%2d%%) bytes\n"), - "(others)", - svn__i64toa_sep(fs->file_histogram.total.sum - sum, ',', pool), - (int)((fs->file_histogram.total.sum - sum) * 100 / - fs->file_histogram.total.sum)); -} - -/* Print the (up to) 16 extensions in FS with the largest total size of - * changed file content. Use POOL for allocations. - */ -static void -print_extensions_by_reps(fs_fs_t *fs, - apr_pool_t *pool) -{ - apr_array_header_t *data = get_by_extensions(fs, compare_rep_size, pool); - apr_int64_t sum = 0; - int i; - - for (i = 0; i < data->nelts; ++i) - { - extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *); - sum += info->rep_histogram.total.sum; - printf(_(" %9s %20s (%2d%%) bytes\n"), - info->extension, - svn__i64toa_sep(info->rep_histogram.total.sum, ',', pool), - (int)(info->rep_histogram.total.sum * 100 / - fs->rep_size_histogram.total.sum)); - } - - printf(_(" %9s %20s (%2d%%) bytes\n"), - "(others)", - svn__i64toa_sep(fs->rep_size_histogram.total.sum - sum, ',', pool), - (int)((fs->rep_size_histogram.total.sum - sum) * 100 / - fs->rep_size_histogram.total.sum)); -} - -/* Print per-extension histograms for the most frequent extensions in FS. - * Use POOL for allocations. */ -static void -print_histograms_by_extension(fs_fs_t *fs, - apr_pool_t *pool) -{ - apr_array_header_t *data = get_by_extensions(fs, compare_count, pool); - int i; - - merge_by_extension(data, get_by_extensions(fs, compare_node_size, pool)); - merge_by_extension(data, get_by_extensions(fs, compare_rep_size, pool)); - - for (i = 0; i < data->nelts; ++i) - { - extension_info_t *info = APR_ARRAY_IDX(data, i, extension_info_t *); - printf("\nHistogram of '%s' file sizes:\n", info->extension); - print_histogram(&info->node_histogram, pool); - printf("\nHistogram of '%s' file representation sizes:\n", - info->extension); - print_histogram(&info->rep_histogram, pool); - } -} - -/* Post-process stats for FS and print them to the console. - * Use POOL for allocations. - */ -static void -print_stats(fs_fs_t *fs, - apr_pool_t *pool) -{ - int i, k; - - /* initialize stats to collect */ - representation_stats_t file_rep_stats = { { 0 } }; - representation_stats_t dir_rep_stats = { { 0 } }; - representation_stats_t file_prop_rep_stats = { { 0 } }; - representation_stats_t dir_prop_rep_stats = { { 0 } }; - representation_stats_t total_rep_stats = { { 0 } }; - - node_stats_t dir_node_stats = { 0 }; - node_stats_t file_node_stats = { 0 }; - node_stats_t total_node_stats = { 0 }; - - apr_int64_t total_size = 0; - apr_int64_t change_count = 0; - apr_int64_t change_len = 0; - - /* aggregate info from all revisions */ - for (i = 0; i < fs->revisions->nelts; ++i) - { - revision_info_t *revision = APR_ARRAY_IDX(fs->revisions, i, - revision_info_t *); - - /* data gathered on a revision level */ - change_count += revision->change_count; - change_len += revision->changes_len; - total_size += revision->end - revision->offset; - - dir_node_stats.count += revision->dir_noderev_count; - dir_node_stats.size += revision->dir_noderev_size; - file_node_stats.count += revision->file_noderev_count; - file_node_stats.size += revision->file_noderev_size; - total_node_stats.count += revision->dir_noderev_count - + revision->file_noderev_count; - total_node_stats.size += revision->dir_noderev_size - + revision->file_noderev_size; - - /* process representations */ - for (k = 0; k < revision->representations->nelts; ++k) - { - representation_t *rep = APR_ARRAY_IDX(revision->representations, - k, representation_t *); - - /* accumulate in the right bucket */ - switch(rep->kind) - { - case file_rep: - add_rep_stats(&file_rep_stats, rep); - break; - case dir_rep: - add_rep_stats(&dir_rep_stats, rep); - break; - case file_property_rep: - add_rep_stats(&file_prop_rep_stats, rep); - break; - case dir_property_rep: - add_rep_stats(&dir_prop_rep_stats, rep); - break; - default: - break; - } - - add_rep_stats(&total_rep_stats, rep); - } - } - - /* print results */ - printf("\nGlobal statistics:\n"); - printf(_("%20s bytes in %12s revisions\n" - "%20s bytes in %12s changes\n" - "%20s bytes in %12s node revision records\n" - "%20s bytes in %12s representations\n" - "%20s bytes expanded representation size\n" - "%20s bytes with rep-sharing off\n"), - svn__i64toa_sep(total_size, ',', pool), - svn__i64toa_sep(fs->revisions->nelts, ',', pool), - svn__i64toa_sep(change_len, ',', pool), - svn__i64toa_sep(change_count, ',', pool), - svn__i64toa_sep(total_node_stats.size, ',', pool), - svn__i64toa_sep(total_node_stats.count, ',', pool), - svn__i64toa_sep(total_rep_stats.total.packed_size, ',', pool), - svn__i64toa_sep(total_rep_stats.total.count, ',', pool), - svn__i64toa_sep(total_rep_stats.total.expanded_size, ',', pool), - svn__i64toa_sep(total_rep_stats.expanded_size, ',', pool)); - - printf("\nNoderev statistics:\n"); - printf(_("%20s bytes in %12s nodes total\n" - "%20s bytes in %12s directory noderevs\n" - "%20s bytes in %12s file noderevs\n"), - svn__i64toa_sep(total_node_stats.size, ',', pool), - svn__i64toa_sep(total_node_stats.count, ',', pool), - svn__i64toa_sep(dir_node_stats.size, ',', pool), - svn__i64toa_sep(dir_node_stats.count, ',', pool), - svn__i64toa_sep(file_node_stats.size, ',', pool), - svn__i64toa_sep(file_node_stats.count, ',', pool)); - - printf("\nRepresentation statistics:\n"); - printf(_("%20s bytes in %12s representations total\n" - "%20s bytes in %12s directory representations\n" - "%20s bytes in %12s file representations\n" - "%20s bytes in %12s directory property representations\n" - "%20s bytes in %12s file property representations\n" - "%20s bytes in header & footer overhead\n"), - svn__i64toa_sep(total_rep_stats.total.packed_size, ',', pool), - svn__i64toa_sep(total_rep_stats.total.count, ',', pool), - svn__i64toa_sep(dir_rep_stats.total.packed_size, ',', pool), - svn__i64toa_sep(dir_rep_stats.total.count, ',', pool), - svn__i64toa_sep(file_rep_stats.total.packed_size, ',', pool), - svn__i64toa_sep(file_rep_stats.total.count, ',', pool), - svn__i64toa_sep(dir_prop_rep_stats.total.packed_size, ',', pool), - svn__i64toa_sep(dir_prop_rep_stats.total.count, ',', pool), - svn__i64toa_sep(file_prop_rep_stats.total.packed_size, ',', pool), - svn__i64toa_sep(file_prop_rep_stats.total.count, ',', pool), - svn__i64toa_sep(total_rep_stats.total.overhead_size, ',', pool)); - - printf("\nDirectory representation statistics:\n"); - print_rep_stats(&dir_rep_stats, pool); - printf("\nFile representation statistics:\n"); - print_rep_stats(&file_rep_stats, pool); - printf("\nDirectory property representation statistics:\n"); - print_rep_stats(&dir_prop_rep_stats, pool); - printf("\nFile property representation statistics:\n"); - print_rep_stats(&file_prop_rep_stats, pool); - - printf("\nLargest representations:\n"); - print_largest_reps(fs->largest_changes, pool); - printf("\nExtensions by number of changes:\n"); - print_extensions_by_changes(fs, pool); - printf("\nExtensions by size of changed files:\n"); - print_extensions_by_nodes(fs, pool); - printf("\nExtensions by size of representations:\n"); - print_extensions_by_reps(fs, pool); - - printf("\nHistogram of expanded node sizes:\n"); - print_histogram(&fs->node_size_histogram, pool); - printf("\nHistogram of representation sizes:\n"); - print_histogram(&fs->rep_size_histogram, pool); - printf("\nHistogram of file sizes:\n"); - print_histogram(&fs->file_histogram, pool); - printf("\nHistogram of file representation sizes:\n"); - print_histogram(&fs->file_rep_histogram, pool); - printf("\nHistogram of file property sizes:\n"); - print_histogram(&fs->file_prop_histogram, pool); - printf("\nHistogram of file property representation sizes:\n"); - print_histogram(&fs->file_prop_rep_histogram, pool); - printf("\nHistogram of directory sizes:\n"); - print_histogram(&fs->dir_histogram, pool); - printf("\nHistogram of directory representation sizes:\n"); - print_histogram(&fs->dir_rep_histogram, pool); - printf("\nHistogram of directory property sizes:\n"); - print_histogram(&fs->dir_prop_histogram, pool); - printf("\nHistogram of directory property representation sizes:\n"); - print_histogram(&fs->dir_prop_rep_histogram, pool); - - print_histograms_by_extension(fs, pool); -} - -/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and - * POOL for allocations. - */ -static void -print_usage(svn_stream_t *ostream, const char *progname, - apr_pool_t *pool) -{ - svn_error_clear(svn_stream_printf(ostream, pool, - "\n" - "Usage: %s <repo> [cachesize]\n" - "\n" - "Read the repository at local path <repo> starting at revision 0,\n" - "count statistical information and write that data to stdout.\n" - "Use up to [cachesize] MB of memory for caching. This does not include\n" - "temporary representation of the repository structure, i.e. the actual\n" - "memory may be considerably higher. If not given, defaults to 100 MB.\n", - progname)); -} - -/* linear control flow */ -int main(int argc, const char *argv[]) -{ - apr_pool_t *pool; - svn_stream_t *ostream; - svn_error_t *svn_err; - const char *repo_path = NULL; - svn_revnum_t start_revision = 0; - apr_size_t memsize = 100; - apr_uint64_t temp = 0; - fs_fs_t *fs; - - apr_initialize(); - atexit(apr_terminate); - - pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE)); - - svn_err = svn_stream_for_stdout(&ostream, pool); - if (svn_err) - { - svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG); - return 2; - } - - if (argc < 2 || argc > 3) - { - print_usage(ostream, argv[0], pool); - return 2; - } - - if (argc == 3) - { - svn_err = svn_cstring_strtoui64(&temp, argv[2], 0, APR_SIZE_MAX, 10); - if (svn_err) - { - print_usage(ostream, argv[0], pool); - svn_error_clear(svn_err); - return 2; - } - - memsize = (apr_size_t)temp; - } - - repo_path = svn_dirent_canonicalize(argv[1], pool); - start_revision = 0; - - printf("Reading revisions\n"); - svn_err = read_revisions(&fs, repo_path, start_revision, memsize, pool); - printf("\n"); - - if (svn_err) - { - svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG); - return 2; - } - - print_stats(fs, pool); - - return 0; -} diff --git a/tools/server-side/mod_dontdothat/mod_dontdothat.c b/tools/server-side/mod_dontdothat/mod_dontdothat.c index b4801ed..b939ca7 100644 --- a/tools/server-side/mod_dontdothat/mod_dontdothat.c +++ b/tools/server-side/mod_dontdothat/mod_dontdothat.c @@ -40,7 +40,15 @@ #include "svn_path.h" #include "private/svn_fspath.h" -module AP_MODULE_DECLARE_DATA dontdothat_module; +extern module AP_MODULE_DECLARE_DATA dontdothat_module; + +#ifndef XML_VERSION_AT_LEAST +#define XML_VERSION_AT_LEAST(major,minor,patch) \ +(((major) < XML_MAJOR_VERSION) \ + || ((major) == XML_MAJOR_VERSION && (minor) < XML_MINOR_VERSION) \ + || ((major) == XML_MAJOR_VERSION && (minor) == XML_MINOR_VERSION && \ + (patch) <= XML_MICRO_VERSION)) +#endif /* XML_VERSION_AT_LEAST */ typedef struct dontdothat_config_rec { const char *config_file; @@ -551,6 +559,31 @@ end_element(void *baton, const char *name) } } +#if XML_VERSION_AT_LEAST(1, 95, 8) +static void +expat_entity_declaration(void *userData, + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ + dontdothat_filter_ctx *ctx = userData; + + /* Stop the parser if an entity declaration is hit. */ + XML_StopParser(ctx->xmlp, 0 /* resumable */); +} +#else +/* A noop default_handler. */ +static void +expat_default_handler(void *userData, const XML_Char *s, int len) +{ +} +#endif + static svn_boolean_t is_valid_wildcard(const char *wc) { @@ -696,6 +729,12 @@ dontdothat_insert_filters(request_rec *r) XML_SetElementHandler(ctx->xmlp, start_element, end_element); XML_SetCharacterDataHandler(ctx->xmlp, cdata); +#if XML_VERSION_AT_LEAST(1, 95, 8) + XML_SetEntityDeclHandler(ctx->xmlp, expat_entity_declaration); +#else + XML_SetDefaultHandler(ctx->xmlp, expat_default_handler); +#endif + ap_add_input_filter("DONTDOTHAT_FILTER", ctx, r, r->connection); } } diff --git a/tools/server-side/svn-populate-node-origins-index.c b/tools/server-side/svn-populate-node-origins-index.c index b9762c4..5d74c0c 100644 --- a/tools/server-side/svn-populate-node-origins-index.c +++ b/tools/server-side/svn-populate-node-origins-index.c @@ -122,7 +122,7 @@ build_index(const char *repos_path, apr_pool_t *pool) apr_pool_t *subpool; /* Open the repository. */ - SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, pool)); + SVN_ERR(svn_repos_open3(&repos, repos_path, NULL, pool, pool)); /* Get a filesystem object. */ fs = svn_repos_fs(repos); diff --git a/tools/server-side/svn-rep-sharing-stats.c b/tools/server-side/svn-rep-sharing-stats.c deleted file mode 100644 index f610409..0000000 --- a/tools/server-side/svn-rep-sharing-stats.c +++ /dev/null @@ -1,530 +0,0 @@ -/* - * ==================================================================== - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * ==================================================================== - */ - -#include <apr_signal.h> - -#include "svn_cmdline.h" -#include "svn_dirent_uri.h" -#include "svn_pools.h" -#include "svn_repos.h" -#include "svn_opt.h" -#include "svn_utf.h" -#include "svn_version.h" - -#include "../../subversion/libsvn_fs_fs/fs.h" -#include "../../subversion/libsvn_fs_fs/fs_fs.h" -/* for svn_fs_fs__id_* (used in assertions only) */ -#include "../../subversion/libsvn_fs_fs/id.h" - -#include "private/svn_cmdline_private.h" - -#include "svn_private_config.h" - - -/** Help messages and version checking. **/ - -static svn_error_t * -version(apr_pool_t *pool) -{ - return svn_opt_print_help4(NULL, "svn-rep-sharing-stats", TRUE, FALSE, FALSE, - NULL, NULL, NULL, NULL, NULL, NULL, pool); -} - -static void -usage(apr_pool_t *pool) -{ - svn_error_clear(svn_cmdline_fprintf - (stderr, pool, - _("Type 'svn-rep-sharing-stats --help' for usage.\n"))); -} - - -static void -help(const apr_getopt_option_t *options, apr_pool_t *pool) -{ - svn_error_clear - (svn_cmdline_fprintf - (stdout, pool, - _("usage: svn-rep-sharing-stats [OPTIONS] REPOS_PATH\n\n" - " Prints the reference count statistics for representations\n" - " in an FSFS repository.\n" - "\n" - " At least one of the options --data/--prop/--both must be specified.\n" - "\n" - "Valid options:\n"))); - while (options->description) - { - const char *optstr; - svn_opt_format_option(&optstr, options, TRUE, pool); - svn_error_clear(svn_cmdline_fprintf(stdout, pool, " %s\n", optstr)); - ++options; - } - svn_error_clear(svn_cmdline_fprintf(stdout, pool, "\n")); - exit(0); -} - - -/* Version compatibility check */ -static svn_error_t * -check_lib_versions(void) -{ - static const svn_version_checklist_t checklist[] = - { - /* ### check FSFS version */ - { "svn_subr", svn_subr_version }, - { "svn_fs", svn_fs_version }, - { NULL, NULL } - }; - SVN_VERSION_DEFINE(my_version); - - return svn_error_trace(svn_ver_check_list(&my_version, checklist)); -} - - - -/** Cancellation stuff, ### copied from subversion/svn/main.c */ - -/* A flag to see if we've been cancelled by the client or not. */ -static volatile sig_atomic_t cancelled = FALSE; - -/* A signal handler to support cancellation. */ -static void -signal_handler(int signum) -{ - apr_signal(signum, SIG_IGN); - cancelled = TRUE; -} - -/* Our cancellation callback. */ -static svn_error_t * -svn_cl__check_cancel(void *baton) -{ - if (cancelled) - return svn_error_create(SVN_ERR_CANCELLED, NULL, _("Caught signal")); - else - return SVN_NO_ERROR; -} - -static svn_cancel_func_t cancel_func = svn_cl__check_cancel; - -static void set_up_cancellation(void) -{ - /* Set up our cancellation support. */ - apr_signal(SIGINT, signal_handler); -#ifdef SIGBREAK - /* SIGBREAK is a Win32 specific signal generated by ctrl-break. */ - apr_signal(SIGBREAK, signal_handler); -#endif -#ifdef SIGHUP - apr_signal(SIGHUP, signal_handler); -#endif -#ifdef SIGTERM - apr_signal(SIGTERM, signal_handler); -#endif - -#ifdef SIGPIPE - /* Disable SIGPIPE generation for the platforms that have it. */ - apr_signal(SIGPIPE, SIG_IGN); -#endif - -#ifdef SIGXFSZ - /* Disable SIGXFSZ generation for the platforms that have it, otherwise - * working with large files when compiled against an APR that doesn't have - * large file support will crash the program, which is uncool. */ - apr_signal(SIGXFSZ, SIG_IGN); -#endif -} - - -/** Program-specific code. **/ -enum { - OPT_VERSION = SVN_OPT_FIRST_LONGOPT_ID, - OPT_DATA, - OPT_PROP, - OPT_BOTH -}; - -static svn_error_t *check_experimental(void) -{ - if (getenv("SVN_REP_SHARING_STATS_IS_EXPERIMENTAL")) - return SVN_NO_ERROR; - - return svn_error_create(APR_EGENERAL, NULL, - "This code is experimental and should not " - "be used on live data."); -} - -/* The parts of a rep that determine whether it's being shared. */ -struct key_t -{ - svn_revnum_t revision; - apr_off_t offset; -}; - -/* What we need to know about a rep. */ -struct value_t -{ - svn_checksum_t *sha1_checksum; - apr_uint64_t refcount; -}; - -/* Increment records[rep] if both are non-NULL and REP contains a sha1. - * Allocate keys and values in RESULT_POOL. - */ -static svn_error_t *record(apr_hash_t *records, - representation_t *rep, - apr_pool_t *result_pool) -{ - struct key_t *key; - struct value_t *value; - - /* Skip if we ignore this particular kind of reps, or if the rep doesn't - * exist or doesn't have the checksum we are after. (The latter case - * often corresponds to node_rev->kind == svn_node_dir.) - */ - if (records == NULL || rep == NULL || rep->sha1_checksum == NULL) - return SVN_NO_ERROR; - - /* Construct the key. - * - * Must use calloc() because apr_hash_* pay attention to padding bytes too. - */ - key = apr_pcalloc(result_pool, sizeof(*key)); - key->revision = rep->revision; - key->offset = rep->offset; - - /* Update or create the value. */ - if ((value = apr_hash_get(records, key, sizeof(*key)))) - { - /* Paranoia. */ - SVN_ERR_ASSERT(value->sha1_checksum != NULL); - SVN_ERR_ASSERT(svn_checksum_match(value->sha1_checksum, - rep->sha1_checksum)); - /* Real work. */ - value->refcount++; - } - else - { - value = apr_palloc(result_pool, sizeof(*value)); - value->sha1_checksum = svn_checksum_dup(rep->sha1_checksum, result_pool); - value->refcount = 1; - } - - /* Store them. */ - apr_hash_set(records, key, sizeof(*key), value); - - return SVN_NO_ERROR; -} - -/* Inspect the data and/or prop reps of revision REVNUM in FS. Store - * reference count tallies in passed hashes (allocated in RESULT_POOL). - * - * If PROP_REPS or DATA_REPS is NULL, the respective kind of reps are not - * tallied. - * - * Print progress report to STDERR unless QUIET is true. - * - * Use SCRATCH_POOL for temporary allocations. - */ -static svn_error_t * -process_one_revision(svn_fs_t *fs, - svn_revnum_t revnum, - svn_boolean_t quiet, - apr_hash_t *prop_reps, - apr_hash_t *data_reps, - apr_hash_t *both_reps, - apr_pool_t *result_pool, - apr_pool_t *scratch_pool) -{ - svn_fs_root_t *rev_root; - apr_hash_t *paths_changed; - apr_hash_index_t *hi; - - if (! quiet) - SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool, - "processing r%ld\n", revnum)); - - /* Get the changed paths. */ - SVN_ERR(svn_fs_revision_root(&rev_root, fs, revnum, scratch_pool)); - SVN_ERR(svn_fs_paths_changed2(&paths_changed, rev_root, scratch_pool)); - - /* Iterate them. */ - /* ### use iterpool? */ - for (hi = apr_hash_first(scratch_pool, paths_changed); - hi; hi = apr_hash_next(hi)) - { - const char *path; - const svn_fs_path_change2_t *change; - const svn_fs_id_t *node_rev_id1, *node_rev_id2; - const svn_fs_id_t *the_id; - - node_revision_t *node_rev; - - path = svn__apr_hash_index_key(hi); - change = svn__apr_hash_index_val(hi); - if (! quiet) - SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool, - "processing r%ld:%s\n", revnum, path)); - - if (change->change_kind == svn_fs_path_change_delete) - /* Can't ask for reps of PATH at REVNUM if the path no longer exists - * at that revision! */ - continue; - - /* Okay, we have two node_rev id's for this change: the txn one and - * the revision one. We'll use the latter. */ - node_rev_id1 = change->node_rev_id; - SVN_ERR(svn_fs_node_id(&node_rev_id2, rev_root, path, scratch_pool)); - - SVN_ERR_ASSERT(svn_fs_fs__id_txn_id(node_rev_id1) != NULL); - SVN_ERR_ASSERT(svn_fs_fs__id_rev(node_rev_id2) != SVN_INVALID_REVNUM); - - the_id = node_rev_id2; - - /* Get the node_rev using the chosen node_rev_id. */ - SVN_ERR(svn_fs_fs__get_node_revision(&node_rev, fs, the_id, scratch_pool)); - - /* Maybe record the sha1's. */ - SVN_ERR(record(prop_reps, node_rev->prop_rep, result_pool)); - SVN_ERR(record(data_reps, node_rev->data_rep, result_pool)); - SVN_ERR(record(both_reps, node_rev->prop_rep, result_pool)); - SVN_ERR(record(both_reps, node_rev->data_rep, result_pool)); - } - - return SVN_NO_ERROR; -} - -/* Print REPS_REF_COUNT (a hash as for process_one_revision()) - * to stdout in "refcount => sha1" format. A sha1 may appear - * more than once if not all its instances are shared. Prepend - * each line by NAME. - * - * Use SCRATCH_POOL for temporary allocations. - */ -static svn_error_t * -pretty_print(const char *name, - apr_hash_t *reps_ref_counts, - apr_pool_t *scratch_pool) -{ - apr_hash_index_t *hi; - - if (reps_ref_counts == NULL) - return SVN_NO_ERROR; - - for (hi = apr_hash_first(scratch_pool, reps_ref_counts); - hi; hi = apr_hash_next(hi)) - { - struct value_t *value; - - SVN_ERR(cancel_func(NULL)); - - value = svn__apr_hash_index_val(hi); - SVN_ERR(svn_cmdline_printf(scratch_pool, "%s %" APR_UINT64_T_FMT " %s\n", - name, value->refcount, - svn_checksum_to_cstring_display( - value->sha1_checksum, - scratch_pool))); - } - - return SVN_NO_ERROR; -} - -/* Return an error unless FS is an fsfs fs. */ -static svn_error_t *is_fs_fsfs(svn_fs_t *fs, apr_pool_t *scratch_pool) -{ - const char *actual, *expected, *path; - - path = svn_fs_path(fs, scratch_pool); - - expected = SVN_FS_TYPE_FSFS; - SVN_ERR(svn_fs_type(&actual, path, scratch_pool)); - - if (strcmp(actual, expected) != 0) - return svn_error_createf(SVN_ERR_FS_UNKNOWN_FS_TYPE, NULL, - "Filesystem '%s' is not of type '%s'", - svn_dirent_local_style(path, scratch_pool), - actual); - - return SVN_NO_ERROR; -} - -/* The core logic. This function iterates the repository REPOS_PATH - * and sends all the (DATA and/or PROP) reps in each revision for counting - * by process_one_revision(). QUIET is passed to process_one_revision(). - */ -static svn_error_t *process(const char *repos_path, - svn_boolean_t prop, - svn_boolean_t data, - svn_boolean_t quiet, - apr_pool_t *scratch_pool) -{ - apr_hash_t *prop_reps = NULL; - apr_hash_t *data_reps = NULL; - apr_hash_t *both_reps = NULL; - svn_revnum_t rev, youngest; - apr_pool_t *iterpool; - svn_repos_t *repos; - svn_fs_t *fs; - - if (prop) - prop_reps = apr_hash_make(scratch_pool); - if (data) - data_reps = apr_hash_make(scratch_pool); - if (prop && data) - both_reps = apr_hash_make(scratch_pool); - - /* Open the FS. */ - SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, scratch_pool)); - fs = svn_repos_fs(repos); - - SVN_ERR(is_fs_fsfs(fs, scratch_pool)); - - SVN_ERR(svn_fs_youngest_rev(&youngest, fs, scratch_pool)); - - /* Iterate the revisions. */ - iterpool = svn_pool_create(scratch_pool); - for (rev = 0; rev <= youngest; rev++) - { - svn_pool_clear(iterpool); - SVN_ERR(cancel_func(NULL)); - SVN_ERR(process_one_revision(fs, rev, quiet, - prop_reps, data_reps, both_reps, - scratch_pool, iterpool)); - } - svn_pool_destroy(iterpool); - - /* Print stats. */ - SVN_ERR(pretty_print("prop", prop_reps, scratch_pool)); - SVN_ERR(pretty_print("data", data_reps, scratch_pool)); - SVN_ERR(pretty_print("both", both_reps, scratch_pool)); - - return SVN_NO_ERROR; -} - -int -main(int argc, const char *argv[]) -{ - const char *repos_path; - apr_pool_t *pool; - svn_boolean_t prop = FALSE, data = FALSE; - svn_boolean_t quiet = FALSE; - svn_error_t *err; - apr_getopt_t *os; - const apr_getopt_option_t options[] = - { - {"data", OPT_DATA, 0, N_("display data reps stats")}, - {"prop", OPT_PROP, 0, N_("display prop reps stats")}, - {"both", OPT_BOTH, 0, N_("display combined (data+prop) reps stats")}, - {"quiet", 'q', 0, N_("no progress (only errors) to stderr")}, - {"help", 'h', 0, N_("display this help")}, - {"version", OPT_VERSION, 0, - N_("show program version information")}, - {0, 0, 0, 0} - }; - - /* Initialize the app. */ - if (svn_cmdline_init("svn-rep-sharing-stats", stderr) != EXIT_SUCCESS) - return EXIT_FAILURE; - - /* Create our top-level pool. Use a separate mutexless allocator, - * given this application is single threaded. - */ - pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE)); - - /* Check library versions */ - err = check_lib_versions(); - if (err) - return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: "); - - err = svn_cmdline__getopt_init(&os, argc, argv, pool); - if (err) - return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: "); - - SVN_INT_ERR(check_experimental()); - - os->interleave = 1; - while (1) - { - int opt; - const char *arg; - apr_status_t status = apr_getopt_long(os, options, &opt, &arg); - if (APR_STATUS_IS_EOF(status)) - break; - if (status != APR_SUCCESS) - { - usage(pool); - return EXIT_FAILURE; - } - switch (opt) - { - case OPT_DATA: - data = TRUE; - break; - /* It seems we don't actually rep-share props yet. */ - case OPT_PROP: - prop = TRUE; - break; - case OPT_BOTH: - data = TRUE; - prop = TRUE; - break; - case 'q': - quiet = TRUE; - break; - case 'h': - help(options, pool); - break; - case OPT_VERSION: - SVN_INT_ERR(version(pool)); - exit(0); - break; - default: - usage(pool); - return EXIT_FAILURE; - } - } - - /* Exactly 1 non-option argument, - * and at least one of "--data"/"--prop"/"--both". - */ - if (os->ind + 1 != argc || (!data && !prop)) - { - usage(pool); - return EXIT_FAILURE; - } - - /* Grab REPOS_PATH from argv. */ - SVN_INT_ERR(svn_utf_cstring_to_utf8(&repos_path, os->argv[os->ind], pool)); - repos_path = svn_dirent_internal_style(repos_path, pool); - - set_up_cancellation(); - - /* Do something. */ - SVN_INT_ERR(process(repos_path, prop, data, quiet, pool)); - - /* We're done. */ - - svn_pool_destroy(pool); - /* Flush stdout to make sure that the user will see any printing errors. */ - SVN_INT_ERR(svn_cmdline_fflush(stdout)); - - return EXIT_SUCCESS; -} diff --git a/tools/server-side/svnauthz.c b/tools/server-side/svnauthz.c index ab8c62d..3fadd23 100644 --- a/tools/server-side/svnauthz.c +++ b/tools/server-side/svnauthz.c @@ -234,7 +234,7 @@ get_authz_from_txn(svn_authz_t **authz, const char *repos_path, svn_error_t *err; /* Open up the repository and find the transaction root */ - SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, pool)); + SVN_ERR(svn_repos_open3(&repos, repos_path, NULL, pool, pool)); fs = svn_repos_fs(repos); SVN_ERR(svn_fs_open_txn(&txn, fs, txn_name, pool)); SVN_ERR(svn_fs_txn_root(&root, txn, pool)); @@ -382,42 +382,6 @@ subcommand_accessof(apr_getopt_t *os, void *baton, apr_pool_t *pool) #undef EXIT_FAILURE #define EXIT_FAILURE 2 -/* Similar to svn_cmdline_handle_exit_error but with an exit_code argument - so we can comply with our contract and exit with 2 for internal failures. - Also is missing the pool argument since we don't need it given - main/sub_main. */ -static int -handle_exit_error(svn_error_t *err, const char *prefix, int exit_code) -{ - /* Issue #3014: - * Don't print anything on broken pipes. The pipe was likely - * closed by the process at the other end. We expect that - * process to perform error reporting as necessary. - * - * ### This assumes that there is only one error in a chain for - * ### SVN_ERR_IO_PIPE_WRITE_ERROR. See svn_cmdline_fputs(). */ - if (err->apr_err != SVN_ERR_IO_PIPE_WRITE_ERROR) - svn_handle_error2(err, stderr, FALSE, prefix); - svn_error_clear(err); - return exit_code; -} - -/* Report and clear the error ERR, and return EXIT_FAILURE. */ -#define EXIT_ERROR(err, exit_code) \ - handle_exit_error(err, "svnauthz: ", exit_code) - -/* A redefinition of the public SVN_INT_ERR macro, that suppresses the - * error message if it is SVN_ERR_IO_PIPE_WRITE_ERROR, amd with the - * program name 'svnauthz' instead of 'svn'. */ -#undef SVN_INT_ERR -#define SVN_INT_ERR(expr) \ - do { \ - svn_error_t *svn_err__temp = (expr); \ - if (svn_err__temp) \ - return EXIT_ERROR(svn_err__temp, EXIT_FAILURE); \ - } while (0) - - /* Return TRUE if the UI of 'svnauthz-validate' (svn 1.7 and earlier) should be emulated, given argv[0]. */ static svn_boolean_t @@ -485,8 +449,13 @@ canonicalize_access_file(const char **canonicalized_access_file, return SVN_NO_ERROR; } -static int -sub_main(int argc, const char *argv[], apr_pool_t *pool) +/* + * On success, leave *EXIT_CODE untouched and return SVN_NO_ERROR. On error, + * either return an error to be displayed, or set *EXIT_CODE to non-zero and + * return SVN_NO_ERROR. + */ +static svn_error_t * +sub_main(int *exit_code, int argc, const char *argv[], apr_pool_t *pool) { svn_error_t *err; @@ -497,7 +466,7 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) int i; /* Initialize the FS library. */ - SVN_INT_ERR(svn_fs_initialize(pool)); + SVN_ERR(svn_fs_initialize(pool)); received_opts = apr_array_make(pool, SVN_OPT_MAX_OPTIONS, sizeof(int)); @@ -506,7 +475,7 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) opt_state.txn = opt_state.repos_path = opt_state.groups_file = NULL; /* Parse options. */ - SVN_INT_ERR(svn_cmdline__getopt_init(&os, argc, argv, pool)); + SVN_ERR(svn_cmdline__getopt_init(&os, argc, argv, pool)); os->interleave = 1; if (!use_compat_mode(argv[0], pool)) @@ -521,8 +490,9 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) break; if (status != APR_SUCCESS) { - SVN_INT_ERR(subcommand_help(NULL, NULL, pool)); - return EXIT_FAILURE; + SVN_ERR(subcommand_help(NULL, NULL, pool)); + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; } /* Stash the option code in an array before parsing it. */ @@ -535,7 +505,7 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) opt_state.help = TRUE; break; case 't': - SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.txn, arg, pool)); + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.txn, arg, pool)); break; case 'R': opt_state.recursive = TRUE; @@ -544,28 +514,29 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) opt_state.version = TRUE; break; case svnauthz__username: - SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.username, arg, pool)); + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.username, arg, pool)); break; case svnauthz__path: - SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.fspath, arg, pool)); + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.fspath, arg, pool)); opt_state.fspath = svn_fspath__canonicalize(opt_state.fspath, pool); break; case svnauthz__repos: - SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_name, arg, pool)); + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_name, arg, pool)); break; case svnauthz__is: - SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.is, arg, pool)); + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.is, arg, pool)); break; case svnauthz__groups_file: - SVN_INT_ERR( + SVN_ERR( svn_utf_cstring_to_utf8(&opt_state.groups_file, arg, pool)); break; default: { - SVN_INT_ERR(subcommand_help(NULL, NULL, pool)); - return EXIT_FAILURE; + SVN_ERR(subcommand_help(NULL, NULL, pool)); + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; } } } @@ -603,8 +574,9 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) { svn_error_clear(svn_cmdline_fprintf(stderr, pool, ("subcommand argument required\n"))); - SVN_INT_ERR(subcommand_help(NULL, NULL, pool)); - return EXIT_FAILURE; + SVN_ERR(subcommand_help(NULL, NULL, pool)); + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; } } else @@ -616,14 +588,15 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) const char *first_arg_utf8; os->ind++; - SVN_INT_ERR(svn_utf_cstring_to_utf8(&first_arg_utf8, + SVN_ERR(svn_utf_cstring_to_utf8(&first_arg_utf8, first_arg, pool)); svn_error_clear( svn_cmdline_fprintf(stderr, pool, ("Unknown subcommand: '%s'\n"), first_arg_utf8)); - SVN_INT_ERR(subcommand_help(NULL, NULL, pool)); - return EXIT_FAILURE; + SVN_ERR(subcommand_help(NULL, NULL, pool)); + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; } } } @@ -637,13 +610,12 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) { if (os->ind +2 != argc) { - err = svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL, - ("Repository and authz file arguments " - "required")); - return EXIT_ERROR(err, EXIT_FAILURE); + return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL, + ("Repository and authz file arguments " + "required")); } - SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_path, os->argv[os->ind], + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.repos_path, os->argv[os->ind], pool)); os->ind++; @@ -653,24 +625,23 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) /* Exactly 1 non-option argument */ if (os->ind + 1 != argc) { - err = svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL, - ("Authz file argument required")); - return EXIT_ERROR(err, EXIT_FAILURE); + return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL, + ("Authz file argument required")); } /* Grab AUTHZ_FILE from argv. */ - SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_state.authz_file, os->argv[os->ind], + SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.authz_file, os->argv[os->ind], pool)); /* Canonicalize opt_state.authz_file appropriately. */ - SVN_INT_ERR(canonicalize_access_file(&opt_state.authz_file, + SVN_ERR(canonicalize_access_file(&opt_state.authz_file, opt_state.authz_file, opt_state.txn != NULL, pool)); /* Same for opt_state.groups_file if it is present. */ if (opt_state.groups_file) { - SVN_INT_ERR(canonicalize_access_file(&opt_state.groups_file, + SVN_ERR(canonicalize_access_file(&opt_state.groups_file, opt_state.groups_file, opt_state.txn != NULL, pool)); } @@ -696,13 +667,14 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) pool); svn_opt_format_option(&optstr, badopt, FALSE, pool); if (subcommand->name[0] == '-') - SVN_INT_ERR(subcommand_help(NULL, NULL, pool)); + SVN_ERR(subcommand_help(NULL, NULL, pool)); else svn_error_clear(svn_cmdline_fprintf(stderr, pool, ("Subcommand '%s' doesn't accept option '%s'\n" "Type 'svnauthz help %s' for usage.\n"), subcommand->name, optstr, subcommand->name)); - return EXIT_FAILURE; + *exit_code = EXIT_FAILURE; + return SVN_NO_ERROR; } } @@ -724,7 +696,8 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) { /* Follow our contract that says we exit with 1 if the file does not validate. */ - return EXIT_ERROR(err, 1); + *exit_code = 1; + return err; } else if (err->apr_err == SVN_ERR_AUTHZ_UNREADABLE || err->apr_err == SVN_ERR_AUTHZ_UNWRITABLE @@ -732,31 +705,22 @@ sub_main(int argc, const char *argv[], apr_pool_t *pool) { /* Follow our contract that says we exit with 3 if --is does not * match. */ - return EXIT_ERROR(err, 3); + *exit_code = 3; + return err; } - - return EXIT_ERROR(err, EXIT_FAILURE); - } - else - { - /* Ensure that everything is written to stdout, so the user will - see any print errors. */ - err = svn_cmdline_fflush(stdout); - if (err) - { - return EXIT_ERROR(err, EXIT_FAILURE); - } - return EXIT_SUCCESS; + return err; } + return SVN_NO_ERROR; } int main(int argc, const char *argv[]) { apr_pool_t *pool; - int exit_code; + int exit_code = EXIT_SUCCESS; + svn_error_t *err; /* Initialize the app. Send all error messages to 'stderr'. */ if (svn_cmdline_init(argv[0], stderr) != EXIT_SUCCESS) @@ -764,7 +728,18 @@ main(int argc, const char *argv[]) pool = svn_pool_create(NULL); - exit_code = sub_main(argc, argv, pool); + err = sub_main(&exit_code, argc, argv, pool); + + /* Flush stdout and report if it fails. It would be flushed on exit anyway + but this makes sure that output is not silently lost if it fails. */ + err = svn_error_compose_create(err, svn_cmdline_fflush(stdout)); + + if (err) + { + if (exit_code == 0) + exit_code = EXIT_FAILURE; + svn_cmdline_handle_exit_error(err, NULL, "svnauthz: "); + } svn_pool_destroy(pool); return exit_code; diff --git a/tools/server-side/svnpredumpfilter.py b/tools/server-side/svnpredumpfilter.py index 5a74755..f6a97c2 100755 --- a/tools/server-side/svnpredumpfilter.py +++ b/tools/server-side/svnpredumpfilter.py @@ -38,6 +38,10 @@ Use the default ordering of revisions (that is, '-r HEAD:0'). Return errorcode 0 if there are no additional dependencies found, 1 if there were; any other errorcode indicates a fatal error. +Paths in mergeinfo are not considered as additional dependencies so the +--skip-missing-merge-sources option of 'svndumpfilter' may be required +for successful filtering with the resulting path list. + Options: --help (-h) Show this usage message and exit. @@ -68,7 +72,7 @@ def sanitize_path(path): def subsumes(path, maybe_child): if path == maybe_child: return True - if maybe_child.find(path + '/') == 0: + if maybe_child.startswith(path + '/'): return True return False @@ -117,20 +121,35 @@ def log(msg, min_verbosity): class DependencyTracker: def __init__(self, include_paths): - self.include_paths = include_paths[:] - self.dependent_paths = [] + self.include_paths = set(include_paths) + self.dependent_paths = set() def path_included(self, path): - for include_path in self.include_paths + self.dependent_paths: + for include_path in self.include_paths | self.dependent_paths: if subsumes(include_path, path): return True return False - def handle_changes(self, path_copies): - for path, copyfrom_path in path_copies.items(): - if self.path_included(path) and copyfrom_path: - if not self.path_included(copyfrom_path): - self.dependent_paths.append(copyfrom_path) + def include_missing_copies(self, path_copies): + while True: + log("Cross-checking %d included paths with %d copies " + "for missing path dependencies..." % ( + len(self.include_paths) + len(self.dependent_paths), + len(path_copies)), + 1) + included_copies = [] + for path, copyfrom_path in path_copies: + if self.path_included(path): + log("Adding copy '%s' -> '%s'" % (copyfrom_path, path), 1) + self.dependent_paths.add(copyfrom_path) + included_copies.append((path, copyfrom_path)) + if not included_copies: + log("Found all missing path dependencies", 1) + break + for path, copyfrom_path in included_copies: + path_copies.remove((path, copyfrom_path)) + log("Found %d new copy dependencies, need to re-check for more" + % len(included_copies), 1) def readline(stream): line = stream.readline() @@ -151,7 +170,7 @@ def svn_log_stream_get_dependencies(stream, included_paths): line_buf = None last_revision = 0 eof = False - path_copies = {} + path_copies = set() found_changed_path = False while not eof: @@ -195,16 +214,15 @@ def svn_log_stream_get_dependencies(stream, included_paths): except EOFError: eof = True break - match = action_re.search(line) + match = copy_action_re.search(line) if match: found_changed_path = True - match = copy_action_re.search(line) - if match: - path_copies[sanitize_path(match.group(1))] = \ - sanitize_path(match.group(2)) + path_copies.add((sanitize_path(match.group(1)), + sanitize_path(match.group(2)))) + elif action_re.search(line): + found_changed_path = True else: break - dt.handle_changes(path_copies) # Finally, skip any log message lines. (If there are none, # remember the last line we read, because it probably has @@ -221,6 +239,7 @@ def svn_log_stream_get_dependencies(stream, included_paths): "'svn log' with the --verbose (-v) option when " "generating the input to this script?") + dt.include_missing_copies(path_copies) return dt def analyze_logs(included_paths): diff --git a/tools/server-side/svnpubsub/commit-hook.py b/tools/server-side/svnpubsub/commit-hook.py index 4a1a3f3..4e6a1cc 100755 --- a/tools/server-side/svnpubsub/commit-hook.py +++ b/tools/server-side/svnpubsub/commit-hook.py @@ -23,7 +23,6 @@ HOST="127.0.0.1" PORT=2069 import sys -import subprocess try: import simplejson as json except ImportError: @@ -31,32 +30,32 @@ except ImportError: import urllib2 -def svncmd(cmd): - return subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) +import svnpubsub.util -def svncmd_uuid(repo): - cmd = "%s uuid %s" % (SVNLOOK, repo) - p = svncmd(cmd) - return p.stdout.read().strip() +def svnlook(cmd, **kwargs): + args = [SVNLOOK] + cmd + return svnpubsub.util.check_output(args, **kwargs) -def svncmd_info(repo, revision): - cmd = "%s info -r %s %s" % (SVNLOOK, revision, repo) - p = svncmd(cmd) - data = p.stdout.read().split("\n") +def svnlook_uuid(repo): + cmd = ["uuid", "--", repo] + return svnlook(cmd).strip() + +def svnlook_info(repo, revision): + cmd = ["info", "-r", revision, "--", repo] + data = svnlook(cmd, universal_newlines=True).split("\n") #print data return {'author': data[0].strip(), 'date': data[1].strip(), 'log': "\n".join(data[3:]).strip()} -def svncmd_changed(repo, revision): - cmd = "%s changed -r %s %s" % (SVNLOOK, revision, repo) - p = svncmd(cmd) +def svnlook_changed(repo, revision): + cmd = ["changed", "-r", revision, "--", repo] + lines = svnlook(cmd, universal_newlines=True).split("\n") changed = {} - while True: - line = p.stdout.readline() - if not line: - break + for line in lines: line = line.strip() + if not line: + continue (flags, filename) = (line[0:3], line[4:]) changed[filename] = {'flags': flags} return changed @@ -71,23 +70,23 @@ def do_put(body): def main(repo, revision): revision = revision.lstrip('r') - i = svncmd_info(repo, revision) + i = svnlook_info(repo, revision) data = {'type': 'svn', 'format': 1, 'id': int(revision), 'changed': {}, - 'repository': svncmd_uuid(repo), + 'repository': svnlook_uuid(repo), 'committer': i['author'], 'log': i['log'], 'date': i['date'], } - data['changed'].update(svncmd_changed(repo, revision)) + data['changed'].update(svnlook_changed(repo, revision)) body = json.dumps(data) do_put(body) if __name__ == "__main__": if len(sys.argv) not in (3, 4): sys.stderr.write("invalid args\n") - sys.exit(0) + sys.exit(1) main(*sys.argv[1:3]) diff --git a/tools/server-side/svnpubsub/daemonize.py b/tools/server-side/svnpubsub/daemonize.py index 8b85258..41b1bec 100644 --- a/tools/server-side/svnpubsub/daemonize.py +++ b/tools/server-side/svnpubsub/daemonize.py @@ -24,6 +24,7 @@ import os import signal import sys import time +import multiprocessing # requires Python 2.6 # possible return values from Daemon.daemonize() @@ -50,11 +51,11 @@ class Daemon(object): def daemonize_exit(self): try: result = self.daemonize() - except (ChildFailed, DaemonFailed) as e: + except (ChildFailed, DaemonFailed), e: # duplicate the exit code sys.exit(e.code) except (ChildTerminatedAbnormally, ChildForkFailed, - DaemonTerminatedAbnormally, DaemonForkFailed) as e: + DaemonTerminatedAbnormally, DaemonForkFailed), e: sys.stderr.write('ERROR: %s\n' % e) sys.exit(1) except ChildResumedIncorrectly: @@ -71,29 +72,41 @@ class Daemon(object): # in original process. daemon is up and running. we're done. def daemonize(self): - # fork off a child that can detach itself from this process. - try: - pid = os.fork() - except OSError as e: - raise ChildForkFailed(e.errno, e.strerror) - - if pid > 0: - # we're in the parent. let's wait for the child to finish setting - # things up -- on our exit, we want to ensure the child is accepting - # connections. - cpid, status = os.waitpid(pid, 0) - assert pid == cpid - if os.WIFEXITED(status): - code = os.WEXITSTATUS(status) - if code: - raise ChildFailed(code) - return DAEMON_RUNNING - - # the child did not exit cleanly. - raise ChildTerminatedAbnormally(status) - + ### review error situations. map to backwards compat. ?? + ### be mindful of daemonize_exit(). + ### we should try and raise ChildFailed / ChildTerminatedAbnormally. + ### ref: older revisions. OR: remove exceptions. + + child_is_ready = multiprocessing.Event() + child_completed = multiprocessing.Event() + + p = multiprocessing.Process(target=self._first_child, + args=(child_is_ready, child_completed)) + p.start() + + # Wait for the child to finish setting things up (in case we need + # to communicate with it). It will only exit when ready. + ### use a timeout here! (parameterized, of course) + p.join() + + ### need to propagate errors, to adjust the return codes + if child_completed.is_set(): + ### what was the exit status? + return DAEMON_COMPLETE + if child_is_ready.is_set(): + return DAEMON_RUNNING + + ### how did we get here?! the immediate child should not exit without + ### signalling ready/complete. some kind of error. + return DAEMON_STARTED + + def _first_child(self, child_is_ready, child_completed): # we're in the child. + ### NOTE: the original design was a bit bunk. Exceptions raised from + ### this point are within the child processes. We need to signal the + ### errors to the parent in other ways. + # decouple from the parent process os.chdir('/') os.umask(0) @@ -102,63 +115,86 @@ class Daemon(object): # remember this pid so the second child can signal it. thispid = os.getpid() - # register a signal handler so the SIGUSR1 doesn't stop the process. - # this object will also record whether if got signalled. - daemon_accepting = SignalCatcher(signal.SIGUSR1) - - # if the daemon process exits before sending SIGUSR1, then we need to see - # the problem. trap SIGCHLD with a SignalCatcher. + # if the daemon process exits before signalling readiness, then we + # need to see the problem. trap SIGCHLD with a SignalCatcher. daemon_exit = SignalCatcher(signal.SIGCHLD) # perform the second fork try: pid = os.fork() - except OSError as e: + except OSError, e: + ### this won't make it to the parent process raise DaemonForkFailed(e.errno, e.strerror) if pid > 0: # in the parent. - # we want to wait for the daemon to signal that it has created and - # bound the socket, and is (thus) ready for connections. if the - # daemon improperly exits before serving, we'll see SIGCHLD and the - # .pause will return. - ### we should add a timeout to this. allow an optional parameter to - ### specify the timeout, in case it takes a long time to start up. - signal.pause() + + # Wait for the child to be ready for operation. + while True: + # The readiness event will invariably be signalled early/first. + # If it *doesn't* get signalled because the child has prematurely + # exited, then we will pause 10ms before noticing the exit. The + # pause is acceptable since that is aberrant/unexpected behavior. + ### is there a way to break this wait() on a signal such as SIGCHLD? + ### parameterize this wait, in case the app knows children may + ### fail quickly? + if child_is_ready.wait(timeout=0.010): + # The child signalled readiness. Yay! + break + if daemon_exit.signalled: + # Whoops. The child exited without signalling :-( + break + # Python 2.6 compat: .wait() may exit when set, but return None + if child_is_ready.is_set(): + break + # A simple timeout. The child is taking a while to prepare. Go + # back and wait for readiness. if daemon_exit.signalled: + # Tell the parent that the child has exited. + ### we need to communicate the exit status, if possible. + child_completed.set() + # reap the daemon process, getting its exit code. bubble it up. cpid, status = os.waitpid(pid, 0) assert pid == cpid if os.WIFEXITED(status): code = os.WEXITSTATUS(status) if code: + ### this won't make it to the parent process raise DaemonFailed(code) + ### this return value is ignored return DAEMON_NOT_RUNNING # the daemon did not exit cleanly. + ### this won't make it to the parent process raise DaemonTerminatedAbnormally(status) - if daemon_accepting.signalled: - # the daemon is up and running, so save the pid and return success. - if self.pidfile: - # Be wary of symlink attacks - try: - os.remove(self.pidfile) - except OSError: - pass - fd = os.open(self.pidfile, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0444) - os.write(fd, '%d\n' % pid) - os.close(fd) - return DAEMON_STARTED - + # child_is_ready got asserted. the daemon is up and running, so + # save the pid and return success. + if self.pidfile: + # Be wary of symlink attacks + try: + os.remove(self.pidfile) + except OSError: + pass + fd = os.open(self.pidfile, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0444) + os.write(fd, '%d\n' % pid) + os.close(fd) + + ### this return value is ignored + return DAEMON_STARTED + + ### old code. what to do with this? throw ChildResumedIncorrectly + ### or just toss this and the exception. # some other signal popped us out of the pause. the daemon might not # be running. + ### this won't make it to the parent process raise ChildResumedIncorrectly() - # we're a deamon now. get rid of the final remnants of the parent. - # start by restoring default signal handlers + # we're a daemon now. get rid of the final remnants of the parent: + # restore the signal handlers and switch std* to the proper files. signal.signal(signal.SIGUSR1, signal.SIG_DFL) signal.signal(signal.SIGCHLD, signal.SIG_DFL) sys.stdout.flush() @@ -176,30 +212,31 @@ class Daemon(object): so.close() se.close() - # TEST: don't release the parent immediately. the whole parent stack - # should pause along with this sleep. + ### TEST: don't release the parent immediately. the whole parent stack + ### should pause along with this sleep. #time.sleep(10) # everything is set up. call the initialization function. self.setup() - # sleep for one second before signalling. we want to make sure the - # parent has called signal.pause() - ### we should think of a better wait around the race condition. - time.sleep(1) + ### TEST: exit before signalling. + #sys.exit(0) + #sys.exit(1) - # okay. the daemon is ready. signal the parent to tell it we're set. - os.kill(thispid, signal.SIGUSR1) + # the child is now ready for parent/anyone to communicate with it. + child_is_ready.set() # start the daemon now. self.run() # The daemon is shutting down, so toss the pidfile. - try: - os.remove(self.pidfile) - except OSError: - pass + if self.pidfile: + try: + os.remove(self.pidfile) + except OSError: + pass + ### this return value is ignored return DAEMON_COMPLETE def setup(self): @@ -209,6 +246,34 @@ class Daemon(object): raise NotImplementedError +class _Detacher(Daemon): + def __init__(self, target, logfile='/dev/null', pidfile=None, + args=(), kwargs={}): + Daemon.__init__(self, logfile, pidfile) + self.target = target + self.args = args + self.kwargs = kwargs + + def setup(self): + pass + + def run(self): + self.target(*self.args, **self.kwargs) + + +def run_detached(target, *args, **kwargs): + """Simple function to run TARGET as a detached daemon. + + The additional arguments/keywords will be passed along. This function + does not return -- sys.exit() will be called as appropriate. + + (capture SystemExit if logging/reporting is necessary) + ### if needed, a variant of this func could be written to not exit + """ + d = _Detacher(target, args=args, kwargs=kwargs) + d.daemonize_exit() + + class SignalCatcher(object): def __init__(self, signum): self.signalled = False diff --git a/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd b/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd index 71fc8c8..79b5901 100755 --- a/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd +++ b/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd @@ -26,7 +26,7 @@ pidfile="${svnpubsub_pidfile}" export PYTHON_EGG_CACHE="/home/svn/.python-eggs" command="/usr/local/bin/twistd" -command_interpreter="/usr/local/bin/${svnwcsub_cmd_int}" +command_interpreter="/usr/local/bin/${svnpubsub_cmd_int}" command_args="-y /usr/local/svnpubsub/svnpubsub.tac \ --logfile=/var/log/vc/svnpubsub.log \ --pidfile=${pidfile} \ diff --git a/tools/server-side/svnpubsub/revprop-change-hook.py b/tools/server-side/svnpubsub/revprop-change-hook.py new file mode 100755 index 0000000..3aa857b --- /dev/null +++ b/tools/server-side/svnpubsub/revprop-change-hook.py @@ -0,0 +1,90 @@ +#!/usr/local/bin/python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SVNLOOK="/usr/local/svn-install/current/bin/svnlook" +#SVNLOOK="/usr/local/bin/svnlook" + +HOST="127.0.0.1" +PORT=2069 + +import sys +try: + import simplejson as json +except ImportError: + import json + +import urllib2 + + +import svnpubsub.util + +def svnlook(cmd, **kwargs): + args = [SVNLOOK] + cmd + return svnpubsub.util.check_output(args, **kwargs) + +def svnlook_uuid(repo): + cmd = ["uuid", "--", repo] + return svnlook(cmd).strip() + +def svnlook_revprop(repo, revision, propname): + cmd = ["propget", "-r", revision, "--revprop", "--", repo, propname] + data = svnlook(cmd) + #print data + return data + +def do_put(body): + opener = urllib2.build_opener(urllib2.HTTPHandler) + request = urllib2.Request("http://%s:%d/metadata" %(HOST, PORT), data=body) + request.add_header('Content-Type', 'application/json') + request.get_method = lambda: 'PUT' + url = opener.open(request) + + +def main(repo, revision, author, propname, action): + revision = revision.lstrip('r') + if action in ('A', 'M'): + new_value = svnlook_revprop(repo, revision, propname) + elif action == 'D': + new_value = None + else: + sys.stderr.write('Unknown revprop change action "%s"\n' % action) + sys.exit(1) + if action in ('D', 'M'): + old_value = sys.stdin.read() + else: + old_value = None + data = {'type': 'svn', + 'format': 1, + 'id': int(revision), + 'repository': svnlook_uuid(repo), + 'revprop': { + 'name': propname, + 'committer': author, + 'value': new_value, + 'old_value': old_value, + } + } + body = json.dumps(data) + do_put(body) + +if __name__ == "__main__": + if len(sys.argv) != 6: + sys.stderr.write("invalid args\n") + sys.exit(1) + + main(*sys.argv[1:6]) diff --git a/tools/server-side/svnpubsub/svnpubsub/client.py b/tools/server-side/svnpubsub/svnpubsub/client.py index c1631d6..871a5e9 100644 --- a/tools/server-side/svnpubsub/svnpubsub/client.py +++ b/tools/server-side/svnpubsub/svnpubsub/client.py @@ -62,7 +62,8 @@ class SvnpubsubClientException(Exception): class Client(asynchat.async_chat): - def __init__(self, url, commit_callback, event_callback): + def __init__(self, url, commit_callback, event_callback, + metadata_callback = None): asynchat.async_chat.__init__(self) self.last_activity = time.time() @@ -82,7 +83,8 @@ class Client(asynchat.async_chat): self.event_callback = event_callback - self.parser = JSONRecordHandler(commit_callback, event_callback) + self.parser = JSONRecordHandler(commit_callback, event_callback, + metadata_callback) # Wait for the end of headers. Then we start parsing JSON. self.set_terminator(b'\r\n\r\n') @@ -126,36 +128,50 @@ class Client(asynchat.async_chat): self.ibuffer.append(data) +class Notification(object): + def __init__(self, data): + self.__dict__.update(data) + +class Commit(Notification): + KIND = 'COMMIT' + +class Metadata(Notification): + KIND = 'METADATA' + + class JSONRecordHandler: - def __init__(self, commit_callback, event_callback): + def __init__(self, commit_callback, event_callback, metadata_callback): self.commit_callback = commit_callback self.event_callback = event_callback + self.metadata_callback = metadata_callback + + EXPECTED_VERSION = 1 def feed(self, record): obj = json.loads(record) if 'svnpubsub' in obj: actual_version = obj['svnpubsub'].get('version') - EXPECTED_VERSION = 1 - if actual_version != EXPECTED_VERSION: - raise SvnpubsubClientException("Unknown svnpubsub format: %r != %d" - % (actual_format, expected_format)) + if actual_version != self.EXPECTED_VERSION: + raise SvnpubsubClientException( + "Unknown svnpubsub format: %r != %d" + % (actual_version, self.EXPECTED_VERSION)) self.event_callback('version', obj['svnpubsub']['version']) elif 'commit' in obj: commit = Commit(obj['commit']) self.commit_callback(commit) elif 'stillalive' in obj: self.event_callback('ping', obj['stillalive']) - - -class Commit(object): - def __init__(self, commit): - self.__dict__.update(commit) + elif 'metadata' in obj and self.metadata_callback: + metadata = Metadata(obj['metadata']) + self.metadata_callback(metadata) class MultiClient(object): - def __init__(self, urls, commit_callback, event_callback): + def __init__(self, urls, commit_callback, event_callback, + metadata_callback = None): self.commit_callback = commit_callback self.event_callback = event_callback + self.metadata_callback = metadata_callback # No target time, as no work to do self.target_time = 0 @@ -185,9 +201,15 @@ class MultiClient(object): def _add_channel(self, url): # Simply instantiating the client will install it into the global map # for processing in the main event loop. - Client(url, - functools.partial(self.commit_callback, url), - functools.partial(self._reconnect, url)) + if self.metadata_callback: + Client(url, + functools.partial(self.commit_callback, url), + functools.partial(self._reconnect, url), + functools.partial(self.metadata_callback, url)) + else: + Client(url, + functools.partial(self.commit_callback, url), + functools.partial(self._reconnect, url)) def _check_stale(self): now = time.time() diff --git a/tools/server-side/svnpubsub/svnpubsub/server.py b/tools/server-side/svnpubsub/svnpubsub/server.py index faee423..d0cdff9 100644 --- a/tools/server-side/svnpubsub/svnpubsub/server.py +++ b/tools/server-side/svnpubsub/svnpubsub/server.py @@ -25,20 +25,27 @@ # Instead of using a complicated XMPP/AMPQ/JMS/super messaging service, # we have simple HTTP GETs and PUTs to get data in and out. # -# Currently supports both XML and JSON serialization. +# Currently supports JSON serialization. # # Example Sub clients: -# curl -sN http://127.0.0.1:2069/commits -# curl -sN http://127.0.0.1:2069/commits/svn/* -# curl -sN http://127.0.0.1:2069/commits/svn -# curl -sN http://127.0.0.1:2069/commits/*/13f79535-47bb-0310-9956-ffa450edef68 -# curl -sN http://127.0.0.1:2069/commits/svn/13f79535-47bb-0310-9956-ffa450edef68 +# curl -sN http://127.0.0.1:2069/commits +# curl -sN 'http://127.0.0.1:2069/commits/svn/*' +# curl -sN http://127.0.0.1:2069/commits/svn +# curl -sN 'http://127.0.0.1:2069/commits/*/13f79535-47bb-0310-9956-ffa450edef68' +# curl -sN http://127.0.0.1:2069/commits/svn/13f79535-47bb-0310-9956-ffa450edef68 # -# URL is built into 2 parts: -# /commits/${optional_type}/${optional_repository} +# curl -sN http://127.0.0.1:2069/metadata +# curl -sN 'http://127.0.0.1:2069/metadata/svn/*' +# curl -sN http://127.0.0.1:2069/metadata/svn +# curl -sN 'http://127.0.0.1:2069/metadata/*/13f79535-47bb-0310-9956-ffa450edef68' +# curl -sN http://127.0.0.1:2069/metadata/svn/13f79535-47bb-0310-9956-ffa450edef68 # -# If the type is included in the URL, you will only get commits of that type. -# The type can be * and then you will receive commits of any type. +# URLs are constructed from 3 parts: +# /${notification}/${optional_type}/${optional_repository} +# +# Notifications can be sent for commits or metadata (e.g., revprop) changes. +# If the type is included in the URL, you will only get notifications of that type. +# The type can be * and then you will receive notifications of any type. # # If the repository is included in the URL, you will only receive # messages about that repository. The repository can be * and then you @@ -71,7 +78,7 @@ from twisted.python import log import time -class Commit: +class Notification(object): def __init__(self, r): self.__dict__.update(r) if not self.check_value('repository'): @@ -86,7 +93,16 @@ class Commit: def check_value(self, k): return hasattr(self, k) and self.__dict__[k] - def render_commit(self): + def render(self): + raise NotImplementedError + + def render_log(self): + raise NotImplementedError + +class Commit(Notification): + KIND = 'COMMIT' + + def render(self): obj = {'commit': {}} obj['commit'].update(self.__dict__) return json.dumps(obj) @@ -96,20 +112,32 @@ class Commit: paths_changed = " %d paths changed" % len(self.changed) except: paths_changed = "" - return "%s:%s repo '%s' id '%s'%s" % (self.type, - self.format, - self.repository, - self.id, - paths_changed) + return "commit %s:%s repo '%s' id '%s'%s" % ( + self.type, self.format, self.repository, self.id, + paths_changed) + +class Metadata(Notification): + KIND = 'METADATA' + + def render(self): + obj = {'metadata': {}} + obj['metadata'].update(self.__dict__) + return json.dumps(obj) + + def render_log(self): + return "metadata %s:%s repo '%s' id '%s' revprop '%s'" % ( + self.type, self.format, self.repository, self.id, + self.revprop['name']) HEARTBEAT_TIME = 15 class Client(object): - def __init__(self, pubsub, r, type, repository): + def __init__(self, pubsub, r, kind, type, repository): self.pubsub = pubsub r.notifyFinish().addErrback(self.finished) self.r = r + self.kind = kind self.type = type self.repository = repository self.alive = True @@ -123,11 +151,14 @@ class Client(object): except ValueError: pass - def interested_in(self, commit): - if self.type and self.type != commit.type: + def interested_in(self, notification): + if self.kind != notification.KIND: + return False + + if self.type and self.type != notification.type: return False - if self.repository and self.repository != commit.repository: + if self.repository and self.repository != notification.repository: return False return True @@ -164,6 +195,13 @@ class SvnPubSub(resource.Resource): isLeaf = True clients = [] + __notification_uri_map = {'commits': Commit.KIND, + 'metadata': Metadata.KIND} + + def __init__(self, notification_class): + resource.Resource.__init__(self) + self.__notification_class = notification_class + def cc(self): return len(self.clients) @@ -183,6 +221,11 @@ class SvnPubSub(resource.Resource): request.setResponseCode(400) return "Invalid path\n" + kind = self.__notification_uri_map.get(uri[1], None) + if kind is None: + request.setResponseCode(400) + return "Invalid path\n" + if uri_len >= 3: type = uri[2] @@ -195,17 +238,18 @@ class SvnPubSub(resource.Resource): if repository == '*': repository = None - c = Client(self, request, type, repository) + c = Client(self, request, kind, type, repository) self.clients.append(c) c.start() return twisted.web.server.NOT_DONE_YET - def notifyAll(self, commit): - data = commit.render_commit() + def notifyAll(self, notification): + data = notification.render() - log.msg("COMMIT: %s (%d clients)" % (commit.render_log(), self.cc())) + log.msg("%s: %s (%d clients)" + % (notification.KIND, notification.render_log(), self.cc())) for client in self.clients: - if client.interested_in(commit): + if client.interested_in(notification): client.write_data(data) def render_PUT(self, request): @@ -218,19 +262,23 @@ class SvnPubSub(resource.Resource): #import pdb;pdb.set_trace() #print "input: %s" % (input) try: - c = json.loads(input) - commit = Commit(c) + data = json.loads(input) + notification = self.__notification_class(data) except ValueError as e: request.setResponseCode(400) - log.msg("COMMIT: failed due to: %s" % str(e)) - return str(e) - self.notifyAll(commit) + errstr = str(e) + log.msg("%s: failed due to: %s" % (notification.KIND, errstr)) + return errstr + self.notifyAll(notification) return "Ok" + def svnpubsub_server(): root = resource.Resource() - s = SvnPubSub() - root.putChild("commits", s) + c = SvnPubSub(Commit) + m = SvnPubSub(Metadata) + root.putChild('commits', c) + root.putChild('metadata', m) return server.Site(root) if __name__ == "__main__": diff --git a/tools/server-side/svnpubsub/svnpubsub/util.py b/tools/server-side/svnpubsub/svnpubsub/util.py new file mode 100644 index 0000000..e254f8b --- /dev/null +++ b/tools/server-side/svnpubsub/svnpubsub/util.py @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import subprocess as __subprocess + +# check_output() is only available in Python 2.7. Allow us to run with +# earlier versions +try: + __check_output = __subprocess.check_output + def check_output(args, env=None, universal_newlines=False): + return __check_output(args, shell=False, env=env, + universal_newlines=universal_newlines) +except AttributeError: + def check_output(args, env=None, universal_newlines=False): + # note: we only use these three args + pipe = __subprocess.Popen(args, shell=False, env=env, + stdout=__subprocess.PIPE, + universal_newlines=universal_newlines) + output, _ = pipe.communicate() + if pipe.returncode: + raise subprocess.CalledProcessError(pipe.returncode, args) + return output diff --git a/tools/server-side/svnpubsub/svnwcsub.py b/tools/server-side/svnpubsub/svnwcsub.py index 366df7c..8105d87 100755 --- a/tools/server-side/svnpubsub/svnwcsub.py +++ b/tools/server-side/svnpubsub/svnwcsub.py @@ -69,27 +69,22 @@ except ImportError: import daemonize import svnpubsub.client - -# check_output() is only available in Python 2.7. Allow us to run with -# earlier versions -try: - check_output = subprocess.check_output -except AttributeError: - def check_output(args, env): # note: we only use these two args - pipe = subprocess.Popen(args, stdout=subprocess.PIPE, env=env) - output, _ = pipe.communicate() - if pipe.returncode: - raise subprocess.CalledProcessError(pipe.returncode, args) - return output +import svnpubsub.util assert hasattr(subprocess, 'check_call') def check_call(*args, **kwds): - """Wrapper around subprocess.check_call() that logs stderr upon failure.""" + """Wrapper around subprocess.check_call() that logs stderr upon failure, + with an optional list of exit codes to consider non-failure.""" assert 'stderr' not in kwds + if '__okayexits' in kwds: + __okayexits = kwds['__okayexits'] + del kwds['__okayexits'] + else: + __okayexits = set([0]) # EXIT_SUCCESS kwds.update(stderr=subprocess.PIPE) pipe = subprocess.Popen(*args, **kwds) output, errput = pipe.communicate() - if pipe.returncode: + if pipe.returncode not in __okayexits: cmd = args[0] if len(args) else kwds.get('args', '(no command)') # TODO: log stdout too? logging.error('Command failed: returncode=%d command=%r stderr=%r', @@ -103,7 +98,7 @@ def check_call(*args, **kwds): def svn_info(svnbin, env, path): "Run 'svn info' on the target path, returning a dict of info data." args = [svnbin, "info", "--non-interactive", "--", path] - output = check_output(args, env=env).strip() + output = svnpubsub.util.check_output(args, env=env).strip() info = { } for line in output.split('\n'): idx = line.index(':') @@ -303,6 +298,21 @@ class BackgroundWorker(threading.Thread): logging.info("updating: %s", wc.path) + ## Run the hook + HEAD = svn_info(self.svnbin, self.env, wc.url)['Revision'] + if self.hook: + hook_mode = ['pre-update', 'pre-boot'][boot] + logging.info('running hook: %s at %s', + wc.path, hook_mode) + args = [self.hook, hook_mode, wc.path, HEAD, wc.url] + rc = check_call(args, env=self.env, __okayexits=[0, 1]) + if rc == 1: + # TODO: log stderr + logging.warn('hook denied update of %s at %s', + wc.path, hook_mode) + return + del rc + ### we need to move some of these args into the config. these are ### still specific to the ASF setup. args = [self.svnbin, 'switch', @@ -313,12 +323,13 @@ class BackgroundWorker(threading.Thread): '--config-option', 'config:miscellany:use-commit-times=on', '--', - wc.url, + wc.url + '@' + HEAD, wc.path] check_call(args, env=self.env) ### check the loglevel before running 'svn info'? info = svn_info(self.svnbin, self.env, wc.path) + assert info['Revision'] == HEAD logging.info("updated: %s now at r%s", wc.path, info['Revision']) ## Run the hook @@ -533,7 +544,8 @@ def main(args): # We manage the logfile ourselves (along with possible rotation). The # daemon process can just drop stdout/stderr into /dev/null. - d = Daemon('/dev/null', options.pidfile, options.umask, bdec) + d = Daemon('/dev/null', os.path.abspath(options.pidfile), + options.umask, bdec) if options.daemon: # Daemonize the process and call sys.exit() with appropriate code d.daemonize_exit() diff --git a/tools/server-side/svnpubsub/watcher.py b/tools/server-side/svnpubsub/watcher.py index 340b100..11bf066 100755 --- a/tools/server-side/svnpubsub/watcher.py +++ b/tools/server-side/svnpubsub/watcher.py @@ -35,6 +35,9 @@ def _commit(url, commit): print('COMMIT: from %s' % url) pprint.pprint(vars(commit), indent=2) +def _metadata(url, metadata): + print('METADATA: from %s' % url) + pprint.pprint(vars(metadata), indent=2) def _event(url, event_name, event_arg): if event_arg: @@ -44,7 +47,7 @@ def _event(url, event_name, event_arg): def main(urls): - mc = svnpubsub.client.MultiClient(urls, _commit, _event) + mc = svnpubsub.client.MultiClient(urls, _commit, _event, _metadata) mc.run_forever() |