diff options
author | Junio C Hamano <gitster@pobox.com> | 2013-09-09 14:50:32 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2013-09-09 14:50:36 -0700 |
commit | a0a08d48d068551ea819847743d8f5a178b9f8ba (patch) | |
tree | b8c05ed186dfe1da37b515301f9775f18511704e | |
parent | b02f5aeda6f66ac3be9b2e35f9b237d4f1f80d73 (diff) | |
parent | 6667a6ac20747eb56eb2c03c39aceaf6aebbae3c (diff) | |
download | git-a0a08d48d068551ea819847743d8f5a178b9f8ba.tar.gz |
Merge branch 'jc/url-match'
Allow section.<urlpattern>.var configuration variables to be
treated as a "virtual" section.var given a URL, and use the
mechanism to enhance http.* configuration variables.
This is a reroll of Kyle J. McKay's work.
* jc/url-match:
builtin/config.c: compilation fix
config: "git config --get-urlmatch" parses section.<url>.key
builtin/config: refactor collect_config()
config: parse http.<url>.<variable> using urlmatch
config: add generic callback wrapper to parse section.<url>.key
config: add helper to normalize and match URLs
http.c: fix parsing of http.sslCertPasswordProtected variable
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Documentation/config.txt | 45 | ||||
-rw-r--r-- | Documentation/git-config.txt | 29 | ||||
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | builtin/config.c | 140 | ||||
-rw-r--r-- | http.c | 16 | ||||
-rw-r--r-- | t/.gitattributes | 1 | ||||
-rwxr-xr-x | t/t0110-urlmatch-normalization.sh | 177 | ||||
-rw-r--r-- | t/t0110/README | 9 | ||||
-rw-r--r-- | t/t0110/url-1 | 1 | ||||
-rw-r--r-- | t/t0110/url-10 | 1 | ||||
-rw-r--r-- | t/t0110/url-11 | 1 | ||||
-rw-r--r-- | t/t0110/url-2 | 1 | ||||
-rw-r--r-- | t/t0110/url-3 | 1 | ||||
-rw-r--r-- | t/t0110/url-4 | 1 | ||||
-rw-r--r-- | t/t0110/url-5 | 1 | ||||
-rw-r--r-- | t/t0110/url-6 | 1 | ||||
-rw-r--r-- | t/t0110/url-7 | 1 | ||||
-rw-r--r-- | t/t0110/url-8 | 1 | ||||
-rw-r--r-- | t/t0110/url-9 | 1 | ||||
-rwxr-xr-x | t/t1300-repo-config.sh | 25 | ||||
-rw-r--r-- | test-urlmatch-normalization.c | 50 | ||||
-rw-r--r-- | urlmatch.c | 535 | ||||
-rw-r--r-- | urlmatch.h | 54 |
24 files changed, 1072 insertions, 24 deletions
diff --git a/.gitignore b/.gitignore index 6b1fd1bfb0..66199edd4a 100644 --- a/.gitignore +++ b/.gitignore @@ -202,6 +202,7 @@ /test-string-list /test-subprocess /test-svn-fe +/test-urlmatch-normalization /test-wildmatch /common-cmds.h *.tar.gz diff --git a/Documentation/config.txt b/Documentation/config.txt index e8eadde305..00bde9dec5 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1533,6 +1533,51 @@ http.useragent:: of common USER_AGENT strings (but not including those like git/1.7.1). Can be overridden by the 'GIT_HTTP_USER_AGENT' environment variable. +http.<url>.*:: + Any of the http.* options above can be applied selectively to some urls. + For a config key to match a URL, each element of the config key is + compared to that of the URL, in the following order: ++ +-- +. Scheme (e.g., `https` in `https://example.com/`). This field + must match exactly between the config key and the URL. + +. Host/domain name (e.g., `example.com` in `https://example.com/`). + This field must match exactly between the config key and the URL. + +. Port number (e.g., `8080` in `http://example.com:8080/`). + This field must match exactly between the config key and the URL. + Omitted port numbers are automatically converted to the correct + default for the scheme before matching. + +. Path (e.g., `repo.git` in `https://example.com/repo.git`). The + path field of the config key must match the path field of the URL + either exactly or as a prefix of slash-delimited path elements. This means + a config key with path `foo/` matches URL path `foo/bar`. A prefix can only + match on a slash (`/`) boundary. Longer matches take precedence (so a config + key with path `foo/bar` is a better match to URL path `foo/bar` than a config + key with just path `foo/`). + +. User name (e.g., `user` in `https://user@example.com/repo.git`). If + the config key has a user name it must match the user name in the + URL exactly. If the config key does not have a user name, that + config key will match a URL with any user name (including none), + but at a lower precedence than a config key with a user name. +-- ++ +The list above is ordered by decreasing precedence; a URL that matches +a config key's path is preferred to one that matches its user name. For example, +if the URL is `https://user@example.com/foo/bar` a config key match of +`https://example.com/foo` will be preferred over a config key match of +`https://user@example.com`. ++ +All URLs are normalized before attempting any matching (the password part, +if embedded in the URL, is always ignored for matching purposes) so that +equivalent urls that are simply spelled differently will match properly. +Environment variable settings always override any matches. The urls that are +matched against are those given directly to Git commands. This means any URLs +visited as a result of a redirection do not participate in matching. + i18n.commitEncoding:: Character encoding the commit messages are stored in; Git itself does not care per se, but this information is necessary e.g. when diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt index 2dbe486eb1..e9917b89a9 100644 --- a/Documentation/git-config.txt +++ b/Documentation/git-config.txt @@ -15,6 +15,7 @@ SYNOPSIS 'git config' [<file-option>] [type] [-z|--null] --get name [value_regex] 'git config' [<file-option>] [type] [-z|--null] --get-all name [value_regex] 'git config' [<file-option>] [type] [-z|--null] --get-regexp name_regex [value_regex] +'git config' [<file-option>] [type] [-z|--null] --get-urlmatch name URL 'git config' [<file-option>] --unset name [value_regex] 'git config' [<file-option>] --unset-all name [value_regex] 'git config' [<file-option>] --rename-section old_name new_name @@ -95,6 +96,14 @@ OPTIONS in which section and variable names are lowercased, but subsection names are not. +--get-urlmatch name URL:: + When given a two-part name section.key, the value for + section.<url>.key whose <url> part matches the best to the + given URL is returned (if no such key exists, the value for + section.key is used as a fallback). When given just the + section as name, do so for all the keys in the section and + list them. + --global:: For writing options: write to global `~/.gitconfig` file rather than the repository `.git/config`, write to @@ -295,6 +304,13 @@ Given a .git/config like this: gitproxy=proxy-command for kernel.org gitproxy=default-proxy ; for all the rest + ; HTTP + [http] + sslVerify + [http "https://weak.example.com"] + sslVerify = false + cookieFile = /tmp/cookie.txt + you can set the filemode to true with ------------ @@ -380,6 +396,19 @@ RESET=$(git config --get-color "" "reset") echo "${WS}your whitespace color or blue reverse${RESET}" ------------ +For URLs in `https://weak.example.com`, `http.sslVerify` is set to +false, while it is set to `true` for all others: + +------------ +% git config --bool --get-urlmatch http.sslverify https://good.example.com +true +% git config --bool --get-urlmatch http.sslverify https://weak.example.com +false +% git config --get-urlmatch http https://weak.example.com +http.cookiefile /tmp/cookie.txt +http.sslverify false +------------ + include::config.txt[] GIT @@ -580,6 +580,7 @@ TEST_PROGRAMS_NEED_X += test-sigchain TEST_PROGRAMS_NEED_X += test-string-list TEST_PROGRAMS_NEED_X += test-subprocess TEST_PROGRAMS_NEED_X += test-svn-fe +TEST_PROGRAMS_NEED_X += test-urlmatch-normalization TEST_PROGRAMS_NEED_X += test-wildmatch TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X)) @@ -736,6 +737,7 @@ LIB_H += tree-walk.h LIB_H += tree.h LIB_H += unpack-trees.h LIB_H += url.h +LIB_H += urlmatch.h LIB_H += userdiff.h LIB_H += utf8.h LIB_H += varint.h @@ -886,6 +888,7 @@ LIB_OBJS += tree.o LIB_OBJS += tree-walk.o LIB_OBJS += unpack-trees.o LIB_OBJS += url.o +LIB_OBJS += urlmatch.o LIB_OBJS += usage.o LIB_OBJS += userdiff.o LIB_OBJS += utf8.o diff --git a/builtin/config.c b/builtin/config.c index 4ab9e9a550..fc8d8820cb 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -2,6 +2,7 @@ #include "cache.h" #include "color.h" #include "parse-options.h" +#include "urlmatch.h" static const char *const builtin_config_usage[] = { N_("git config [options]"), @@ -42,6 +43,7 @@ static int respect_includes = -1; #define ACTION_SET_ALL (1<<12) #define ACTION_GET_COLOR (1<<13) #define ACTION_GET_COLORBOOL (1<<14) +#define ACTION_GET_URLMATCH (1<<15) #define TYPE_BOOL (1<<0) #define TYPE_INT (1<<1) @@ -59,6 +61,7 @@ static struct option builtin_config_options[] = { OPT_BIT(0, "get", &actions, N_("get value: name [value-regex]"), ACTION_GET), OPT_BIT(0, "get-all", &actions, N_("get all values: key [value-regex]"), ACTION_GET_ALL), OPT_BIT(0, "get-regexp", &actions, N_("get values for regexp: name-regex [value-regex]"), ACTION_GET_REGEXP), + OPT_BIT(0, "get-urlmatch", &actions, N_("get value specific for the URL: section[.var] URL"), ACTION_GET_URLMATCH), OPT_BIT(0, "replace-all", &actions, N_("replace all matching variables: name value [value_regex]"), ACTION_REPLACE_ALL), OPT_BIT(0, "add", &actions, N_("add a new variable: name value"), ACTION_ADD), OPT_BIT(0, "unset", &actions, N_("remove a variable: name [value-regex]"), ACTION_UNSET), @@ -102,25 +105,13 @@ struct strbuf_list { int alloc; }; -static int collect_config(const char *key_, const char *value_, void *cb) +static int format_config(struct strbuf *buf, const char *key_, const char *value_) { - struct strbuf_list *values = cb; - struct strbuf *buf; - char value[256]; - const char *vptr = value; int must_free_vptr = 0; int must_print_delim = 0; + char value[256]; + const char *vptr = value; - if (!use_key_regexp && strcmp(key_, key)) - return 0; - if (use_key_regexp && regexec(key_regexp, key_, 0, NULL, 0)) - return 0; - if (regexp != NULL && - (do_not_match ^ !!regexec(regexp, (value_?value_:""), 0, NULL, 0))) - return 0; - - ALLOC_GROW(values->items, values->nr + 1, values->alloc); - buf = &values->items[values->nr++]; strbuf_init(buf, 0); if (show_keys) { @@ -128,7 +119,7 @@ static int collect_config(const char *key_, const char *value_, void *cb) must_print_delim = 1; } if (types == TYPE_INT) - sprintf(value, "%d", git_config_int(key_, value_?value_:"")); + sprintf(value, "%d", git_config_int(key_, value_ ? value_ : "")); else if (types == TYPE_BOOL) vptr = git_config_bool(key_, value_) ? "true" : "false"; else if (types == TYPE_BOOL_OR_INT) { @@ -156,15 +147,27 @@ static int collect_config(const char *key_, const char *value_, void *cb) strbuf_addch(buf, term); if (must_free_vptr) - /* If vptr must be freed, it's a pointer to a - * dynamically allocated buffer, it's safe to cast to - * const. - */ free((char *)vptr); - return 0; } +static int collect_config(const char *key_, const char *value_, void *cb) +{ + struct strbuf_list *values = cb; + + if (!use_key_regexp && strcmp(key_, key)) + return 0; + if (use_key_regexp && regexec(key_regexp, key_, 0, NULL, 0)) + return 0; + if (regexp != NULL && + (do_not_match ^ !!regexec(regexp, (value_?value_:""), 0, NULL, 0))) + return 0; + + ALLOC_GROW(values->items, values->nr + 1, values->alloc); + + return format_config(&values->items[values->nr++], key_, value_); +} + static int get_value(const char *key_, const char *regex_) { int ret = CONFIG_GENERIC_ERROR; @@ -364,6 +367,97 @@ static void check_blob_write(void) die("writing config blobs is not supported"); } +struct urlmatch_current_candidate_value { + char value_is_null; + struct strbuf value; +}; + +static int urlmatch_collect_fn(const char *var, const char *value, void *cb) +{ + struct string_list *values = cb; + struct string_list_item *item = string_list_insert(values, var); + struct urlmatch_current_candidate_value *matched = item->util; + + if (!matched) { + matched = xmalloc(sizeof(*matched)); + strbuf_init(&matched->value, 0); + item->util = matched; + } else { + strbuf_reset(&matched->value); + } + + if (value) { + strbuf_addstr(&matched->value, value); + matched->value_is_null = 0; + } else { + matched->value_is_null = 1; + } + return 0; +} + +static char *dup_downcase(const char *string) +{ + char *result; + size_t len, i; + + len = strlen(string); + result = xmalloc(len + 1); + for (i = 0; i < len; i++) + result[i] = tolower(string[i]); + result[i] = '\0'; + return result; +} + +static int get_urlmatch(const char *var, const char *url) +{ + char *section_tail; + struct string_list_item *item; + struct urlmatch_config config = { STRING_LIST_INIT_DUP }; + struct string_list values = STRING_LIST_INIT_DUP; + + config.collect_fn = urlmatch_collect_fn; + config.cascade_fn = NULL; + config.cb = &values; + + if (!url_normalize(url, &config.url)) + die("%s", config.url.err); + + config.section = dup_downcase(var); + section_tail = strchr(config.section, '.'); + if (section_tail) { + *section_tail = '\0'; + config.key = section_tail + 1; + show_keys = 0; + } else { + config.key = NULL; + show_keys = 1; + } + + git_config_with_options(urlmatch_config_entry, &config, + given_config_file, NULL, respect_includes); + + for_each_string_list_item(item, &values) { + struct urlmatch_current_candidate_value *matched = item->util; + struct strbuf key = STRBUF_INIT; + struct strbuf buf = STRBUF_INIT; + + strbuf_addstr(&key, item->string); + format_config(&buf, key.buf, + matched->value_is_null ? NULL : matched->value.buf); + fwrite(buf.buf, 1, buf.len, stdout); + strbuf_release(&key); + strbuf_release(&buf); + + strbuf_release(&matched->value); + } + string_list_clear(&config.vars, 1); + string_list_clear(&values, 1); + free(config.url.url); + + free((void *)config.section); + return 0; +} + int cmd_config(int argc, const char **argv, const char *prefix) { int nongit = !startup_info->have_repository; @@ -523,6 +617,10 @@ int cmd_config(int argc, const char **argv, const char *prefix) check_argc(argc, 1, 2); return get_value(argv[0], argv[1]); } + else if (actions == ACTION_GET_URLMATCH) { + check_argc(argc, 2, 2); + return get_urlmatch(argv[0], argv[1]); + } else if (actions == ACTION_UNSET) { check_blob_write(); check_argc(argc, 1, 2); @@ -3,6 +3,7 @@ #include "sideband.h" #include "run-command.h" #include "url.h" +#include "urlmatch.h" #include "credential.h" #include "version.h" #include "pkt-line.h" @@ -161,8 +162,7 @@ static int http_options(const char *var, const char *value, void *cb) if (!strcmp("http.sslcainfo", var)) return git_config_string(&ssl_cainfo, var, value); if (!strcmp("http.sslcertpasswordprotected", var)) { - if (git_config_bool(var, value)) - ssl_cert_password_required = 1; + ssl_cert_password_required = git_config_bool(var, value); return 0; } if (!strcmp("http.ssltry", var)) { @@ -346,10 +346,20 @@ void http_init(struct remote *remote, const char *url, int proactive_auth) { char *low_speed_limit; char *low_speed_time; + char *normalized_url; + struct urlmatch_config config = { STRING_LIST_INIT_DUP }; + + config.section = "http"; + config.key = NULL; + config.collect_fn = http_options; + config.cascade_fn = git_default_config; + config.cb = NULL; http_is_verbose = 0; + normalized_url = url_normalize(url, &config.url); - git_config(http_options, NULL); + git_config(urlmatch_config_entry, &config); + free(normalized_url); curl_global_init(CURL_GLOBAL_ALL); diff --git a/t/.gitattributes b/t/.gitattributes index 1b97c5465b..2d44088f56 100644 --- a/t/.gitattributes +++ b/t/.gitattributes @@ -1 +1,2 @@ t[0-9][0-9][0-9][0-9]/* -whitespace +t0110/url-* binary diff --git a/t/t0110-urlmatch-normalization.sh b/t/t0110-urlmatch-normalization.sh new file mode 100755 index 0000000000..8d6096d4d1 --- /dev/null +++ b/t/t0110-urlmatch-normalization.sh @@ -0,0 +1,177 @@ +#!/bin/sh + +test_description='urlmatch URL normalization' +. ./test-lib.sh + +# The base name of the test url files +tu="$TEST_DIRECTORY/t0110/url" + +# Note that only file: URLs should be allowed without a host + +test_expect_success 'url scheme' ' + ! test-urlmatch-normalization "" && + ! test-urlmatch-normalization "_" && + ! test-urlmatch-normalization "scheme" && + ! test-urlmatch-normalization "scheme:" && + ! test-urlmatch-normalization "scheme:/" && + ! test-urlmatch-normalization "scheme://" && + ! test-urlmatch-normalization "file" && + ! test-urlmatch-normalization "file:" && + ! test-urlmatch-normalization "file:/" && + test-urlmatch-normalization "file://" && + ! test-urlmatch-normalization "://acme.co" && + ! test-urlmatch-normalization "x_test://acme.co" && + ! test-urlmatch-normalization "-test://acme.co" && + ! test-urlmatch-normalization "0test://acme.co" && + ! test-urlmatch-normalization "+test://acme.co" && + ! test-urlmatch-normalization ".test://acme.co" && + ! test-urlmatch-normalization "schem%6e://" && + test-urlmatch-normalization "x-Test+v1.0://acme.co" && + test "$(test-urlmatch-normalization -p "AbCdeF://x.Y")" = "abcdef://x.y/" +' + +test_expect_success 'url authority' ' + ! test-urlmatch-normalization "scheme://user:pass@" && + ! test-urlmatch-normalization "scheme://?" && + ! test-urlmatch-normalization "scheme://#" && + ! test-urlmatch-normalization "scheme:///" && + ! test-urlmatch-normalization "scheme://:" && + ! test-urlmatch-normalization "scheme://:555" && + test-urlmatch-normalization "file://user:pass@" && + test-urlmatch-normalization "file://?" && + test-urlmatch-normalization "file://#" && + test-urlmatch-normalization "file:///" && + test-urlmatch-normalization "file://:" && + ! test-urlmatch-normalization "file://:555" && + test-urlmatch-normalization "scheme://user:pass@host" && + test-urlmatch-normalization "scheme://@host" && + test-urlmatch-normalization "scheme://%00@host" && + ! test-urlmatch-normalization "scheme://%%@host" && + ! test-urlmatch-normalization "scheme://host_" && + test-urlmatch-normalization "scheme://user:pass@host/" && + test-urlmatch-normalization "scheme://@host/" && + test-urlmatch-normalization "scheme://host/" && + test-urlmatch-normalization "scheme://host?x" && + test-urlmatch-normalization "scheme://host#x" && + test-urlmatch-normalization "scheme://host/@" && + test-urlmatch-normalization "scheme://host?@x" && + test-urlmatch-normalization "scheme://host#@x" && + test-urlmatch-normalization "scheme://[::1]" && + test-urlmatch-normalization "scheme://[::1]/" && + ! test-urlmatch-normalization "scheme://hos%41/" && + test-urlmatch-normalization "scheme://[invalid....:/" && + test-urlmatch-normalization "scheme://invalid....:]/" && + ! test-urlmatch-normalization "scheme://invalid....:[/" && + ! test-urlmatch-normalization "scheme://invalid....:[" +' + +test_expect_success 'url port checks' ' + test-urlmatch-normalization "xyz://q@some.host:" && + test-urlmatch-normalization "xyz://q@some.host:456/" && + ! test-urlmatch-normalization "xyz://q@some.host:0" && + ! test-urlmatch-normalization "xyz://q@some.host:0000000" && + test-urlmatch-normalization "xyz://q@some.host:0000001?" && + test-urlmatch-normalization "xyz://q@some.host:065535#" && + test-urlmatch-normalization "xyz://q@some.host:65535" && + ! test-urlmatch-normalization "xyz://q@some.host:65536" && + ! test-urlmatch-normalization "xyz://q@some.host:99999" && + ! test-urlmatch-normalization "xyz://q@some.host:100000" && + ! test-urlmatch-normalization "xyz://q@some.host:100001" && + test-urlmatch-normalization "http://q@some.host:80" && + test-urlmatch-normalization "https://q@some.host:443" && + test-urlmatch-normalization "http://q@some.host:80/" && + test-urlmatch-normalization "https://q@some.host:443?" && + ! test-urlmatch-normalization "http://q@:8008" && + ! test-urlmatch-normalization "http://:8080" && + ! test-urlmatch-normalization "http://:" && + test-urlmatch-normalization "xyz://q@some.host:456/" && + test-urlmatch-normalization "xyz://[::1]:456/" && + test-urlmatch-normalization "xyz://[::1]:/" && + ! test-urlmatch-normalization "xyz://[::1]:000/" && + ! test-urlmatch-normalization "xyz://[::1]:0%300/" && + ! test-urlmatch-normalization "xyz://[::1]:0x80/" && + ! test-urlmatch-normalization "xyz://[::1]:4294967297/" && + ! test-urlmatch-normalization "xyz://[::1]:030f/" +' + +test_expect_success 'url port normalization' ' + test "$(test-urlmatch-normalization -p "http://x:800")" = "http://x:800/" && + test "$(test-urlmatch-normalization -p "http://x:0800")" = "http://x:800/" && + test "$(test-urlmatch-normalization -p "http://x:00000800")" = "http://x:800/" && + test "$(test-urlmatch-normalization -p "http://x:065535")" = "http://x:65535/" && + test "$(test-urlmatch-normalization -p "http://x:1")" = "http://x:1/" && + test "$(test-urlmatch-normalization -p "http://x:80")" = "http://x/" && + test "$(test-urlmatch-normalization -p "http://x:080")" = "http://x/" && + test "$(test-urlmatch-normalization -p "http://x:000000080")" = "http://x/" && + test "$(test-urlmatch-normalization -p "https://x:443")" = "https://x/" && + test "$(test-urlmatch-normalization -p "https://x:0443")" = "https://x/" && + test "$(test-urlmatch-normalization -p "https://x:000000443")" = "https://x/" +' + +test_expect_success 'url general escapes' ' + ! test-urlmatch-normalization "http://x.y?%fg" && + test "$(test-urlmatch-normalization -p "X://W/%7e%41^%3a")" = "x://w/~A%5E%3A" && + test "$(test-urlmatch-normalization -p "X://W/:/?#[]@")" = "x://w/:/?#[]@" && + test "$(test-urlmatch-normalization -p "X://W/$&()*+,;=")" = "x://w/$&()*+,;=" && + test "$(test-urlmatch-normalization -p "X://W/'\''")" = "x://w/'\''" && + test "$(test-urlmatch-normalization -p "X://W?'\!'")" = "x://w/?'\!'" +' + +test_expect_success 'url high-bit escapes' ' + test "$(test-urlmatch-normalization -p "$(cat "$tu-1")")" = "x://q/%01%02%03%04%05%06%07%08%0E%0F%10%11%12" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-2")")" = "x://q/%13%14%15%16%17%18%19%1B%1C%1D%1E%1F%7F" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-3")")" = "x://q/%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-4")")" = "x://q/%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-5")")" = "x://q/%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-6")")" = "x://q/%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-7")")" = "x://q/%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-8")")" = "x://q/%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-9")")" = "x://q/%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-10")")" = "x://q/%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF" && + test "$(test-urlmatch-normalization -p "$(cat "$tu-11")")" = "x://q/%C2%80%DF%BF%E0%A0%80%EF%BF%BD%F0%90%80%80%F0%AF%BF%BD" +' + +test_expect_success 'url username/password escapes' ' + test "$(test-urlmatch-normalization -p "x://%41%62(^):%70+d@foo")" = "x://Ab(%5E):p+d@foo/" +' + +test_expect_success 'url normalized lengths' ' + test "$(test-urlmatch-normalization -l "Http://%4d%65:%4d^%70@The.Host")" = 25 && + test "$(test-urlmatch-normalization -l "http://%41:%42@x.y/%61/")" = 17 && + test "$(test-urlmatch-normalization -l "http://@x.y/^")" = 15 +' + +test_expect_success 'url . and .. segments' ' + test "$(test-urlmatch-normalization -p "x://y/.")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/./")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/a/.")" = "x://y/a" && + test "$(test-urlmatch-normalization -p "x://y/a/./")" = "x://y/a/" && + test "$(test-urlmatch-normalization -p "x://y/.?")" = "x://y/?" && + test "$(test-urlmatch-normalization -p "x://y/./?")" = "x://y/?" && + test "$(test-urlmatch-normalization -p "x://y/a/.?")" = "x://y/a?" && + test "$(test-urlmatch-normalization -p "x://y/a/./?")" = "x://y/a/?" && + test "$(test-urlmatch-normalization -p "x://y/a/./b/.././../c")" = "x://y/c" && + test "$(test-urlmatch-normalization -p "x://y/a/./b/../.././c/")" = "x://y/c/" && + test "$(test-urlmatch-normalization -p "x://y/a/./b/.././../c/././.././.")" = "x://y/" && + ! test-urlmatch-normalization "x://y/a/./b/.././../c/././.././.." && + test "$(test-urlmatch-normalization -p "x://y/a/./?/././..")" = "x://y/a/?/././.." && + test "$(test-urlmatch-normalization -p "x://y/%2e/")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/%2E/")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/a/%2e./")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/b/.%2E/")" = "x://y/" && + test "$(test-urlmatch-normalization -p "x://y/c/%2e%2E/")" = "x://y/" +' + +# http://@foo specifies an empty user name but does not specify a password +# http://foo specifies neither a user name nor a password +# So they should not be equivalent +test_expect_success 'url equivalents' ' + test-urlmatch-normalization "httP://x" "Http://X/" && + test-urlmatch-normalization "Http://%4d%65:%4d^%70@The.Host" "hTTP://Me:%4D^p@the.HOST:80/" && + ! test-urlmatch-normalization "https://@x.y/^" "httpS://x.y:443/^" && + test-urlmatch-normalization "https://@x.y/^" "httpS://@x.y:0443/^" && + test-urlmatch-normalization "https://@x.y/^/../abc" "httpS://@x.y:0443/abc" && + test-urlmatch-normalization "https://@x.y/^/.." "httpS://@x.y:0443/" +' + +test_done diff --git a/t/t0110/README b/t/t0110/README new file mode 100644 index 0000000000..ad4a50ecd8 --- /dev/null +++ b/t/t0110/README @@ -0,0 +1,9 @@ +The url data files in this directory contain URLs with characters +in the range 0x01-0x1f and 0x7f-0xff to test the proper normalization +of unprintable characters. + +A select few characters in the 0x01-0x1f range are skipped to help +avoid problems running the test itself. + +The urls are in test files in this directory rather than being +embedded in the test script for portability. diff --git a/t/t0110/url-1 b/t/t0110/url-1 new file mode 100644 index 0000000000..519019c5ce --- /dev/null +++ b/t/t0110/url-1 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-10 b/t/t0110/url-10 new file mode 100644 index 0000000000..b9965de6a5 --- /dev/null +++ b/t/t0110/url-10 @@ -0,0 +1 @@ +x://q/ðñòóôõö÷øùúûüýþÿ diff --git a/t/t0110/url-11 b/t/t0110/url-11 new file mode 100644 index 0000000000..f0a50f1009 --- /dev/null +++ b/t/t0110/url-11 @@ -0,0 +1 @@ +x://q/Â€ß¿à €ï¿½ð€€ð¯¿½ diff --git a/t/t0110/url-2 b/t/t0110/url-2 new file mode 100644 index 0000000000..43334b05b2 --- /dev/null +++ b/t/t0110/url-2 @@ -0,0 +1 @@ +x://q/ diff --git a/t/t0110/url-3 b/t/t0110/url-3 new file mode 100644 index 0000000000..7378c7bec2 --- /dev/null +++ b/t/t0110/url-3 @@ -0,0 +1 @@ +x://q/€‚ƒ„…†‡ˆ‰Š‹ŒŽ diff --git a/t/t0110/url-4 b/t/t0110/url-4 new file mode 100644 index 0000000000..220b198c97 --- /dev/null +++ b/t/t0110/url-4 @@ -0,0 +1 @@ +x://q/‘’“”•–—˜™š›œžŸ diff --git a/t/t0110/url-5 b/t/t0110/url-5 new file mode 100644 index 0000000000..1ccd927779 --- /dev/null +++ b/t/t0110/url-5 @@ -0,0 +1 @@ +x://q/ ¡¢£¤¥¦§¨©ª«¬®¯ diff --git a/t/t0110/url-6 b/t/t0110/url-6 new file mode 100644 index 0000000000..e8283aac6d --- /dev/null +++ b/t/t0110/url-6 @@ -0,0 +1 @@ +x://q/°±²³´µ¶·¸¹º»¼½¾¿ diff --git a/t/t0110/url-7 b/t/t0110/url-7 new file mode 100644 index 0000000000..fa7c10b615 --- /dev/null +++ b/t/t0110/url-7 @@ -0,0 +1 @@ +x://q/ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ diff --git a/t/t0110/url-8 b/t/t0110/url-8 new file mode 100644 index 0000000000..79a0ba836f --- /dev/null +++ b/t/t0110/url-8 @@ -0,0 +1 @@ +x://q/ÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß diff --git a/t/t0110/url-9 b/t/t0110/url-9 new file mode 100644 index 0000000000..8b44bec48b --- /dev/null +++ b/t/t0110/url-9 @@ -0,0 +1 @@ +x://q/àáâãäåæçèéêëìíîï diff --git a/t/t1300-repo-config.sh b/t/t1300-repo-config.sh index c4a7d84f46..c23f4781e1 100755 --- a/t/t1300-repo-config.sh +++ b/t/t1300-repo-config.sh @@ -1087,6 +1087,31 @@ test_expect_success 'barf on incomplete string' ' grep " line 3 " error ' +test_expect_success 'urlmatch' ' + cat >.git/config <<-\EOF && + [http] + sslVerify + [http "https://weak.example.com"] + sslVerify = false + cookieFile = /tmp/cookie.txt + EOF + + echo true >expect && + git config --bool --get-urlmatch http.SSLverify https://good.example.com >actual && + test_cmp expect actual && + + echo false >expect && + git config --bool --get-urlmatch http.sslverify https://weak.example.com >actual && + test_cmp expect actual && + + { + echo http.cookiefile /tmp/cookie.txt && + echo http.sslverify false + } >expect && + git config --get-urlmatch HTTP https://weak.example.com >actual && + test_cmp expect actual +' + # good section hygiene test_expect_failure 'unsetting the last key in a section removes header' ' cat >.git/config <<-\EOF && diff --git a/test-urlmatch-normalization.c b/test-urlmatch-normalization.c new file mode 100644 index 0000000000..090bf219a7 --- /dev/null +++ b/test-urlmatch-normalization.c @@ -0,0 +1,50 @@ +#include "git-compat-util.h" +#include "urlmatch.h" + +int main(int argc, char **argv) +{ + const char usage[] = "test-urlmatch-normalization [-p | -l] <url1> | <url1> <url2>"; + char *url1, *url2; + int opt_p = 0, opt_l = 0; + + /* + * For one url, succeed if url_normalize succeeds on it, fail otherwise. + * For two urls, succeed only if url_normalize succeeds on both and + * the results compare equal with strcmp. If -p is given (one url only) + * and url_normalize succeeds, print the result followed by "\n". If + * -l is given (one url only) and url_normalize succeeds, print the + * returned length in decimal followed by "\n". + */ + + if (argc > 1 && !strcmp(argv[1], "-p")) { + opt_p = 1; + argc--; + argv++; + } else if (argc > 1 && !strcmp(argv[1], "-l")) { + opt_l = 1; + argc--; + argv++; + } + + if (argc < 2 || argc > 3) + die("%s", usage); + + if (argc == 2) { + struct url_info info; + url1 = url_normalize(argv[1], &info); + if (!url1) + return 1; + if (opt_p) + printf("%s\n", url1); + if (opt_l) + printf("%u\n", (unsigned)info.url_len); + return 0; + } + + if (opt_p || opt_l) + die("%s", usage); + + url1 = url_normalize(argv[1], NULL); + url2 = url_normalize(argv[2], NULL); + return (url1 && url2 && !strcmp(url1, url2)) ? 0 : 1; +} diff --git a/urlmatch.c b/urlmatch.c new file mode 100644 index 0000000000..1db76c89bc --- /dev/null +++ b/urlmatch.c @@ -0,0 +1,535 @@ +#include "cache.h" +#include "urlmatch.h" + +#define URL_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +#define URL_DIGIT "0123456789" +#define URL_ALPHADIGIT URL_ALPHA URL_DIGIT +#define URL_SCHEME_CHARS URL_ALPHADIGIT "+.-" +#define URL_HOST_CHARS URL_ALPHADIGIT ".-[:]" /* IPv6 literals need [:] */ +#define URL_UNSAFE_CHARS " <>\"%{}|\\^`" /* plus 0x00-0x1F,0x7F-0xFF */ +#define URL_GEN_RESERVED ":/?#[]@" +#define URL_SUB_RESERVED "!$&'()*+,;=" +#define URL_RESERVED URL_GEN_RESERVED URL_SUB_RESERVED /* only allowed delims */ + +static int append_normalized_escapes(struct strbuf *buf, + const char *from, + size_t from_len, + const char *esc_extra, + const char *esc_ok) +{ + /* + * Append to strbuf 'buf' characters from string 'from' with length + * 'from_len' while unescaping characters that do not need to be escaped + * and escaping characters that do. The set of characters to escape + * (the complement of which is unescaped) starts out as the RFC 3986 + * unsafe characters (0x00-0x1F,0x7F-0xFF," <>\"#%{}|\\^`"). If + * 'esc_extra' is not NULL, those additional characters will also always + * be escaped. If 'esc_ok' is not NULL, those characters will be left + * escaped if found that way, but will not be unescaped otherwise (used + * for delimiters). If a %-escape sequence is encountered that is not + * followed by 2 hexadecimal digits, the sequence is invalid and + * false (0) will be returned. Otherwise true (1) will be returned for + * success. + * + * Note that all %-escape sequences will be normalized to UPPERCASE + * as indicated in RFC 3986. Unless included in esc_extra or esc_ok + * alphanumerics and "-._~" will always be unescaped as per RFC 3986. + */ + + while (from_len) { + int ch = *from++; + int was_esc = 0; + + from_len--; + if (ch == '%') { + if (from_len < 2 || + !isxdigit((unsigned char)from[0]) || + !isxdigit((unsigned char)from[1])) + return 0; + ch = hexval_table[(unsigned char)*from++] << 4; + ch |= hexval_table[(unsigned char)*from++]; + from_len -= 2; + was_esc = 1; + } + if ((unsigned char)ch <= 0x1F || (unsigned char)ch >= 0x7F || + strchr(URL_UNSAFE_CHARS, ch) || + (esc_extra && strchr(esc_extra, ch)) || + (was_esc && strchr(esc_ok, ch))) + strbuf_addf(buf, "%%%02X", (unsigned char)ch); + else + strbuf_addch(buf, ch); + } + + return 1; +} + +char *url_normalize(const char *url, struct url_info *out_info) +{ + /* + * Normalize NUL-terminated url using the following rules: + * + * 1. Case-insensitive parts of url will be converted to lower case + * 2. %-encoded characters that do not need to be will be unencoded + * 3. Characters that are not %-encoded and must be will be encoded + * 4. All %-encodings will be converted to upper case hexadecimal + * 5. Leading 0s are removed from port numbers + * 6. If the default port for the scheme is given it will be removed + * 7. A path part (including empty) not starting with '/' has one added + * 8. Any dot segments (. or ..) in the path are resolved and removed + * 9. IPv6 host literals are allowed (but not normalized or validated) + * + * The rules are based on information in RFC 3986. + * + * Please note this function requires a full URL including a scheme + * and host part (except for file: URLs which may have an empty host). + * + * The return value is a newly allocated string that must be freed + * or NULL if the url is not valid. + * + * If out_info is non-NULL, the url and err fields therein will always + * be set. If a non-NULL value is returned, it will be stored in + * out_info->url as well, out_info->err will be set to NULL and the + * other fields of *out_info will also be filled in. If a NULL value + * is returned, NULL will be stored in out_info->url and out_info->err + * will be set to a brief, translated, error message, but no other + * fields will be filled in. + * + * This is NOT a URL validation function. Full URL validation is NOT + * performed. Some invalid host names are passed through this function + * undetected. However, most all other problems that make a URL invalid + * will be detected (including a missing host for non file: URLs). + */ + + size_t url_len = strlen(url); + struct strbuf norm; + size_t spanned; + size_t scheme_len, user_off=0, user_len=0, passwd_off=0, passwd_len=0; + size_t host_off=0, host_len=0, port_len=0, path_off, path_len, result_len; + const char *slash_ptr, *at_ptr, *colon_ptr, *path_start; + char *result; + + /* + * Copy lowercased scheme and :// suffix, %-escapes are not allowed + * First character of scheme must be URL_ALPHA + */ + spanned = strspn(url, URL_SCHEME_CHARS); + if (!spanned || !isalpha(url[0]) || spanned + 3 > url_len || + url[spanned] != ':' || url[spanned+1] != '/' || url[spanned+2] != '/') { + if (out_info) { + out_info->url = NULL; + out_info->err = _("invalid URL scheme name or missing '://' suffix"); + } + return NULL; /* Bad scheme and/or missing "://" part */ + } + strbuf_init(&norm, url_len); + scheme_len = spanned; + spanned += 3; + url_len -= spanned; + while (spanned--) + strbuf_addch(&norm, tolower(*url++)); + + + /* + * Copy any username:password if present normalizing %-escapes + */ + at_ptr = strchr(url, '@'); + slash_ptr = url + strcspn(url, "/?#"); + if (at_ptr && at_ptr < slash_ptr) { + user_off = norm.len; + if (at_ptr > url) { + if (!append_normalized_escapes(&norm, url, at_ptr - url, + "", URL_RESERVED)) { + if (out_info) { + out_info->url = NULL; + out_info->err = _("invalid %XX escape sequence"); + } + strbuf_release(&norm); + return NULL; + } + colon_ptr = strchr(norm.buf + scheme_len + 3, ':'); + if (colon_ptr) { + passwd_off = (colon_ptr + 1) - norm.buf; + passwd_len = norm.len - passwd_off; + user_len = (passwd_off - 1) - (scheme_len + 3); + } else { + user_len = norm.len - (scheme_len + 3); + } + } + strbuf_addch(&norm, '@'); + url_len -= (++at_ptr - url); + url = at_ptr; + } + + + /* + * Copy the host part excluding any port part, no %-escapes allowed + */ + if (!url_len || strchr(":/?#", *url)) { + /* Missing host invalid for all URL schemes except file */ + if (strncmp(norm.buf, "file:", 5)) { + if (out_info) { + out_info->url = NULL; + out_info->err = _("missing host and scheme is not 'file:'"); + } + strbuf_release(&norm); + return NULL; + } + } else { + host_off = norm.len; + } + colon_ptr = slash_ptr - 1; + while (colon_ptr > url && *colon_ptr != ':' && *colon_ptr != ']') + colon_ptr--; + if (*colon_ptr != ':') { + colon_ptr = slash_ptr; + } else if (!host_off && colon_ptr < slash_ptr && colon_ptr + 1 != slash_ptr) { + /* file: URLs may not have a port number */ + if (out_info) { + out_info->url = NULL; + out_info->err = _("a 'file:' URL may not have a port number"); + } + strbuf_release(&norm); + return NULL; + } + spanned = strspn(url, URL_HOST_CHARS); + if (spanned < colon_ptr - url) { + /* Host name has invalid characters */ + if (out_info) { + out_info->url = NULL; + out_info->err = _("invalid characters in host name"); + } + strbuf_release(&norm); + return NULL; + } + while (url < colon_ptr) { + strbuf_addch(&norm, tolower(*url++)); + url_len--; + } + + + /* + * Check the port part and copy if not the default (after removing any + * leading 0s); no %-escapes allowed + */ + if (colon_ptr < slash_ptr) { + /* skip the ':' and leading 0s but not the last one if all 0s */ + url++; + url += strspn(url, "0"); + if (url == slash_ptr && url[-1] == '0') + url--; + if (url == slash_ptr) { + /* Skip ":" port with no number, it's same as default */ + } else if (slash_ptr - url == 2 && + !strncmp(norm.buf, "http:", 5) && + !strncmp(url, "80", 2)) { + /* Skip http :80 as it's the default */ + } else if (slash_ptr - url == 3 && + !strncmp(norm.buf, "https:", 6) && + !strncmp(url, "443", 3)) { + /* Skip https :443 as it's the default */ + } else { + /* + * Port number must be all digits with leading 0s removed + * and since all the protocols we deal with have a 16-bit + * port number it must also be in the range 1..65535 + * 0 is not allowed because that means "next available" + * on just about every system and therefore cannot be used + */ + unsigned long pnum = 0; + spanned = strspn(url, URL_DIGIT); + if (spanned < slash_ptr - url) { + /* port number has invalid characters */ + if (out_info) { + out_info->url = NULL; + out_info->err = _("invalid port number"); + } + strbuf_release(&norm); + return NULL; + } + if (slash_ptr - url <= 5) + pnum = strtoul(url, NULL, 10); + if (pnum == 0 || pnum > 65535) { + /* port number not in range 1..65535 */ + if (out_info) { + out_info->url = NULL; + out_info->err = _("invalid port number"); + } + strbuf_release(&norm); + return NULL; + } + strbuf_addch(&norm, ':'); + strbuf_add(&norm, url, slash_ptr - url); + port_len = slash_ptr - url; + } + url_len -= slash_ptr - colon_ptr; + url = slash_ptr; + } + if (host_off) + host_len = norm.len - host_off; + + + /* + * Now copy the path resolving any . and .. segments being careful not + * to corrupt the URL by unescaping any delimiters, but do add an + * initial '/' if it's missing and do normalize any %-escape sequences. + */ + path_off = norm.len; + path_start = norm.buf + path_off; + strbuf_addch(&norm, '/'); + if (*url == '/') { + url++; + url_len--; + } + for (;;) { + const char *seg_start = norm.buf + norm.len; + const char *next_slash = url + strcspn(url, "/?#"); + int skip_add_slash = 0; + /* + * RFC 3689 indicates that any . or .. segments should be + * unescaped before being checked for. + */ + if (!append_normalized_escapes(&norm, url, next_slash - url, "", + URL_RESERVED)) { + if (out_info) { + out_info->url = NULL; + out_info->err = _("invalid %XX escape sequence"); + } + strbuf_release(&norm); + return NULL; + } + if (!strcmp(seg_start, ".")) { + /* ignore a . segment; be careful not to remove initial '/' */ + if (seg_start == path_start + 1) { + strbuf_setlen(&norm, norm.len - 1); + skip_add_slash = 1; + } else { + strbuf_setlen(&norm, norm.len - 2); + } + } else if (!strcmp(seg_start, "..")) { + /* + * ignore a .. segment and remove the previous segment; + * be careful not to remove initial '/' from path + */ + const char *prev_slash = norm.buf + norm.len - 3; + if (prev_slash == path_start) { + /* invalid .. because no previous segment to remove */ + if (out_info) { + out_info->url = NULL; + out_info->err = _("invalid '..' path segment"); + } + strbuf_release(&norm); + return NULL; + } + while (*--prev_slash != '/') {} + if (prev_slash == path_start) { + strbuf_setlen(&norm, prev_slash - norm.buf + 1); + skip_add_slash = 1; + } else { + strbuf_setlen(&norm, prev_slash - norm.buf); + } + } + url_len -= next_slash - url; + url = next_slash; + /* if the next char is not '/' done with the path */ + if (*url != '/') + break; + url++; + url_len--; + if (!skip_add_slash) + strbuf_addch(&norm, '/'); + } + path_len = norm.len - path_off; + + + /* + * Now simply copy the rest, if any, only normalizing %-escapes and + * being careful not to corrupt the URL by unescaping any delimiters. + */ + if (*url) { + if (!append_normalized_escapes(&norm, url, url_len, "", URL_RESERVED)) { + if (out_info) { + out_info->url = NULL; + out_info->err = _("invalid %XX escape sequence"); + } + strbuf_release(&norm); + return NULL; + } + } + + + result = strbuf_detach(&norm, &result_len); + if (out_info) { + out_info->url = result; + out_info->err = NULL; + out_info->url_len = result_len; + out_info->scheme_len = scheme_len; + out_info->user_off = user_off; + out_info->user_len = user_len; + out_info->passwd_off = passwd_off; + out_info->passwd_len = passwd_len; + out_info->host_off = host_off; + out_info->host_len = host_len; + out_info->port_len = port_len; + out_info->path_off = path_off; + out_info->path_len = path_len; + } + return result; +} + +static size_t url_match_prefix(const char *url, + const char *url_prefix, + size_t url_prefix_len) +{ + /* + * url_prefix matches url if url_prefix is an exact match for url or it + * is a prefix of url and the match ends on a path component boundary. + * Both url and url_prefix are considered to have an implicit '/' on the + * end for matching purposes if they do not already. + * + * url must be NUL terminated. url_prefix_len is the length of + * url_prefix which need not be NUL terminated. + * + * The return value is the length of the match in characters (including + * the final '/' even if it's implicit) or 0 for no match. + * + * Passing NULL as url and/or url_prefix will always cause 0 to be + * returned without causing any faults. + */ + if (!url || !url_prefix) + return 0; + if (!url_prefix_len || (url_prefix_len == 1 && *url_prefix == '/')) + return (!*url || *url == '/') ? 1 : 0; + if (url_prefix[url_prefix_len - 1] == '/') + url_prefix_len--; + if (strncmp(url, url_prefix, url_prefix_len)) + return 0; + if ((strlen(url) == url_prefix_len) || (url[url_prefix_len] == '/')) + return url_prefix_len + 1; + return 0; +} + +int match_urls(const struct url_info *url, + const struct url_info *url_prefix, + int *exactusermatch) +{ + /* + * url_prefix matches url if the scheme, host and port of url_prefix + * are the same as those of url and the path portion of url_prefix + * is the same as the path portion of url or it is a prefix that + * matches at a '/' boundary. If url_prefix contains a user name, + * that must also exactly match the user name in url. + * + * If the user, host, port and path match in this fashion, the returned + * value is the length of the path match including any implicit + * final '/'. For example, "http://me@example.com/path" is matched by + * "http://example.com" with a path length of 1. + * + * If there is a match and exactusermatch is not NULL, then + * *exactusermatch will be set to true if both url and url_prefix + * contained a user name or false if url_prefix did not have a + * user name. If there is no match *exactusermatch is left untouched. + */ + int usermatched = 0; + int pathmatchlen; + + if (!url || !url_prefix || !url->url || !url_prefix->url) + return 0; + + /* check the scheme */ + if (url_prefix->scheme_len != url->scheme_len || + strncmp(url->url, url_prefix->url, url->scheme_len)) + return 0; /* schemes do not match */ + + /* check the user name if url_prefix has one */ + if (url_prefix->user_off) { + if (!url->user_off || url->user_len != url_prefix->user_len || + strncmp(url->url + url->user_off, + url_prefix->url + url_prefix->user_off, + url->user_len)) + return 0; /* url_prefix has a user but it's not a match */ + usermatched = 1; + } + + /* check the host and port */ + if (url_prefix->host_len != url->host_len || + strncmp(url->url + url->host_off, + url_prefix->url + url_prefix->host_off, url->host_len)) + return 0; /* host names and/or ports do not match */ + + /* check the path */ + pathmatchlen = url_match_prefix( + url->url + url->path_off, + url_prefix->url + url_prefix->path_off, + url_prefix->url_len - url_prefix->path_off); + + if (pathmatchlen && exactusermatch) + *exactusermatch = usermatched; + return pathmatchlen; +} + +int urlmatch_config_entry(const char *var, const char *value, void *cb) +{ + struct string_list_item *item; + struct urlmatch_config *collect = cb; + struct urlmatch_item *matched; + struct url_info *url = &collect->url; + const char *key, *dot; + struct strbuf synthkey = STRBUF_INIT; + size_t matched_len = 0; + int user_matched = 0; + int retval; + + key = skip_prefix(var, collect->section); + if (!key || *(key++) != '.') { + if (collect->cascade_fn) + return collect->cascade_fn(var, value, cb); + return 0; /* not interested */ + } + dot = strrchr(key, '.'); + if (dot) { + char *config_url, *norm_url; + struct url_info norm_info; + + config_url = xmemdupz(key, dot - key); + norm_url = url_normalize(config_url, &norm_info); + free(config_url); + if (!norm_url) + return 0; + matched_len = match_urls(url, &norm_info, &user_matched); + free(norm_url); + if (!matched_len) + return 0; + key = dot + 1; + } + + if (collect->key && strcmp(key, collect->key)) + return 0; + + item = string_list_insert(&collect->vars, key); + if (!item->util) { + matched = xcalloc(1, sizeof(*matched)); + item->util = matched; + } else { + matched = item->util; + /* + * Is our match shorter? Is our match the same + * length, and without user while the current + * candidate is with user? Then we cannot use it. + */ + if (matched_len < matched->matched_len || + ((matched_len == matched->matched_len) && + (!user_matched && matched->user_matched))) + return 0; + /* Otherwise, replace it with this one. */ + } + + matched->matched_len = matched_len; + matched->user_matched = user_matched; + strbuf_addstr(&synthkey, collect->section); + strbuf_addch(&synthkey, '.'); + strbuf_addstr(&synthkey, key); + retval = collect->collect_fn(synthkey.buf, value, collect->cb); + + strbuf_release(&synthkey); + return retval; +} diff --git a/urlmatch.h b/urlmatch.h new file mode 100644 index 0000000000..b461dfd3df --- /dev/null +++ b/urlmatch.h @@ -0,0 +1,54 @@ +#ifndef URL_MATCH_H +#include "string-list.h" + +struct url_info { + /* normalized url on success, must be freed, otherwise NULL */ + char *url; + /* if !url, a brief reason for the failure, otherwise NULL */ + const char *err; + + /* the rest of the fields are only set if url != NULL */ + + size_t url_len; /* total length of url (which is now normalized) */ + size_t scheme_len; /* length of scheme name (excluding final :) */ + size_t user_off; /* offset into url to start of user name (0 => none) */ + size_t user_len; /* length of user name; if user_off != 0 but + user_len == 0, an empty user name was given */ + size_t passwd_off; /* offset into url to start of passwd (0 => none) */ + size_t passwd_len; /* length of passwd; if passwd_off != 0 but + passwd_len == 0, an empty passwd was given */ + size_t host_off; /* offset into url to start of host name (0 => none) */ + size_t host_len; /* length of host name; this INCLUDES any ':portnum'; + * file urls may have host_len == 0 */ + size_t port_len; /* if a portnum is present (port_len != 0), it has + * this length (excluding the leading ':') at the + * end of the host name (always 0 for file urls) */ + size_t path_off; /* offset into url to the start of the url path; + * this will always point to a '/' character + * after the url has been normalized */ + size_t path_len; /* length of path portion excluding any trailing + * '?...' and '#...' portion; will always be >= 1 */ +}; + +extern char *url_normalize(const char *, struct url_info *); +extern int match_urls(const struct url_info *url, const struct url_info *url_prefix, int *exactusermatch); + +struct urlmatch_item { + size_t matched_len; + char user_matched; +}; + +struct urlmatch_config { + struct string_list vars; + struct url_info url; + const char *section; + const char *key; + + void *cb; + int (*collect_fn)(const char *var, const char *value, void *cb); + int (*cascade_fn)(const char *var, const char *value, void *cb); +}; + +extern int urlmatch_config_entry(const char *var, const char *value, void *cb); + +#endif /* URL_MATCH_H */ |