summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nick Vatamaniuc <vatamane@apache.org> 2017-09-19 10:16:41 -0400
committer: Nick Vatamaniuc <nickva@users.noreply.github.com> 2017-09-19 11:27:36 -0400
commit: c531a13b22cc6fcea6afb342eb3f5cb315db0313 (patch)
tree: e5a080e70619160d3b323470f0915918ffa0c2a1
parent: 190ee307b0ce70ece06face009eb4ab5abdb33b4 (diff)
download: couchdb-c531a13b22cc6fcea6afb342eb3f5cb315db0313.tar.gz
Fix replication ID parsing in URL paths
Previously, users had to URL-encode replication IDs when using the `_scheduler/jobs/<job_id>` endpoint, because Mochiweb incorrectly decoded the `+` character in the URL path. Users were therefore forced to encode the ID so that the replicator would correctly receive a `+` after Mochiweb parsing. `+` is decoded as ` ` (space), probably because in query strings that is a valid application/x-www-form-urlencoded encoding; however, that decoding is meant only for query strings, not for URL paths. Note that in RFC 3986 (https://tools.ietf.org/html/rfc3986#section-2.2) `+` is a `sub-delim` (a term from the RFC), and in the path component it can be used unquoted as a delimiter (https://tools.ietf.org/html/rfc3986#section-3.3). Indeed, the replication ID is a compound ID, and `+` is a valid delimiter which separates the base part from the extensions. For more details see also: https://github.com/perwendel/spark/issues/490 and https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1. Fixes #825
-rw-r--r-- src/couch_replicator/src/couch_replicator_ids.erl 16
1 files changed, 15 insertions, 1 deletions
diff --git a/src/couch_replicator/src/couch_replicator_ids.erl b/src/couch_replicator/src/couch_replicator_ids.erl
index 62cfdf267..e7067622b 100644
--- a/src/couch_replicator/src/couch_replicator_ids.erl
+++ b/src/couch_replicator/src/couch_replicator_ids.erl
@@ -78,7 +78,11 @@ replication_id(#rep{user_ctx = UserCtx} = Rep, 1) ->
-spec convert([_] | binary() | {string(), string()}) -> {string(), string()}.
convert(Id) when is_list(Id) ->
convert(?l2b(Id));
-convert(Id) when is_binary(Id) ->
+convert(Id0) when is_binary(Id0) ->
+ % Spaces can result from mochiweb incorrectly unquoting + characters from
+ % the URL path. So undo the incorrect parsing here to avoid forcing
+ % users to url encode + characters.
+ Id = binary:replace(Id0, <<" ">>, <<"+">>, [global]),
lists:splitwith(fun(Char) -> Char =/= $+ end, ?b2l(Id));
convert({BaseId, Ext} = Id) when is_list(BaseId), is_list(Ext) ->
Id.
@@ -222,6 +226,16 @@ get_non_default_port(_Schema, Port) ->
-include_lib("eunit/include/eunit.hrl").
+
+replication_id_convert_test_() ->
+ [?_assertEqual(Expected, convert(Id)) || {Expected, Id} <- [
+ {{"abc", ""}, "abc"},
+ {{"abc", ""}, <<"abc">>},
+ {{"abc", "+x+y"}, <<"abc+x+y">>},
+ {{"abc", "+x+y"}, {"abc", "+x+y"}},
+ {{"abc", "+x+y"}, <<"abc x y">>}
+ ]].
+
http_v4_endpoint_test_() ->
[?_assertMatch({remote, User, Host, Port, Path, HeadersNoAuth, undefined},
get_v4_endpoint(nil, #httpdb{url = Url, headers = Headers})) ||